import os
import pandas as pd
import geopandas as gpd
import rasterio
import rasterio.plot
import rasterstats
from rasterstats import zonal_stats
import matplotlib.pyplot as plt
from pathlib import Path
from IPython.display import display
import plotly.express as px
import plotly.offline
plotly.offline.init_notebook_mode()
print('All libraries successfully imported!')
print(f'Rasterstats : {rasterstats.__version__}')
All libraries successfully imported! Rasterstats : 0.14.0
computer_path = '/export/miro/ndeffense/LBRAT2104/'
grp_letter = 'X'
lut_path = f'{computer_path}data/LUT/'
# Directory for all work files
work_path = f'{computer_path}GROUP_{grp_letter}/WORK/'
classif_path = f'{work_path}CLASSIF/'
zonal_stat_path = f'{work_path}ZONAL_STATS/'
Path(zonal_stat_path).mkdir(parents=True, exist_ok=True)
print(f'Zonal Statistics path is set to : {zonal_stat_path}')
Zonal Statistics path is set to : /export/miro/ndeffense/LBRAT2104/GROUP_X/WORK/ZONAL_STATS/
site = 'NAMUR'
year = '2020'
nodata_val = -10000
vector_field = 'name'
feat_nb = 1
field_classif_code = 'grp_1_nb'
field_classif_name = 'grp_1'
reclassify_flag = True
if reclassify_flag:
field_reclassif_code = 'grp_A_nb'
field_reclassif_name = 'grp_A'
field_lut_code = field_reclassif_code
field_lut_name = field_reclassif_name
else:
field_lut_code = field_classif_code
field_lut_name = field_classif_name
lut_file = f'{lut_path}crop_dictionary_new.xlsx'
vector_file = f'{work_path}ROI/extent_roi_32631.shp'
raster_file = f'{classif_path}{site}_{year}_classif_RF_feat_{feat_nb}_{field_classif_name}.tif'
if reclassify_flag:
raster_file = f'{classif_path}{site}_{year}_classif_RF_feat_{feat_nb}_{field_classif_name}_reclassify_{field_reclassif_name}.tif'
zonal_stat_csv = f'{zonal_stat_path}zonal_stat_cat.csv'
zonal_stat_html = f'/export/miro/ndeffense/LBRAT2104/GIT/eo-toolbox/notebooks/A_Zonal_Statistics/figures/{site}_{year}_Zonal_Stat.html'
gdf = gpd.read_file(vector_file)
src = rasterio.open(raster_file, "r")
crs_vector = str(gdf.crs).split(":",1)[1]
crs_raster = str(src.crs).split(":",1)[1]
if crs_vector == crs_raster:
print(f'CRS are the same : EPSG:{crs_vector} = EPSG:{crs_raster}')
else:
print('We must reproject vector file')
gdf = gdf.to_crs(epsg=crs_raster)
CRS are the same : EPSG:32631 = EPSG:32631
Check if raster and vector file are intersecting
fig, ax = plt.subplots(1, 1, figsize=(10,10))
# Plot vector
gdf.plot(facecolor='none', edgecolor='red', linewidth = 4, ax=ax)
# Plot image
rasterio.plot.show(src, ax=ax)
plt.box(False)
rasterstats will report using the pixel values as keys. To associate the pixel values with their appropriate meaning, you can use a category_map
if os.path.isfile(lut_file):
lut_df = pd.read_excel(lut_file)
lut_df = lut_df[[field_lut_code,field_lut_name]].drop_duplicates()
cmap = {}
for index, row in lut_df.iterrows():
nb = row[field_lut_code]
name = row[field_lut_name]
cmap[nb] = f'{nb} - {name}'
else:
cmap = None
print(cmap)
{0: '0 - Remove', 111: '111 - Wheat', 112: '112 - Maize', 113: '113 - Rice', 114: '114 - Sorghum', 115: '115 - Barley', 116: '116 - Rye', 117: '117 - Oats', 118: '118 - Millets', 119: '119 - Other cereals', 121: '121 - Leafy or stem vegetables', 122: '122 - Fruit-bearing vegetables', 123: '123 - Root, bulb or tuberous vegetables', 124: '124 - Mushrooms and truffles', 141: '141 - Soya beans', 142: '142 - Groundnuts', 143: '143 - Other oilseed crops', 151: '151 - Potatoes', 152: '152 - Sweet potatoes', 153: '153 - Cassava', 154: '154 - Yams', 161: '161 - Spice crops', 162: '162 - Hops', 17: '17 - Leguminous crops', 181: '181 - Sugar beet', 182: '182 - Sugar cane', 3: '3 - Grassland and meadows', 192: '192 - Fibre crops', 1931: '1931 - Medicinal, aromatic, pesticidal or similar crops', 1941: '1941 - Flowers crops', 1991: '1991 - Tobacco', 21: '21 - Fruits trees', 22: '22 - Vineyards', 23: '23 - Olive groves', 24: '24 - Trees', 291: '291 - Succulent plant', 5: '5 - Shrub land', 6: '6 - Forest', 7: '7 - Bare soil', 8: '8 - Build-up surface', 9: '9 - Water bodies'}
dict_list = []
for i, row in gdf.iterrows():
name = row[vector_field]
dict_freq = zonal_stats(row.geometry,
raster_file,
categorical=True,
category_map=cmap,
nodata=nodata_val)[0]
dict_freq['name'] = name
dict_list.append(dict_freq)
df = pd.DataFrame(dict_list).set_index('name')
df.to_csv(zonal_stat_csv)
print(f'CSV file was created : {zonal_stat_csv}')
df = df.transpose()
df = df.sort_values(by=df.columns[0], ascending=False)
display(df)
ROI_wallonia CSV file was created : /export/miro/ndeffense/LBRAT2104/GROUP_X/WORK/ZONAL_STATS/zonal_stat_cat.csv
name | ROI_wallonia |
---|---|
8 - Build-up surface | 170018 |
3 - Grassland and meadows | 154137 |
6 - Forest | 108223 |
111 - Wheat | 48069 |
112 - Maize | 12706 |
21 - Fruits trees | 8733 |
181 - Sugar beet | 8466 |
151 - Potatoes | 7949 |
115 - Barley | 7741 |
17 - Leguminous crops | 7729 |
119 - Other cereals | 7148 |
9 - Water bodies | 5909 |
192 - Fibre crops | 5603 |
117 - Oats | 3265 |
143 - Other oilseed crops | 2262 |
22 - Vineyards | 1625 |
121 - Leafy or stem vegetables | 1451 |
df['percent'] = ((df.iloc[:,0]/ df.iloc[:,0].sum())*100).round(2)
display(df)
name | ROI_wallonia | percent |
---|---|---|
8 - Build-up surface | 168422 | 30.02 |
3 - Grassland and meadows | 153662 | 27.39 |
6 - Forest | 109232 | 19.47 |
111 - Wheat | 47519 | 8.47 |
112 - Maize | 12461 | 2.22 |
21 - Fruits trees | 10362 | 1.85 |
181 - Sugar beet | 8457 | 1.51 |
115 - Barley | 8283 | 1.48 |
151 - Potatoes | 7983 | 1.42 |
119 - Other cereals | 7665 | 1.37 |
17 - Leguminous crops | 7441 | 1.33 |
9 - Water bodies | 5937 | 1.06 |
192 - Fibre crops | 5533 | 0.99 |
117 - Oats | 2942 | 0.52 |
143 - Other oilseed crops | 2262 | 0.40 |
22 - Vineyards | 1497 | 0.27 |
121 - Leafy or stem vegetables | 1376 | 0.25 |
fig = px.bar(df,
x=df.index.str.slice(start=0, stop=20),
y=df.columns[0],
text='percent',
labels={'x': "Crop category",df.columns[0]:"Number of pixels"})
#fig.update_xaxes(tickfont_size=15)
#fig.update_yaxes(title_font=dict(size=20), tickfont_size=15)
#fig.update_layout(font_size=20)
fig.update_layout(xaxis_title=None)
fig.show()
fig.write_html(zonal_stat_html, full_html=False)