Commit 660b4654 authored by Yoann Cartier's avatar Yoann Cartier
Browse files

16 august morning

parent a9dbe883
%% Cell type:markdown id:casual-thirty tags:
<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Create-patch-dataset" data-toc-modified-id="Create-patch-dataset-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Create patch dataset</a></span></li><li><span><a href="#Analysis-of-lost-classes" data-toc-modified-id="Analysis-of-lost-classes-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Analysis of lost classes</a></span></li></ul></div>
<div class="toc"><ul class="toc-item"><li><span><a href="#Create-patch-dataset" data-toc-modified-id="Create-patch-dataset-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Create patch dataset</a></span></li><li><span><a href="#Create-hybrids" data-toc-modified-id="Create-hybrids-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Create hybrids</a></span></li><li><span><a href="#Analysis-of-lost-classes" data-toc-modified-id="Analysis-of-lost-classes-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Analysis of lost classes</a></span></li></ul></div>
%% Cell type:code id:nuclear-vision tags:
``` python
import pandas as pd
......@@ -48,11 +48,11 @@
%% Cell type:code id:religious-musical tags:
``` python
#Select features
data=pd.read_csv("../data/Patch/R0_extract.csv")
data=pd.read_csv("../data/Patch/R20_extract.csv")
list_to_drop=['Unnamed: 0','lon','lat','ai','npp','elevation','gdd0','gdd10','gddlgd0','gddlgd10','gddlgd5','gdgfgd0','gdgfgd10','gdgfgd5','ngd0','ngd10','ngd5','fgd','lgd', 'netPrimaryProduction']+list_clim
data.drop(list_to_drop,axis=1,inplace=True)
data=data.astype('float')
#Add slope
slope=pd.read_csv('../data/Patch/slope_eunisforest.csv')
......@@ -63,20 +63,20 @@
```
%% Cell type:code id:waiting-thunder tags:
``` python
#data.drop(['landuseLandcover'],axis=1,inplace=True)
data.drop(['landuseLandcover'],axis=1,inplace=True)
```
%% Cell type:code id:dramatic-excerpt tags:
``` python
data=data.dropna()
```
%% Cell type:markdown id:appreciated-transcription tags:
%% Cell type:markdown id:amazing-morrison tags:
Begin imputation with MICE:
%% Cell type:code id:standard-testimony tags:
......@@ -103,14 +103,10 @@
Datasets: 5
Iterations: 3
Imputed Variables: 42
save_all_iterations: True
%% Cell type:markdown id:guided-consistency tags:
If there are columns named 'landuse_xx', we must change their names before renaming them with the land-cover names:
%% Cell type:code id:opponent-logging tags:
``` python
plt.rcParams["figure.figsize"]=(20,20)
kernel.plot_imputed_distributions(wspace=0.3,hspace=0.3);
......@@ -144,10 +140,16 @@
``` python
median_dataset.to_csv('../data/Patch/imputed datasets/median_dataset.csv')
```
%% Cell type:markdown id:opening-authorization tags:
End imputation with MICE.
If patch :
If there is 'landuse_xx', we must change the name before renaming with land cover names :
%% Cell type:code id:hollow-finding tags:
``` python
#data=pd.concat([data.habitat,median_dataset],axis=1)
list_former_names=[]
......@@ -159,66 +161,111 @@
list_new_names.append(col_name[-1])
dict_change_names=dict(zip(list_former_names,list_new_names))
data=data.rename(columns=dict_change_names)
```
%% Cell type:markdown id:extra-nightmare tags:
%% Cell type:markdown id:urban-means tags:
End imputation with MICE.
If not patch, directly here :
%% Cell type:code id:mental-fossil tags:
``` python
#median_dataset.drop('Unnamed: 0',axis=1,inplace=True)
data = data.astype({'landuseLandcover': np.int})
#data = data.astype({'landuseLandcover': np.int})
#Catagorical land use to one hot vector
data=categorical_to_one_hot(data,'landuseLandcover')
#data=categorical_to_one_hot(data,'landuseLandcover')
#Rename variables
variable_names=pd.read_excel('../data/variable_names.xlsx')
variable_names=variable_names.astype({'variable': np.str})
data=rename_columns(data,variable_names)
```
%% Cell type:code id:essential-tongue tags:
%% Cell type:code id:acquired-slovak tags:
``` python
data=data.rename(columns={'SLOPE_1':'slope'})
```
%% Cell type:code id:right-religion tags:
%% Cell type:code id:increasing-avenue tags:
``` python
data.to_csv('patch_dataset/data_without_na.csv')
data.drop(['NODATA','UNCLASSIFIED'],axis=1,inplace=True)
```
%% Cell type:code id:genetic-alias tags:
%% Cell type:code id:right-religion tags:
``` python
data.to_csv('../data/patch_dataset/data_without_naR20.csv')
```
%% Cell type:code id:sitting-bridal tags:
%% Cell type:markdown id:featured-prospect tags:
# Create hybrids
%% Cell type:code id:aggregate-skiing tags:
``` python
# Abiotic WUR data: used here only to derive the list of climate variable names.
abio_eva = pd.read_csv('../data/Abiotic_WUR/abio_eva.csv')
abio_eva = abio_eva.drop(['Unnamed: 0', 'ID'], axis=1)
# Climate variables are the columns prefixed with 'clim'; keep only the part
# after the last underscore as the variable name.
list_clim = [col.split('_')[-1] for col in abio_eva.columns if col.startswith('clim')]
list_clim.remove('solarRadiation') #this variable is not in CHELSA datas
```
%% Cell type:code id:public-spanish tags:
``` python
# Read the variable-name mapping and collect the codes of every row whose
# 'group' column equals 'landuse'.
variable_names = pd.read_excel('../data/variable_names.xlsx')
list_landuse_categories = [
    code for code, group in zip(variable_names.code.tolist(), variable_names.group)
    if group == 'landuse'
]
```
%% Cell type:code id:arranged-locking tags:
%% Cell type:code id:sitting-bridal tags:
``` python
# Build the hybrid dataset: RS-EBV features extracted at R0 combined with
# land-use/land-cover (LULC) one-hot columns extracted at R10.

# Land use land cover: keep only the one-hot 'landuse*' columns and drop the
# raw categorical column plus two categories not used downstream.
dataLULC = pd.read_csv("../data/Patch/R10_extract.csv")
dataLULC = dataLULC[[col_name for col_name in dataLULC.columns if col_name[:7] == 'landuse']]
dataLULC.drop(['landuseLandcover', 'landuseLandcover_48', 'landuseLandcover_255'], axis=1, inplace=True)

# RS-EBV features: drop index/coordinate columns, the growing-degree-day
# variables, and the climate variables listed in list_clim.
list_to_drop = ['Unnamed: 0', 'lon', 'lat', 'ai', 'npp', 'elevation', 'gdd0', 'gdd10', 'gddlgd0', 'gddlgd10', 'gddlgd5', 'gdgfgd0', 'gdgfgd10', 'gdgfgd5', 'ngd0', 'ngd10', 'ngd5', 'fgd', 'lgd', 'netPrimaryProduction'] + list_clim
dataRSEBV = pd.read_csv("../data/Patch/R0_extract.csv")
dataRSEBV.drop(['landuseLandcover'] + list_to_drop, axis=1, inplace=True)

# Habitat labels (EUNIS forest classes).
eva_eunis_forest = pd.read_csv("../data/Data EUNIS/eva_eunis_forest.csv")

# Assemble the dataset: habitat label + RS-EBV features + LULC one-hots.
data = pd.concat([eva_eunis_forest.habitat, dataRSEBV, dataLULC], axis=1)

# Optional slope feature (currently disabled).
# slope=pd.read_csv('../data/Patch/slope_eunisforest.csv')
# data=pd.concat([data,slope.SLOPE_1],axis=1)

# Rename columns: land-use one-hots to their numeric codes, then every
# variable to the readable name defined in variable_names.xlsx.
data = landuse_to_number(data)
variable_names = pd.read_excel('../data/variable_names.xlsx')
# FIX: np.str was a deprecated alias of the builtin str and was removed in
# NumPy 1.24 (would raise AttributeError); use str directly.
variable_names = variable_names.astype({'variable': str})
data = rename_columns(data, variable_names)
```
%% Cell type:code id:civil-climb tags:
``` python
data.to_csv('../data/patch_dataset/dataR0_LULCR10.csv')
```
%% Cell type:code id:developmental-capital tags:
``` python
def landuse_to_number(data):
    """Rename land-use columns to their numeric category code.

    Every column whose name starts with 'landuse' (e.g. 'landuseLandcover_48')
    is renamed to the token after its last underscore (e.g. '48'); all other
    columns are left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset whose land-use one-hot columns should be renamed.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with the renamed columns; the input is not modified.
    """
    # Map each 'landuse*' column to the suffix after its last underscore.
    # NOTE: a name without '_' (e.g. 'landuseLandcover') maps to itself.
    dict_change_names = {
        col_name: col_name.split('_')[-1]
        for col_name in data.columns
        if col_name[:7] == 'landuse'
    }
    return data.rename(columns=dict_change_names)
```
%% Cell type:markdown id:logical-attack tags:
# Analysis of lost classes
......
......@@ -212,10 +212,11 @@
def rename_columns(dataset,df_variable_names):
'''df_variable_names : must have a column named 'variable', containing all variables names, and a column 'code', containing
future variable names. Variable names to change must be str type'''
list_current_names=[]
list_new_names=[]
variable_names=variable_names.astype({'variable': np.str}) #variables names type to str
for i,name_val in enumerate(df_variable_names.variable.tolist()):
if name_val in dataset.columns.tolist():
list_current_names.append(name_val)
list_new_names.append(df_variable_names.code.tolist()[i])
dict_change_names=dict(zip(list_current_names,list_new_names))
......
This diff is collapsed.
%% Cell type:markdown id:casual-thirty tags:
<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Create-patch-dataset" data-toc-modified-id="Create-patch-dataset-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Create patch dataset</a></span></li><li><span><a href="#Analysis-of-lost-classes" data-toc-modified-id="Analysis-of-lost-classes-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Analysis of lost classes</a></span></li></ul></div>
<div class="toc"><ul class="toc-item"><li><span><a href="#Create-patch-dataset" data-toc-modified-id="Create-patch-dataset-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Create patch dataset</a></span></li><li><span><a href="#Create-hybrids" data-toc-modified-id="Create-hybrids-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Create hybrids</a></span></li><li><span><a href="#Analysis-of-lost-classes" data-toc-modified-id="Analysis-of-lost-classes-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Analysis of lost classes</a></span></li></ul></div>
%% Cell type:code id:nuclear-vision tags:
``` python
import pandas as pd
......@@ -48,11 +48,11 @@
%% Cell type:code id:religious-musical tags:
``` python
#Select features
data=pd.read_csv("../data/Patch/R0_extract.csv")
data=pd.read_csv("../data/Patch/R20_extract.csv")
list_to_drop=['Unnamed: 0','lon','lat','ai','npp','elevation','gdd0','gdd10','gddlgd0','gddlgd10','gddlgd5','gdgfgd0','gdgfgd10','gdgfgd5','ngd0','ngd10','ngd5','fgd','lgd', 'netPrimaryProduction']+list_clim
data.drop(list_to_drop,axis=1,inplace=True)
data=data.astype('float')
#Add slope
slope=pd.read_csv('../data/Patch/slope_eunisforest.csv')
......@@ -63,11 +63,11 @@
```
%% Cell type:code id:waiting-thunder tags:
``` python
#data.drop(['landuseLandcover'],axis=1,inplace=True)
data.drop(['landuseLandcover'],axis=1,inplace=True)
```
%% Cell type:code id:dramatic-excerpt tags:
``` python
......@@ -103,14 +103,10 @@
Datasets: 5
Iterations: 3
Imputed Variables: 42
save_all_iterations: True
%% Cell type:markdown id:guided-consistency tags:
If there is 'landuse_xx', we must change the name before renaming with land cover names :
%% Cell type:code id:opponent-logging tags:
``` python
plt.rcParams["figure.figsize"]=(20,20)
kernel.plot_imputed_distributions(wspace=0.3,hspace=0.3);
......@@ -144,10 +140,16 @@
``` python
median_dataset.to_csv('../data/Patch/imputed datasets/median_dataset.csv')
```
%% Cell type:markdown id:opening-authorization tags:
End imputation with MICE.
If patch :
If there is 'landuse_xx', we must change the name before renaming with land cover names :
%% Cell type:code id:hollow-finding tags:
``` python
#data=pd.concat([data.habitat,median_dataset],axis=1)
list_former_names=[]
......@@ -161,64 +163,109 @@
data=data.rename(columns=dict_change_names)
```
%% Cell type:markdown id:urban-means tags:
End imputation with MICE.
If not patch, directly here :
%% Cell type:code id:mental-fossil tags:
``` python
#median_dataset.drop('Unnamed: 0',axis=1,inplace=True)
data = data.astype({'landuseLandcover': np.int})
#data = data.astype({'landuseLandcover': np.int})
#Catagorical land use to one hot vector
data=categorical_to_one_hot(data,'landuseLandcover')
#data=categorical_to_one_hot(data,'landuseLandcover')
#Rename variables
variable_names=pd.read_excel('../data/variable_names.xlsx')
variable_names=variable_names.astype({'variable': np.str})
data=rename_columns(data,variable_names)
```
%% Cell type:code id:acquired-slovak tags:
``` python
data=data.rename(columns={'SLOPE_1':'slope'})
```
%% Cell type:code id:right-religion tags:
%% Cell type:code id:increasing-avenue tags:
``` python
data.to_csv('../data/patch_dataset/data_without_na.csv')
data.drop(['NODATA','UNCLASSIFIED'],axis=1,inplace=True)
```
%% Cell type:code id:genetic-alias tags:
%% Cell type:code id:right-religion tags:
``` python
data.to_csv('../data/patch_dataset/data_without_naR20.csv')
```
%% Cell type:code id:sitting-bridal tags:
%% Cell type:markdown id:featured-prospect tags:
# Create hybrids
%% Cell type:code id:aggregate-skiing tags:
``` python
# Abiotic WUR data: used here only to derive the list of climate variable names.
abio_eva = pd.read_csv('../data/Abiotic_WUR/abio_eva.csv')
abio_eva = abio_eva.drop(['Unnamed: 0', 'ID'], axis=1)
# Climate variables are the columns prefixed with 'clim'; keep only the part
# after the last underscore as the variable name.
list_clim = [col.split('_')[-1] for col in abio_eva.columns if col.startswith('clim')]
list_clim.remove('solarRadiation') #this variable is not in CHELSA datas
```
%% Cell type:code id:public-spanish tags:
``` python
# Read the variable-name mapping and collect the codes of every row whose
# 'group' column equals 'landuse'.
variable_names = pd.read_excel('../data/variable_names.xlsx')
list_landuse_categories = [
    code for code, group in zip(variable_names.code.tolist(), variable_names.group)
    if group == 'landuse'
]
```
%% Cell type:code id:arranged-locking tags:
%% Cell type:code id:sitting-bridal tags:
``` python
# Build the hybrid dataset: RS-EBV features extracted at R0 combined with
# land-use/land-cover (LULC) one-hot columns extracted at R10.

# Land use land cover: keep only the one-hot 'landuse*' columns and drop the
# raw categorical column plus two categories not used downstream.
dataLULC = pd.read_csv("../data/Patch/R10_extract.csv")
dataLULC = dataLULC[[col_name for col_name in dataLULC.columns if col_name[:7] == 'landuse']]
dataLULC.drop(['landuseLandcover', 'landuseLandcover_48', 'landuseLandcover_255'], axis=1, inplace=True)

# RS-EBV features: drop index/coordinate columns, the growing-degree-day
# variables, and the climate variables listed in list_clim.
list_to_drop = ['Unnamed: 0', 'lon', 'lat', 'ai', 'npp', 'elevation', 'gdd0', 'gdd10', 'gddlgd0', 'gddlgd10', 'gddlgd5', 'gdgfgd0', 'gdgfgd10', 'gdgfgd5', 'ngd0', 'ngd10', 'ngd5', 'fgd', 'lgd', 'netPrimaryProduction'] + list_clim
dataRSEBV = pd.read_csv("../data/Patch/R0_extract.csv")
dataRSEBV.drop(['landuseLandcover'] + list_to_drop, axis=1, inplace=True)

# Habitat labels (EUNIS forest classes).
eva_eunis_forest = pd.read_csv("../data/Data EUNIS/eva_eunis_forest.csv")

# Assemble the dataset: habitat label + RS-EBV features + LULC one-hots.
data = pd.concat([eva_eunis_forest.habitat, dataRSEBV, dataLULC], axis=1)

# Optional slope feature (currently disabled).
# slope=pd.read_csv('../data/Patch/slope_eunisforest.csv')
# data=pd.concat([data,slope.SLOPE_1],axis=1)

# Rename columns: land-use one-hots to their numeric codes, then every
# variable to the readable name defined in variable_names.xlsx.
data = landuse_to_number(data)
variable_names = pd.read_excel('../data/variable_names.xlsx')
# FIX: np.str was a deprecated alias of the builtin str and was removed in
# NumPy 1.24 (would raise AttributeError); use str directly.
variable_names = variable_names.astype({'variable': str})
data = rename_columns(data, variable_names)
```
%% Cell type:code id:civil-climb tags:
``` python
data.to_csv('../data/patch_dataset/dataR0_LULCR10.csv')
```
%% Cell type:code id:developmental-capital tags:
``` python
def landuse_to_number(data):
    """Rename land-use columns to their numeric category code.

    Every column whose name starts with 'landuse' (e.g. 'landuseLandcover_48')
    is renamed to the token after its last underscore (e.g. '48'); all other
    columns are left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset whose land-use one-hot columns should be renamed.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with the renamed columns; the input is not modified.
    """
    # Map each 'landuse*' column to the suffix after its last underscore.
    # NOTE: a name without '_' (e.g. 'landuseLandcover') maps to itself.
    dict_change_names = {
        col_name: col_name.split('_')[-1]
        for col_name in data.columns
        if col_name[:7] == 'landuse'
    }
    return data.rename(columns=dict_change_names)
```
%% Cell type:markdown id:logical-attack tags:
# Analysis of lost classes
......
......@@ -212,10 +212,11 @@
def rename_columns(dataset,df_variable_names):
'''df_variable_names : must have a column named 'variable', containing all variables names, and a column 'code', containing
future variable names. Variable names to change must be str type'''
list_current_names=[]
list_new_names=[]
variable_names=variable_names.astype({'variable': np.str}) #variables names type to str
for i,name_val in enumerate(df_variable_names.variable.tolist()):
if name_val in dataset.columns.tolist():
list_current_names.append(name_val)
list_new_names.append(df_variable_names.code.tolist()[i])
dict_change_names=dict(zip(list_current_names,list_new_names))
......
......@@ -210,6 +210,7 @@ def rename_columns(dataset,df_variable_names):
future variable names. Variable names to change must be str type'''
list_current_names=[]
list_new_names=[]
variable_names=variable_names.astype({'variable': np.str}) #variables names type to str
for i,name_val in enumerate(df_variable_names.variable.tolist()):
if name_val in dataset.columns.tolist():
list_current_names.append(name_val)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment