Commit aa625fef authored by Jean-Luc Parouty

Merge branch 'master' into dev

parents ac3e818f 22457993
Showing 12370 additions and 3148 deletions
@@ -3,8 +3,10 @@
__pycache__
*/__pycache__/*
*==*==.ipynb
!*==done==.ipynb
stderr.txt
stdout.txt
debug.log
run/
figs/
data/
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
# <!-- TITLE --> [GTSRB5] - Full convolutions
<!-- DESC --> Episode 5 : A lot of models, a lot of datasets and a lot of results.
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Try multiple solutions
- Design a generic and batch-usable code
The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs, divided into 43 classes.
The final aim is to recognise them !
The description is available here : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
## What we're going to do :
Our main steps:
- Try n models with n datasets
- Save a Pandas/h5 report
- Write code that can be run in batch mode
## Step 1 - Import and init
### 1.1 - Python stuff
%% Cell type:code id: tags:
``` python
import tensorflow as tf
from tensorflow import keras
import numpy as np
import h5py
import sys,os,time,json
import random
from IPython.display import display
sys.path.append('..')
import fidle.pwk as pwk

VERSION = '1.6'
run_dir = './run/GTSRB5'
datasets_dir = pwk.init('GTSRB5', run_dir)
```
%% Output
<br>**FIDLE 2020 - Practical Work Module**
Version : 1.2b1 DEV
Notebook id : GTSRB5
Run time : Monday 11 January 2021, 22:02:39
TensorFlow version : 2.2.0
Keras version : 2.3.0-tf
Datasets dir : /home/pjluc/datasets/fidle
Run dir : ./run/GTSRB5
Update keras cache : False
%% Cell type:markdown id: tags:
### 1.2 - Parameters
%% Cell type:code id: tags:
``` python
enhanced_dir = f'{datasets_dir}/GTSRB/enhanced'
# enhanced_dir = f'./data'
# ---- For tests
datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-RGB']
models = {'v1':'get_model_v1', 'v2':'get_model_v2', 'v3':'get_model_v3'}
batch_size = 64
epochs = 5
scale = 0.1
with_datagen = False
verbose = 0
# ---- All possibilities
# datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']
# models = {'v1':'get_model_v1', 'v2':'get_model_v2', 'v3':'get_model_v3'}
# batch_size = 64
# epochs = 16
# scale = 1
# with_datagen = False
# verbose = 0
# ---- Data augmentation
# datasets = ['set-48x48-RGB']
# models = {'v2':'get_model_v2'}
# batch_size = 64
# epochs = 20
# scale = 1
# with_datagen = True
# verbose = 0
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
pwk.override('enhanced_dir', 'datasets', 'models', 'batch_size', 'epochs', 'scale', 'with_datagen', 'verbose')
```
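%% Cell type:markdown id: tags:
As a side note, here is a minimal, hypothetical sketch of how such an override could work in batch mode (an illustration only, not the actual `fidle.pwk` implementation): a batch launcher exports environment variables, and a helper copies their values onto the notebook globals. The `FIDLE_OVERRIDE_GTSRB5_` prefix below is an assumption.
%% Cell type:code id: tags:
``` python
import os, json

def override_from_env(*names, prefix='FIDLE_OVERRIDE_GTSRB5_'):
    # Hypothetical sketch : for each parameter name, look for an environment
    # variable such as FIDLE_OVERRIDE_GTSRB5_epochs and, if it is set,
    # parse it and overwrite the global variable of the same name.
    for name in names:
        raw = os.environ.get(prefix + name)
        if raw is None:
            continue
        try:
            value = json.loads(raw)      # numbers, lists, booleans, ...
        except json.JSONDecodeError:
            value = raw                  # keep plain strings as they are
        globals()[name] = value
        print(f'Parameter overridden : {name} = {value}')

# Example : override_from_env('batch_size', 'epochs', 'scale')
```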
%% Cell type:markdown id: tags:
## Step 2 - Start
%% Cell type:code id: tags:
``` python
random.seed(time.time())
# ---- Where am I ?
now = time.strftime("%A %d %B %Y - %Hh%Mm%Ss")
here = os.getcwd()
tag_id = '{:06}'.format(random.randint(0,99999))
# ---- Who am I ?
oar_id = os.getenv("OAR_JOB_ID", "??")
slurm_id = os.getenv("SLURM_JOBID", "??")
print('Full Convolutions Notebook :')
print(' Version : ', VERSION )
print(' Now is : ', now )
print(' OAR id : ', oar_id )
print(' SLURM id : ', slurm_id )
print(' Tag id : ', tag_id )
print(' Working directory : ', here )
print(' Output directory : ', run_dir )
print(' for tensorboard : ', f'--logdir {run_dir}')
```
%% Output
Full Convolutions Notebook :
Version : 1.6
Now is : Monday 11 January 2021 - 22h02m39s
OAR id : ??
SLURM id : ??
Tag id : 095238
Working directory : /home/pjluc/dev/fidle/GTSRB
Output directory : ./run/GTSRB5
for tensorboard : --logdir ./run/GTSRB5
%% Cell type:code id: tags:
``` python
# ---- Uncomment for batch tests
#
# print("\n\n*** Test mode - Exit before making big treatments... ***\n\n")
# sys.exit()
```
%% Cell type:markdown id: tags:
## Step 3 - Dataset loading
%% Cell type:code id: tags:
``` python
def read_dataset(enhanced_dir, dataset_name):
    '''Reads an h5 dataset from enhanced_dir
    Args:
        enhanced_dir : directory of the enhanced datasets
        dataset_name : dataset name, without .h5
    Returns: x_train, y_train, x_test, y_test, size'''
    # ---- Read dataset
    filename = f'{enhanced_dir}/{dataset_name}.h5'
    size     = os.path.getsize(filename)/(1024*1024)
    with h5py.File(filename, 'r') as f:
        x_train = f['x_train'][:]
        y_train = f['y_train'][:]
        x_test  = f['x_test'][:]
        y_test  = f['y_test'][:]
    # ---- Shuffle
    x_train, y_train = pwk.shuffle_np_dataset(x_train, y_train)
    # ---- Done
    return x_train, y_train, x_test, y_test, size
```
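%% Cell type:markdown id: tags:
As a quick, hedged illustration (assuming the enhanced `set-24x24-L` file produced in episode 1 is present in `enhanced_dir`), the helper can be called directly:
%% Cell type:code id: tags:
``` python
# Minimal usage sketch of read_dataset (not part of the batch run)
x_train, y_train, x_test, y_test, size = read_dataset(enhanced_dir, 'set-24x24-L')
print(f'x_train : {x_train.shape}   y_train : {y_train.shape}   ({size:.1f} Mo)')
```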
%% Cell type:markdown id: tags:
## Step 4 - Models collection
%% Cell type:code id: tags:
``` python
# A basic model
#
def get_model_v1(lx,ly,lz):
model = keras.models.Sequential()
model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.2))
model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.2))
model.add( keras.layers.Flatten())
model.add( keras.layers.Dense(1500, activation='relu'))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Dense(43, activation='softmax'))
return model
# A more sophisticated model
#
def get_model_v2(lx,ly,lz):
model = keras.models.Sequential()
model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))
model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add( keras.layers.Dropout(0.2))
model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))
model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add( keras.layers.Dropout(0.2))
model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))
model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add( keras.layers.Dropout(0.2))
model.add( keras.layers.Flatten())
model.add( keras.layers.Dense(512, activation='relu'))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Dense(43, activation='softmax'))
return model
def get_model_v3(lx,ly,lz):
model = keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32, (5, 5), padding='same', activation='relu', input_shape=(lx,ly,lz)))
model.add(tf.keras.layers.BatchNormalization(axis=-1))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Conv2D(64, (5, 5), padding='same', activation='relu'))
model.add(tf.keras.layers.BatchNormalization(axis=-1))
model.add(tf.keras.layers.Conv2D(128, (5, 5), padding='same', activation='relu'))
model.add(tf.keras.layers.BatchNormalization(axis=-1))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(43, activation='softmax'))
return model
```
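%% Cell type:markdown id: tags:
Before launching the full grid, a single model can be instantiated on its own to check its architecture; a small sketch (the 24x24x1 shape corresponds to the `set-24x24-L` dataset, and 43 is the number of classes):
%% Cell type:code id: tags:
``` python
# Sanity check of one model : 24x24 grayscale input, 43-class softmax output
model = get_model_v1(24, 24, 1)
model.summary()
```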
%% Cell type:markdown id: tags:
## Step 5 - Multiple datasets, multiple models ;-)
%% Cell type:code id: tags:
``` python
def multi_run(enhanced_dir, datasets, models, datagen=None,
scale=1, batch_size=64, epochs=16,
verbose=0, tag_id='last'):
"""
Launches a dataset-model combination
args:
enhanced_dir : Directory of the enhanced datasets
datasets : List of dataset (whitout .h5)
models : List of model like { "model name":get_model(), ...}
datagen : Data generator or None (None)
scale : % of dataset to use. 1 mean all. (1)
batch_size : Batch size (64)
epochs : Number of epochs (16)
verbose : Verbose level (0)
tag_id : postfix for report, logs and models dir (_last)
return:
report : Report as a dict for Pandas.
"""
# ---- Logs and models dir
#
os.makedirs(f'{run_dir}/logs_{tag_id}', mode=0o750, exist_ok=True)
os.makedirs(f'{run_dir}/models_{tag_id}', mode=0o750, exist_ok=True)
# ---- Columns of output
#
output={}
output['Dataset'] = []
output['Size'] = []
for m in models:
output[m+'_Accuracy'] = []
output[m+'_Duration'] = []
# ---- Let's go
#
for d_name in datasets:
print("\nDataset : ",d_name)
# ---- Read dataset
x_train,y_train,x_test,y_test, d_size = read_dataset(enhanced_dir, d_name)
output['Dataset'].append(d_name)
output['Size'].append(d_size)
# ---- Rescale
x_train,y_train,x_test,y_test = pwk.rescale_dataset(x_train,y_train,x_test,y_test, scale=scale)
# ---- Get the shape
(n,lx,ly,lz) = x_train.shape
# ---- For each model
for m_name,m_function in models.items():
print(" Run model {} : ".format(m_name), end='')
# ---- get model
try:
# ---- get function by name
m_function=globals()[m_function]
model=m_function(lx,ly,lz)
# ---- Compile it
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# ---- Callbacks tensorboard
log_dir = f'{run_dir}/logs_{tag_id}/tb_{d_name}_{m_name}'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
# ---- Callbacks bestmodel
save_dir = f'{run_dir}/models_{tag_id}/model_{d_name}_{m_name}.h5'
bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)
# ---- Train
start_time = time.time()
if datagen==None:
# ---- No data augmentation (datagen=None) --------------------------------------
history = model.fit(x_train, y_train,
batch_size = batch_size,
epochs = epochs,
verbose = verbose,
validation_data = (x_test, y_test),
callbacks = [tensorboard_callback, bestmodel_callback])
else:
# ---- Data augmentation (datagen given) ----------------------------------------
datagen.fit(x_train)
history = model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),
steps_per_epoch = int(len(x_train)/batch_size),
epochs = epochs,
verbose = verbose,
validation_data = (x_test, y_test),
callbacks = [tensorboard_callback, bestmodel_callback])
# ---- Result
end_time = time.time()
duration = end_time-start_time
accuracy = max(history.history["val_accuracy"])*100
#
output[m_name+'_Accuracy'].append(accuracy)
output[m_name+'_Duration'].append(duration)
print(f"Accuracy={accuracy: 7.2f} Duration={duration: 7.2f}")
            except Exception as e:
                # ---- On failure, record it and continue with the next model
                output[m_name+'_Accuracy'].append(0)
                output[m_name+'_Duration'].append(999)
                print('-', e)
return output
```
%% Cell type:markdown id: tags:
## Step 6 - Run !
%% Cell type:code id: tags:
``` python
pwk.chrono_start()
print('\n---- Run','-'*50)
# ---- Data augmentation or not
#
if with_datagen :
datagen = keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,
featurewise_std_normalization=False,
width_shift_range=0.1,
height_shift_range=0.1,
zoom_range=0.2,
shear_range=0.1,
rotation_range=10.)
else:
datagen=None
# ---- Run
#
output = multi_run(enhanced_dir,
datasets,
models,
datagen = datagen,
scale = scale,
batch_size = batch_size,
epochs = epochs,
verbose = verbose,
tag_id = tag_id)
# ---- Save report
#
report={}
report['output']=output
report['description'] = f'scale={scale} batch_size={batch_size} epochs={epochs} data_aug={with_datagen}'
report_name=f'{run_dir}/report_{tag_id}.json'
with open(report_name, 'w') as file:
json.dump(report, file, indent=4)
print('\nReport saved as ',report_name)
pwk.chrono_show()
print('-'*59)
```
%% Output
---- Run --------------------------------------------------
Dataset : set-24x24-L
Run model v1 : Accuracy= 12.70 Duration= 3.77
Run model v2 : Accuracy= 4.76 Duration= 4.91
Run model v3 : Accuracy= 4.76 Duration= 4.69
Dataset : set-24x24-RGB
Run model v1 : Accuracy= 17.46 Duration= 3.79
Run model v2 : Accuracy= 6.35 Duration= 4.87
Run model v3 : Accuracy= 7.14 Duration= 4.88
Dataset : set-48x48-RGB
Run model v1 : Accuracy= 24.60 Duration= 18.79
Run model v2 : Accuracy= 6.35 Duration= 20.39
Run model v3 : Accuracy= 4.76 Duration= 19.04
Report saved as ./run/GTSRB5/report_095238.json
Duration : 00:01:27 524ms
-----------------------------------------------------------
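%% Cell type:markdown id: tags:
The report is saved as JSON and will be analysed in episode 7 (GTSRB7). As a hedged sketch of that post-processing, the `output` dict can be reloaded and viewed as a Pandas DataFrame (the `report_name` variable comes from the cell above):
%% Cell type:code id: tags:
``` python
import json
import pandas as pd

# Reload the saved report and display the results as a table
with open(report_name) as fp:
    report = json.load(fp)

df = pd.DataFrame(report['output'])
print(report['description'])
display(df)
```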
%% Cell type:markdown id: tags:
## Step 7 - That's all folks !
%% Cell type:code id: tags:
``` python
pwk.end()
```
%% Output
End time is : Monday 11 January 2021, 22:05:23
Duration is : 00:02:44 809ms
This notebook ends here
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>
%% Cell type:code id: tags:
``` python
from IPython.display import display,Markdown
display(Markdown(open('README.md', 'r').read()))
#
# This README is visible under Jupyter Lab ! :-)
```
%% Output
<a name="top"></a>
[<img width="600px" src="fidle/img/00-Fidle-titre-01.svg"></img>](#top)
<!-- --------------------------------------------------- -->
<!-- To correctly view this README under Jupyter Lab -->
<!-- Open the notebook: README.ipynb! -->
<!-- --------------------------------------------------- -->
## About
This repository contains all the documents and links of the **Fidle Training**.
Fidle (for Formation Introduction au Deep Learning) is a 2-day training session
co-organized by the Formation Permanente CNRS and the SARI and DEVLOG networks.
The objectives of this training are :
- Understanding the **basics of Deep Learning** neural networks
- Developing a **first experience** through simple and representative examples
- Understanding the **Tensorflow/Keras** and **Jupyter lab** technologies
- Getting familiar with the **academic computing environments** (Tier-2 or Tier-1) with powerful GPUs
For more information, you can contact us at :
[<img width="200px" style="vertical-align:middle" src="fidle/img/00-Mail_contact.svg"></img>](#top)
Current Version : <!-- VERSION_BEGIN -->
2.0
2.0.1
<!-- VERSION_END -->
## Course materials
| | | |
|:--:|:--:|:--:|
| **[<img width="50px" src="fidle/img/00-Fidle-pdf.svg"></img><br>Course slides](https://cloud.univ-grenoble-alpes.fr/index.php/s/wxCztjYBbQ6zwd6)**<br>The course in pdf format<br>(12 Mo)| **[<img width="50px" src="fidle/img/00-Notebooks.svg"></img><br>Notebooks](https://gricad-gitlab.univ-grenoble-alpes.fr/talks/fidle/-/archive/master/fidle-master.zip)**<br> &nbsp;&nbsp;&nbsp;&nbsp;Get a Zip or clone this repository &nbsp;&nbsp;&nbsp;&nbsp;<br>(10 Mo)| **[<img width="50px" src="fidle/img/00-Datasets-tar.svg"></img><br>Datasets](https://cloud.univ-grenoble-alpes.fr/index.php/s/wxCztjYBbQ6zwd6)**<br>All the needed datasets<br>(1.2 Go)|
Have a look about **[How to get and install](https://gricad-gitlab.univ-grenoble-alpes.fr/talks/fidle/-/wikis/Install-Fidle)** these notebooks and datasets.
## Jupyter notebooks
<!-- INDEX_BEGIN -->
### Linear and logistic regression
- **[LINR1](LinearReg/01-Linear-Regression.ipynb)** - [Linear regression with direct resolution](LinearReg/01-Linear-Regression.ipynb)
Low-level implementation, using numpy, of a direct resolution for a linear regression
- **[GRAD1](LinearReg/02-Gradient-descent.ipynb)** - [Linear regression with gradient descent](LinearReg/02-Gradient-descent.ipynb)
Low level implementation of a solution by gradient descent. Basic and stochastic approach.
- **[POLR1](LinearReg/03-Polynomial-Regression.ipynb)** - [Complexity Syndrome](LinearReg/03-Polynomial-Regression.ipynb)
Illustration of the problem of complexity with the polynomial regression
- **[LOGR1](LinearReg/04-Logistic-Regression.ipynb)** - [Logistic regression](LinearReg/04-Logistic-Regression.ipynb)
Simple example of logistic regression with a sklearn solution
### Perceptron Model 1957
- **[PER57](IRIS/01-Simple-Perceptron.ipynb)** - [Perceptron Model 1957](IRIS/01-Simple-Perceptron.ipynb)
Example of use of a Perceptron, with sklearn and IRIS dataset of 1936 !
### Basic regression using DNN
- **[BHPD1](BHPD/01-DNN-Regression.ipynb)** - [Regression with a Dense Network (DNN)](BHPD/01-DNN-Regression.ipynb)
Simple example of a regression with the Boston Housing Prices Dataset (BHPD)
- **[BHPD2](BHPD/02-DNN-Regression-Premium.ipynb)** - [Regression with a Dense Network (DNN) - Advanced code](BHPD/02-DNN-Regression-Premium.ipynb)
A more advanced implementation of the previous example
### Basic classification using a DNN
- **[MNIST1](MNIST/01-DNN-MNIST.ipynb)** - [Simple classification with DNN](MNIST/01-DNN-MNIST.ipynb)
An example of classification using a dense neural network for the famous MNIST dataset
### Images classification with Convolutional Neural Networks (CNN)
- **[GTSRB1](GTSRB/01-Preparation-of-data.ipynb)** - [Dataset analysis and preparation](GTSRB/01-Preparation-of-data.ipynb)
Episode 1 : Analysis of the GTSRB dataset and creation of an enhanced dataset
- **[GTSRB2](GTSRB/02-First-convolutions.ipynb)** - [First convolutions](GTSRB/02-First-convolutions.ipynb)
Episode 2 : First convolutions and first classification of our traffic signs
- **[GTSRB3](GTSRB/03-Tracking-and-visualizing.ipynb)** - [Training monitoring](GTSRB/03-Tracking-and-visualizing.ipynb)
Episode 3 : Monitoring, analysis and check points during a training session
- **[GTSRB4](GTSRB/04-Data-augmentation.ipynb)** - [Data augmentation ](GTSRB/04-Data-augmentation.ipynb)
Episode 4 : Adding data by data augmentation when we lack it, to improve our results
- **[GTSRB5](GTSRB/05-Full-convolutions.ipynb)** - [Full convolutions](GTSRB/05-Full-convolutions.ipynb)
Episode 5 : A lot of models, a lot of datasets and a lot of results.
- **[GTSRB6](GTSRB/06-Notebook-as-a-batch.ipynb)** - [Full convolutions as a batch](GTSRB/06-Notebook-as-a-batch.ipynb)
Episode 6 : To compute bigger, use your notebook in batch mode
- **[GTSRB7](GTSRB/07-Show-report.ipynb)** - [Batch reports](GTSRB/07-Show-report.ipynb)
Episode 7 : Displaying our jobs report, and the winner is...
- **[GTSRB10](GTSRB/batch_oar.sh)** - [OAR batch script submission](GTSRB/batch_oar.sh)
Bash script for an OAR batch submission of an ipython code
- **[GTSRB11](GTSRB/batch_slurm.sh)** - [SLURM batch script](GTSRB/batch_slurm.sh)
Bash script for a Slurm batch submission of an ipython code
### Sentiment analysis with word embedding
- **[IMDB1](IMDB/01-Embedding-Keras.ipynb)** - [Sentiment analysis with text embedding](IMDB/01-Embedding-Keras.ipynb)
A very classical example of word embedding with a dataset from Internet Movie Database (IMDB)
- **[IMDB2](IMDB/02-Prediction.ipynb)** - [Reload and reuse a saved model](IMDB/02-Prediction.ipynb)
Retrieving a saved model to perform a sentiment analysis (movie review)
- **[IMDB3](IMDB/03-LSTM-Keras.ipynb)** - [Sentiment analysis with a LSTM network](IMDB/03-LSTM-Keras.ipynb)
Still the same problem, but with a network combining embedding and LSTM
### Time series with Recurrent Neural Network (RNN)
- **[SYNOP1](SYNOP/01-Preparation-of-data.ipynb)** - [Preparation of data](SYNOP/01-Preparation-of-data.ipynb)
Episode 1 : Data analysis and preparation of a meteorological dataset (SYNOP)
- **[SYNOP2](SYNOP/02-First-predictions.ipynb)** - [First predictions at 3h](SYNOP/02-First-predictions.ipynb)
Episode 2 : Learning session and weather prediction attempt at 3h
- **[SYNOP3](SYNOP/03-12h-predictions.ipynb)** - [12h predictions](SYNOP/03-12h-predictions.ipynb)
Episode 3 : Attempt to predict over a longer term
### Unsupervised learning with an autoencoder neural network (AE)
- **[AE1](AE/01-AE-with-MNIST.ipynb)** - [Building and training an AE denoiser model](AE/01-AE-with-MNIST.ipynb)
Episode 1 : After construction, the model is trained with noisy data from the MNIST dataset.
- **[AE2](AE/02-AE-with-MNIST-post.ipynb)** - [Exploring our denoiser model](AE/02-AE-with-MNIST-post.ipynb)
Episode 2 : Using the previously trained autoencoder to denoise data
### Generative network with Variational Autoencoder (VAE)
- **[VAE1](VAE/01-VAE-with-MNIST.ipynb)** - [First VAE, with a small dataset (MNIST)](VAE/01-VAE-with-MNIST.ipynb)
Construction and training of a VAE with a latent space of small dimension.
- **[VAE2](VAE/02-VAE-with-MNIST-post.ipynb)** - [Analysis of the associated latent space](VAE/02-VAE-with-MNIST-post.ipynb)
Visualization and analysis of the VAE's latent space
- **[VAE5](VAE/05-About-CelebA.ipynb)** - [Another game play : About the CelebA dataset](VAE/05-About-CelebA.ipynb)
Episode 1 : Presentation of the CelebA dataset and problems related to its size
- **[VAE6](VAE/06-Prepare-CelebA-datasets.ipynb)** - [Generation of a clustered dataset](VAE/06-Prepare-CelebA-datasets.ipynb)
Episode 2 : Analysis of the CelebA dataset and creation of a clustered and usable dataset
- **[VAE7](VAE/07-Check-CelebA.ipynb)** - [Checking the clustered dataset](VAE/07-Check-CelebA.ipynb)
Episode 3 : Clustered dataset verification and testing of our datagenerator
- **[VAE8](VAE/08-VAE-with-CelebA.ipynb)** - [Training session for our VAE](VAE/08-VAE-with-CelebA.ipynb)
Episode 4 : Training with our clustered datasets in notebook or batch mode
- **[VAE9](VAE/09-VAE-withCelebA-post.ipynb)** - [Data generation from latent space](VAE/09-VAE-withCelebA-post.ipynb)
Episode 5 : Exploring latent space to generate new data
- **[VAE10](VAE/batch_slurm.sh)** - [SLURM batch script](VAE/batch_slurm.sh)
Bash script for SLURM batch submission of VAE8 notebooks
### Miscellaneous
- **[ACTF1](Misc/Activation-Functions.ipynb)** - [Activation functions](Misc/Activation-Functions.ipynb)
Some activation functions, with their derivatives.
- **[NP1](Misc/Numpy.ipynb)** - [A short introduction to Numpy](Misc/Numpy.ipynb)
Numpy is an essential tool for scientific Python.
- **[TSB1](Misc/Using-Tensorboard.ipynb)** - [Tensorboard with/from Jupyter ](Misc/Using-Tensorboard.ipynb)
4 ways to use Tensorboard from the Jupyter environment
<!-- INDEX_END -->
## Installation
A procedure for **configuring** and **starting Jupyter** is available in the **[Wiki](https://gricad-gitlab.univ-grenoble-alpes.fr/talks/fidle/-/wikis/Install-Fidle)**.
## Licence
[<img width="100px" src="fidle/img/00-fidle-CC BY-NC-SA.svg"></img>](https://creativecommons.org/licenses/by-nc-sa/4.0/)
\[en\] Attribution - NonCommercial - ShareAlike 4.0 International (CC BY-NC-SA 4.0)
\[Fr\] Attribution - Pas d’Utilisation Commerciale - Partage dans les Mêmes Conditions 4.0 International
See [License](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
See [Disclaimer](https://creativecommons.org/licenses/by-nc-sa/4.0/#).
----
[<img width="80px" src="fidle/img/00-Fidle-logo-01.svg"></img>](#top)
@@ -23,7 +23,7 @@ The objectives of this training are :
For more information, you can contact us at :
[<img width="200px" style="vertical-align:middle" src="fidle/img/00-Mail_contact.svg"></img>](#top)
Current Version : <!-- VERSION_BEGIN -->
2.0
2.0.1
<!-- VERSION_END -->
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
## Updating the notebook catalog and the READMEs
- Generation of the **notebook catalog** : [./logs/catalog.json](./logs/catalog.json)
This file contains a detailed list of all the notebooks and scripts.
- Automatic generation of the table of contents and update of the **README** files
- [README.md](../README.md)
- [README.ipynb](../README.ipynb)
%% Cell type:markdown id: tags:
## Step 1 - Load modules and init
%% Cell type:code id: tags:
``` python
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
from IPython.display import display,Image,Markdown,HTML
import re
import sys, os, glob
import datetime, time
import json
from collections import OrderedDict
from importlib import reload
sys.path.append('..')
import fidle.pwk as pwk
import fidle.config as config
import fidle.cookindex as cookindex
```
%% Cell type:markdown id: tags:
## Step 2 - List of folders containing notebooks to be indexed :
The order below will be the index order.
%% Cell type:code id: tags:
``` python
directories_to_index = {'LinearReg':'Linear and logistic regression',
'IRIS':'Perceptron Model 1957',
'BHPD':'Basic regression using DNN',
'MNIST':'Basic classification using a DNN',
'GTSRB':'Images classification with Convolutional Neural Networks (CNN)',
'IMDB':'Sentiment analysis with word embedding',
'SYNOP':'Time series with Recurrent Neural Network (RNN)',
'AE':'Unsupervised learning with an autoencoder neural network (AE)',
'VAE':'Generative network with Variational Autoencoder (VAE)',
'Misc':'Miscellaneous'
}
```
%% Cell type:markdown id: tags:
## Step 3 - Catalog of notebooks
### 3.1 - Build catalog
%% Cell type:code id: tags:
``` python
# ---- Get the notebook list
#
files_list = cookindex.get_files(directories_to_index.keys())
# ---- Get a detailed catalog for this list
#
catalog = cookindex.get_catalog(files_list)
with open(config.CATALOG_FILE,'wt') as fp:
json.dump(catalog,fp,indent=4)
print(f'Catalog saved as {config.CATALOG_FILE}')
print('Entries : ',len(catalog))
```
%% Output
Read : LinearReg/01-Linear-Regression.ipynb
Read : LinearReg/02-Gradient-descent.ipynb
Read : LinearReg/03-Polynomial-Regression.ipynb
Read : LinearReg/04-Logistic-Regression.ipynb
Read : IRIS/01-Simple-Perceptron.ipynb
Read : BHPD/01-DNN-Regression.ipynb
Read : BHPD/02-DNN-Regression-Premium.ipynb
Read : MNIST/01-DNN-MNIST.ipynb
Read : GTSRB/01-Preparation-of-data.ipynb
Read : GTSRB/02-First-convolutions.ipynb
Read : GTSRB/03-Tracking-and-visualizing.ipynb
Read : GTSRB/04-Data-augmentation.ipynb
Read : GTSRB/05-Full-convolutions.ipynb
Read : GTSRB/06-Notebook-as-a-batch.ipynb
Read : GTSRB/07-Show-report.ipynb
Read : IMDB/01-Embedding-Keras.ipynb
Read : IMDB/02-Prediction.ipynb
Read : IMDB/03-LSTM-Keras.ipynb
Read : SYNOP/01-Preparation-of-data.ipynb
Read : SYNOP/02-First-predictions.ipynb
Read : SYNOP/03-12h-predictions.ipynb
Read : AE/01-AE-with-MNIST.ipynb
Read : AE/02-AE-with-MNIST-post.ipynb
Read : VAE/01-VAE-with-MNIST.ipynb
Read : VAE/02-VAE-with-MNIST-post.ipynb
Read : VAE/05-About-CelebA.ipynb
Read : VAE/06-Prepare-CelebA-datasets.ipynb
Read : VAE/07-Check-CelebA.ipynb
Read : VAE/08-VAE-with-CelebA.ipynb
Read : VAE/09-VAE-withCelebA-post.ipynb
Read : Misc/Activation-Functions.ipynb
Read : Misc/Numpy.ipynb
Read : Misc/Using-Tensorboard.ipynb
Catalog saved as ../fidle/logs/catalog.json
Entries : 36
%% Cell type:markdown id: tags:
### 3.2 - Build index
%% Cell type:code id: tags:
``` python
styles = open('css/readme.css', "r").read()
lines_md=[]
lines_html=[styles]
for directory,title in directories_to_index.items():
lines_md.append(f'\n### {title}')
lines_html.append( f'<div class="fid_section">{title}</div>')
entries = { k:v for k,v in catalog.items() if v['dirname']==directory }
for id, about in entries.items():
id = about['id']
dirname = about['dirname']
basename = about['basename']
title = about['title']
description = about['description']
link=f'{dirname}/{basename}'.replace(' ','%20')
md = f'- **[{id}]({link})** - [{title}]({link}) \n'
md += f'{description}'
html = f"""<div class="fid_line">
<span class="fid_id">
<a href="{link}">{id}</a>
</span> <a href="{link}">{title}</a><br>
<span class="fid_desc">{description}</span>
</div>
"""
lines_md.append(md)
lines_html.append(html)
index_md = '\n'.join(lines_md)
index_html = '\n'.join(lines_html)
display(Markdown('**Index is :**'))
display(Markdown(index_md))
# display(HTML(index_html))
```
%% Output
**Index is :**
### Linear and logistic regression
- **[LINR1](LinearReg/01-Linear-Regression.ipynb)** - [Linear regression with direct resolution](LinearReg/01-Linear-Regression.ipynb)
Low-level implementation, using numpy, of a direct resolution for a linear regression
- **[GRAD1](LinearReg/02-Gradient-descent.ipynb)** - [Linear regression with gradient descent](LinearReg/02-Gradient-descent.ipynb)
Low level implementation of a solution by gradient descent. Basic and stochastic approach.
- **[POLR1](LinearReg/03-Polynomial-Regression.ipynb)** - [Complexity Syndrome](LinearReg/03-Polynomial-Regression.ipynb)
Illustration of the problem of complexity with the polynomial regression
- **[LOGR1](LinearReg/04-Logistic-Regression.ipynb)** - [Logistic regression](LinearReg/04-Logistic-Regression.ipynb)
Simple example of logistic regression with a sklearn solution
### Perceptron Model 1957
- **[PER57](IRIS/01-Simple-Perceptron.ipynb)** - [Perceptron Model 1957](IRIS/01-Simple-Perceptron.ipynb)
Example of use of a Perceptron, with sklearn and IRIS dataset of 1936 !
### Basic regression using DNN
- **[BHPD1](BHPD/01-DNN-Regression.ipynb)** - [Regression with a Dense Network (DNN)](BHPD/01-DNN-Regression.ipynb)
Simple example of a regression with the Boston Housing Prices Dataset (BHPD)
- **[BHPD2](BHPD/02-DNN-Regression-Premium.ipynb)** - [Regression with a Dense Network (DNN) - Advanced code](BHPD/02-DNN-Regression-Premium.ipynb)
A more advanced implementation of the previous example
### Basic classification using a DNN
- **[MNIST1](MNIST/01-DNN-MNIST.ipynb)** - [Simple classification with DNN](MNIST/01-DNN-MNIST.ipynb)
An example of classification using a dense neural network for the famous MNIST dataset
### Images classification with Convolutional Neural Networks (CNN)
- **[GTSRB1](GTSRB/01-Preparation-of-data.ipynb)** - [Dataset analysis and preparation](GTSRB/01-Preparation-of-data.ipynb)
Episode 1 : Analysis of the GTSRB dataset and creation of an enhanced dataset
- **[GTSRB2](GTSRB/02-First-convolutions.ipynb)** - [First convolutions](GTSRB/02-First-convolutions.ipynb)
Episode 2 : First convolutions and first classification of our traffic signs
- **[GTSRB3](GTSRB/03-Tracking-and-visualizing.ipynb)** - [Training monitoring](GTSRB/03-Tracking-and-visualizing.ipynb)
Episode 3 : Monitoring, analysis and check points during a training session
- **[GTSRB4](GTSRB/04-Data-augmentation.ipynb)** - [Data augmentation ](GTSRB/04-Data-augmentation.ipynb)
Episode 4 : Adding data by data augmentation when we lack it, to improve our results
- **[GTSRB5](GTSRB/05-Full-convolutions.ipynb)** - [Full convolutions](GTSRB/05-Full-convolutions.ipynb)
Episode 5 : A lot of models, a lot of datasets and a lot of results.
- **[GTSRB6](GTSRB/06-Notebook-as-a-batch.ipynb)** - [Full convolutions as a batch](GTSRB/06-Notebook-as-a-batch.ipynb)
Episode 6 : To compute bigger, use your notebook in batch mode
- **[GTSRB7](GTSRB/07-Show-report.ipynb)** - [Batch reports](GTSRB/07-Show-report.ipynb)
Episode 7 : Displaying our jobs report, and the winner is...
- **[GTSRB10](GTSRB/batch_oar.sh)** - [OAR batch script submission](GTSRB/batch_oar.sh)
Bash script for an OAR batch submission of an ipython code
- **[GTSRB11](GTSRB/batch_slurm.sh)** - [SLURM batch script](GTSRB/batch_slurm.sh)
Bash script for a Slurm batch submission of an ipython code
### Sentiment analysis with word embedding
- **[IMDB1](IMDB/01-Embedding-Keras.ipynb)** - [Sentiment analysis with text embedding](IMDB/01-Embedding-Keras.ipynb)
A very classical example of word embedding with a dataset from Internet Movie Database (IMDB)
- **[IMDB2](IMDB/02-Prediction.ipynb)** - [Reload and reuse a saved model](IMDB/02-Prediction.ipynb)
Retrieving a saved model to perform a sentiment analysis (movie review)
- **[IMDB3](IMDB/03-LSTM-Keras.ipynb)** - [Sentiment analysis with a LSTM network](IMDB/03-LSTM-Keras.ipynb)
Still the same problem, but with a network combining embedding and LSTM
### Time series with Recurrent Neural Network (RNN)
- **[SYNOP1](SYNOP/01-Preparation-of-data.ipynb)** - [Preparation of data](SYNOP/01-Preparation-of-data.ipynb)
Episode 1 : Data analysis and preparation of a meteorological dataset (SYNOP)
- **[SYNOP2](SYNOP/02-First-predictions.ipynb)** - [First predictions at 3h](SYNOP/02-First-predictions.ipynb)
Episode 2 : Learning session and weather prediction attempt at 3h
- **[SYNOP3](SYNOP/03-12h-predictions.ipynb)** - [12h predictions](SYNOP/03-12h-predictions.ipynb)
Episode 3 : Attempt to predict over a longer term
### Unsupervised learning with an autoencoder neural network (AE)
- **[AE1](AE/01-AE-with-MNIST.ipynb)** - [Building and training an AE denoiser model](AE/01-AE-with-MNIST.ipynb)
Episode 1 : After construction, the model is trained with noisy data from the MNIST dataset.
- **[AE2](AE/02-AE-with-MNIST-post.ipynb)** - [Exploring our denoiser model](AE/02-AE-with-MNIST-post.ipynb)
Episode 2 : Using the previously trained autoencoder to denoise data
### Generative network with Variational Autoencoder (VAE)
- **[VAE1](VAE/01-VAE-with-MNIST.ipynb)** - [First VAE, with a small dataset (MNIST)](VAE/01-VAE-with-MNIST.ipynb)
Construction and training of a VAE with a latent space of small dimension.
- **[VAE2](VAE/02-VAE-with-MNIST-post.ipynb)** - [Analysis of the associated latent space](VAE/02-VAE-with-MNIST-post.ipynb)
Visualization and analysis of the VAE's latent space
- **[VAE5](VAE/05-About-CelebA.ipynb)** - [Another game play : About the CelebA dataset](VAE/05-About-CelebA.ipynb)
Episode 1 : Presentation of the CelebA dataset and problems related to its size
- **[VAE6](VAE/06-Prepare-CelebA-datasets.ipynb)** - [Generation of a clustered dataset](VAE/06-Prepare-CelebA-datasets.ipynb)
Episode 2 : Analysis of the CelebA dataset and creation of a clustered and usable dataset
- **[VAE7](VAE/07-Check-CelebA.ipynb)** - [Checking the clustered dataset](VAE/07-Check-CelebA.ipynb)
Episode 3 : Clustered dataset verification and testing of our datagenerator
- **[VAE8](VAE/08-VAE-with-CelebA.ipynb)** - [Training session for our VAE](VAE/08-VAE-with-CelebA.ipynb)
Episode 4 : Training with our clustered datasets in notebook or batch mode
- **[VAE9](VAE/09-VAE-withCelebA-post.ipynb)** - [Data generation from latent space](VAE/09-VAE-withCelebA-post.ipynb)
Episode 5 : Exploring latent space to generate new data
- **[VAE10](VAE/batch_slurm.sh)** - [SLURM batch script](VAE/batch_slurm.sh)
Bash script for SLURM batch submission of VAE8 notebooks
### Miscellaneous
- **[ACTF1](Misc/Activation-Functions.ipynb)** - [Activation functions](Misc/Activation-Functions.ipynb)
Some activation functions, with their derivatives.
- **[NP1](Misc/Numpy.ipynb)** - [A short introduction to Numpy](Misc/Numpy.ipynb)
Numpy is an essential tool for scientific Python.
- **[TSB1](Misc/Using-Tensorboard.ipynb)** - [Tensorboard with/from Jupyter ](Misc/Using-Tensorboard.ipynb)
4 ways to use Tensorboard from the Jupyter environment
%% Cell type:markdown id: tags:
## Step 4 - Update README.md
%% Cell type:code id: tags:
``` python
# ---- Load README.md
#
with open('../README.md','r') as fp:
readme=fp.read()
# ---- Update index, version
#
readme = cookindex.tag('INDEX', index_md, readme)
readme = cookindex.tag('VERSION', config.VERSION, readme)
# ---- Save it
#
with open('../README.md','wt') as fp:
fp.write(readme)
print('README.md is updated.')
```
%% Output
README.md is updated.
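%% Cell type:markdown id: tags:
For readers wondering what `cookindex.tag` does here, a plausible sketch (an assumption, not the actual fidle code) is a substitution between the `<!-- XXX_BEGIN -->` / `<!-- XXX_END -->` markers that appear in README.md:
%% Cell type:code id: tags:
``` python
import re

def tag_sketch(name, new_content, text):
    # Hypothetical illustration : replace whatever lies between the
    # <!-- NAME_BEGIN --> and <!-- NAME_END --> markers with new_content.
    pattern     = rf'<!-- {name}_BEGIN -->.*<!-- {name}_END -->'
    replacement = f'<!-- {name}_BEGIN -->\n{new_content}\n<!-- {name}_END -->'
    return re.sub(pattern, lambda m: replacement, text, flags=re.DOTALL)

# Example : readme = tag_sketch('VERSION', '2.0.1', readme)
```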
%% Cell type:markdown id: tags:
## Step 5 - README.ipynb
Just execute README.ipynb
%% Cell type:raw id: tags:
# ---- Load notebook
#
notebook = nbformat.read('../README.ipynb', nbformat.NO_CONVERT)
# new_cell = nbformat.v4.new_markdown_cell(source=readme)
# notebook.cells.append(new_cell)
# ---- Execute it
#
ep = ExecutePreprocessor(timeout=600, kernel_name="python3")
ep.preprocess(notebook, {'metadata': {'path': '..'}})
# ---- Save it
with open('../READMEv2.ipynb', mode="w", encoding='utf-8') as fp:
    nbformat.write(notebook, fp)
%% Cell type:markdown id: tags:
## Step 6 - More fun : Create and execute it :-)
%% Cell type:markdown id: tags:
Even more fun : we are going to build README.ipynb from scratch and execute it :-)
%% Cell type:code id: tags:
``` python
# ---- Create Notebook from scratch
#
notebook = nbformat.v4.new_notebook()
# ---- Add a code cell
#
code = "from IPython.display import display,Markdown\n"
code+= "display(Markdown(open('README.md', 'r').read()))\n"
code+= "#\n"
code+= "# This README is visible under Jupiter LAb ! :-)"
new_cell = nbformat.v4.new_code_cell(source=code)
new_cell['metadata']= { "jupyter": { "source_hidden": True} }
notebook.cells.append(new_cell)
# ---- To avoid a modification of the notebook when it is opened
#      (not blocking, but it would require saving the document again when closing it...)
notebook['metadata']["kernelspec"] = {"display_name": "Python 3", "language": "python", "name": "python3" }
# ---- Run it
#
ep = ExecutePreprocessor(timeout=600, kernel_name="python3")
ep.preprocess(notebook, {'metadata': {'path': '..'}})
# ---- Save it
#
with open('../README.ipynb', mode="w", encoding='utf-8') as fp:
nbformat.write(notebook, fp)
print('README.ipynb built and saved')
```
%% Output
README.ipynb built and saved
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>