Compare revisions

Jean-Luc Parouty · Jean-Luc Parouty · Jean-Luc Parouty · Jean-Luc Parouty · Jean-Luc Parouty · Jean-Luc Parouty Jean-Luc.Parouty@simap.grenoble-inp.fr
--- a/.gitignore
+++ b/.gitignore
@@ -2,5 +2,10 @@
 */.ipynb_checkpoints/*
 __pycache__
 */__pycache__/*
-/run/**
-*/data/*
+run/
+GTSRB/data
+IMDB/data
+MNIST/data
+VAE/data
+BHPD/data/*
+!BHPD/data/BostonHousing.csv
--- a/BHPD/01-DNN-Regression.ipynb
+++ b/BHPD/01-DNN-Regression.ipynb
--- a/BHPD/02-DNN-Regression-Premium.ipynb
+++ b/BHPD/02-DNN-Regression-Premium.ipynb
--- a/BHPD/data/BostonHousing.csv
+++ b/BHPD/data/BostonHousing.csv
--- a/GTSRB/01-Preparation-of-data.ipynb
+++ b/GTSRB/01-Preparation-of-data.ipynb
--- a/GTSRB/02-First-convolutions.ipynb
+++ b/GTSRB/02-First-convolutions.ipynb
+%% Cell type:markdown id: tags:
+
+![Fidle](../fidle/img/00-Fidle-header-01.png)
+
+# <!-- TITLE --> CNN with GTSRB dataset - First convolutions
+<!-- DESC --> Episode 2 : First convolutions and first results
+<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
+
+## Objectives :
+  - Recognizing traffic signs
+  - Understand the **principles** and **architecture** of a **convolutional neural network** for image classification
+
+The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
+The final aim is to recognise them !
+Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
+
+
+## What we're going to do :
+
+ - Read H5 dataset
+ - Build a model
+ - Train the model
+ - Evaluate the model
+
+## Step 1 - Import and init
+
+%% Cell type:code id: tags:
+
+``` python
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.callbacks import TensorBoard
+
+import numpy as np
+import matplotlib.pyplot as plt
+import h5py
+import os,time,sys
+
+from importlib import reload
+
+sys.path.append('..')
+import fidle.pwk as ooo
+
+ooo.init()
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 2 - Load dataset
+We're going to retrieve a previously recorded dataset.
+For example: set-24x24-L
+
+%% Cell type:code id: tags:
+
+``` python
+%%time
+
+def read_dataset(name):
+    '''Reads h5 dataset from ./data
+
+    Arguments:  dataset name, without .h5
+    Returns:    x_train,y_train,x_test,y_test data'''
+    # ---- Read dataset
+    filename='./data/'+name+'.h5'
+    with  h5py.File(filename) as f:
+        x_train = f['x_train'][:]
+        y_train = f['y_train'][:]
+        x_test  = f['x_test'][:]
+        y_test  = f['y_test'][:]
+
+    # ---- done
+    print('Dataset "{}" is loaded. ({:.1f} Mo)\n'.format(name,os.path.getsize(filename)/(1024*1024)))
+    return x_train,y_train,x_test,y_test
+
+x_train,y_train,x_test,y_test = read_dataset('set-24x24-L')
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 3 - Have a look at the dataset
+We take a quick look as we go by...
+
+%% Cell type:code id: tags:
+
+``` python
+print("x_train : ", x_train.shape)
+print("y_train : ", y_train.shape)
+print("x_test  : ", x_test.shape)
+print("y_test  : ", y_test.shape)
+
+ooo.plot_images(x_train, y_train, range(12), columns=6,  x_size=2, y_size=2)
+ooo.plot_images(x_train, y_train, range(36), columns=12, x_size=1, y_size=1)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 4 - Create model
+We will now build a model and train it...
+
+Some models :
+
+%% Cell type:code id: tags:
+
+``` python
+
+# A basic model
+#
+def get_model_v1(lx,ly,lz):
+
+    model = keras.models.Sequential()
+
+    model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(1500, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+
+# A more sophisticated model
+#
+def get_model_v2(lx,ly,lz):
+    model = keras.models.Sequential()
+
+    model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))
+    model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))
+    model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))
+    model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(512, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+
+# My sophisticated model, but small and fast
+#
+def get_model_v3(lx,ly,lz):
+    model = keras.models.Sequential()
+    model.add( keras.layers.Conv2D(32, (3,3),   activation='relu', input_shape=(lx,ly,lz)))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(1152, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 5 - Train the model
+**Get the shape of my data :**
+
+%% Cell type:code id: tags:
+
+``` python
+(n,lx,ly,lz) = x_train.shape
+print("Images of the dataset have this folowing shape : ",(lx,ly,lz))
+```
+
+%% Cell type:markdown id: tags:
+
+**Get and compile a model, with the data shape :**
+
+%% Cell type:code id: tags:
+
+``` python
+model = get_model_v1(lx,ly,lz)
+
+model.summary()
+
+model.compile(optimizer = 'adam',
+              loss      = 'sparse_categorical_crossentropy',
+              metrics   = ['accuracy'])
+```
+
+%% Cell type:markdown id: tags:
+
+**Train it :**
+
+%% Cell type:code id: tags:
+
+``` python
+%%time
+
+# ---- Training parameters
+batch_size = 64
+epochs     = 5
+
+# ---- Shuffle train data
+#      The shuffled pair returned by the helper replaces x_train / y_train
+x_train,y_train=ooo.shuffle_np_dataset(x_train,y_train)
+
+# ---- Train
+#      The test set is passed as validation data; the object returned by
+#      fit() is kept in `history` for the evaluation cells below.
+history = model.fit(  x_train, y_train,
+                      batch_size      = batch_size,
+                      epochs          = epochs,
+                      verbose         = 1,
+                      validation_data = (x_test, y_test))
+```
+
+%% Cell type:markdown id: tags:
+
+**Evaluate it :**
+
+%% Cell type:code id: tags:
+
+``` python
+max_val_accuracy = max(history.history["val_accuracy"])
+print("Max validation accuracy is : {:.4f}".format(max_val_accuracy))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+score = model.evaluate(x_test, y_test, verbose=0)
+
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+
+%% Cell type:markdown id: tags:
+
+---
+![](../fidle/img/00-Fidle-logo-01_s.png)
+%% Cell type:markdown id: tags:
+
+![Fidle](../fidle/img/00-Fidle-header-01.png)
+
+# <!-- TITLE --> CNN with GTSRB dataset - First convolutions
+<!-- DESC --> Episode 2 : First convolutions and first results
+<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
+
+## Objectives :
+  - Recognizing traffic signs
+  - Understand the **principles** and **architecture** of a **convolutional neural network** for image classification
+
+The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
+The final aim is to recognise them !
+Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
+
+
+## What we're going to do :
+
+ - Read H5 dataset
+ - Build a model
+ - Train the model
+ - Evaluate the model
+
+## Step 1 - Import and init
+
+%% Cell type:code id: tags:
+
+``` python
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.callbacks import TensorBoard
+
+import numpy as np
+import matplotlib.pyplot as plt
+import h5py
+import os,time,sys
+
+from importlib import reload
+
+sys.path.append('..')
+import fidle.pwk as ooo
+
+ooo.init()
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 2 - Load dataset
+We're going to retrieve a previously recorded dataset.
+For example: set-24x24-L
+
+%% Cell type:code id: tags:
+
+``` python
+%%time
+
+def read_dataset(name):
+    '''Reads h5 dataset from ./data
+
+    Arguments:  dataset name, without .h5
+    Returns:    x_train,y_train,x_test,y_test data'''
+    # ---- Read dataset
+    filename='./data/'+name+'.h5'
+    with  h5py.File(filename) as f:
+        x_train = f['x_train'][:]
+        y_train = f['y_train'][:]
+        x_test  = f['x_test'][:]
+        y_test  = f['y_test'][:]
+
+    # ---- done
+    print('Dataset "{}" is loaded. ({:.1f} Mo)\n'.format(name,os.path.getsize(filename)/(1024*1024)))
+    return x_train,y_train,x_test,y_test
+
+x_train,y_train,x_test,y_test = read_dataset('set-24x24-L')
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 3 - Have a look at the dataset
+We take a quick look as we go by...
+
+%% Cell type:code id: tags:
+
+``` python
+print("x_train : ", x_train.shape)
+print("y_train : ", y_train.shape)
+print("x_test  : ", x_test.shape)
+print("y_test  : ", y_test.shape)
+
+ooo.plot_images(x_train, y_train, range(12), columns=6,  x_size=2, y_size=2)
+ooo.plot_images(x_train, y_train, range(36), columns=12, x_size=1, y_size=1)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 4 - Create model
+We will now build a model and train it...
+
+Some models :
+
+%% Cell type:code id: tags:
+
+``` python
+
+# A basic model
+#
+def get_model_v1(lx,ly,lz):
+
+    model = keras.models.Sequential()
+
+    model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(1500, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+
+# A more sophisticated model
+#
+def get_model_v2(lx,ly,lz):
+    model = keras.models.Sequential()
+
+    model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))
+    model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))
+    model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))
+    model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(512, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+
+# My sophisticated model, but small and fast
+#
+def get_model_v3(lx,ly,lz):
+    model = keras.models.Sequential()
+    model.add( keras.layers.Conv2D(32, (3,3),   activation='relu', input_shape=(lx,ly,lz)))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(1152, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 5 - Train the model
+**Get the shape of my data :**
+
+%% Cell type:code id: tags:
+
+``` python
+(n,lx,ly,lz) = x_train.shape
+print("Images of the dataset have this folowing shape : ",(lx,ly,lz))
+```
+
+%% Cell type:markdown id: tags:
+
+**Get and compile a model, with the data shape :**
+
+%% Cell type:code id: tags:
+
+``` python
+model = get_model_v1(lx,ly,lz)
+
+model.summary()
+
+model.compile(optimizer = 'adam',
+              loss      = 'sparse_categorical_crossentropy',
+              metrics   = ['accuracy'])
+```
+
+%% Cell type:markdown id: tags:
+
+**Train it :**
+
+%% Cell type:code id: tags:
+
+``` python
+%%time
+
+# ---- Training parameters
+batch_size = 64
+epochs     = 5
+
+# ---- Shuffle train data
+#      The shuffled pair returned by the helper replaces x_train / y_train
+x_train,y_train=ooo.shuffle_np_dataset(x_train,y_train)
+
+# ---- Train
+#      The test set is passed as validation data; the object returned by
+#      fit() is kept in `history` for the evaluation cells below.
+history = model.fit(  x_train, y_train,
+                      batch_size      = batch_size,
+                      epochs          = epochs,
+                      verbose         = 1,
+                      validation_data = (x_test, y_test))
+```
+
+%% Cell type:markdown id: tags:
+
+**Evaluate it :**
+
+%% Cell type:code id: tags:
+
+``` python
+max_val_accuracy = max(history.history["val_accuracy"])
+print("Max validation accuracy is : {:.4f}".format(max_val_accuracy))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+score = model.evaluate(x_test, y_test, verbose=0)
+
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+
+%% Cell type:markdown id: tags:
+
+---
+![](../fidle/img/00-Fidle-logo-01_s.png)
--- a/GTSRB/03-Tracking-and-visualizing.ipynb
+++ b/GTSRB/03-Tracking-and-visualizing.ipynb
+%% Cell type:markdown id: tags:
+
+![Fidle](../fidle/img/00-Fidle-header-01.png)
+
+# <!-- TITLE --> CNN with GTSRB dataset - Monitoring
+<!-- DESC --> Episode 3: Monitoring and analysing training, managing checkpoints
+<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
+
+## Objectives :
+  - **Understand** what happens during the **training** process
+  - Implement **monitoring**, **backup** and **recovery** solutions
+
+The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
+The final aim is to recognise them !
+Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
+
+
+## What we're going to do :
+
+ - Monitoring and understanding our model training
+ - Add recovery points
+ - Analyze the results
+ - Restore and run recovery points
+
+## Step 1 - Import and init
+
+%% Cell type:code id: tags:
+
+``` python
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.callbacks import TensorBoard
+
+import numpy as np
+import h5py
+
+from sklearn.metrics import confusion_matrix
+
+import matplotlib.pyplot as plt
+import seaborn as sn
+import os, sys, time, random
+
+from importlib import reload
+
+sys.path.append('..')
+import fidle.pwk as ooo
+
+ooo.init()
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 2 - Load dataset
+The dataset is one of the saved datasets: RGB25, RGB35, L25, L35, etc.
+First of all, we're going to use a small dataset : **set-24x24-L**
+(with a GPU, it only takes 35 seconds, compared to more than 5 minutes with a CPU !)
+
+%% Cell type:code id: tags:
+
+``` python
+%%time
+
+def read_dataset(name):
+    '''Reads h5 dataset from ./data
+
+    Arguments:  dataset name, without .h5
+    Returns:    x_train,y_train,x_test,y_test data'''
+    # ---- Read dataset
+    filename='./data/'+name+'.h5'
+    with  h5py.File(filename) as f:
+        x_train = f['x_train'][:]
+        y_train = f['y_train'][:]
+        x_test  = f['x_test'][:]
+        y_test  = f['y_test'][:]
+        x_meta  = f['x_meta'][:]
+        y_meta  = f['y_meta'][:]
+
+    # ---- done
+    print('Dataset "{}" is loaded. ({:.1f} Mo)\n'.format(name,os.path.getsize(filename)/(1024*1024)))
+    return x_train,y_train,x_test,y_test,x_meta,y_meta
+
+x_train,y_train,x_test,y_test,x_meta,y_meta = read_dataset('set-24x24-L')
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 3 - Have a look at the dataset
+Note: Data must be reshaped for matplotlib
+
+%% Cell type:code id: tags:
+
+``` python
+print("x_train : ", x_train.shape)
+print("y_train : ", y_train.shape)
+print("x_test  : ", x_test.shape)
+print("y_test  : ", y_test.shape)
+
+ooo.plot_images(x_train, y_train, range(12), columns=6,  x_size=2, y_size=2)
+ooo.plot_images(x_train, y_train, range(36), columns=12, x_size=1, y_size=1)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 4 - Create model
+We will now build a model and train it...
+
+Some models...
+
+%% Cell type:code id: tags:
+
+``` python
+# A basic model
+#
+def get_model_v1(lx,ly,lz):
+
+    model = keras.models.Sequential()
+
+    model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(1500, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 5 - Prepare callbacks
+We will add 2 kinds of callbacks :
+ - **TensorBoard**
+Training logs, which can be visualised with Tensorboard.
+`#tensorboard --logdir ./run/logs`
+IMPORTANT : Restart tensorboard at each run
+ - **Model backup**
+ It is possible to save the model every xx epochs or at each improvement.
+ The model can be saved completely or partially (weights only).
+ For the full format, we can use the HDF5 format.
+
+%% Cell type:raw id: tags:
+
+%%bash
+# To clean old logs and saved model, run this cell
+#
+/bin/rm -r ./run/logs   2>/dev/null
+/bin/rm -r ./run/models 2>/dev/null
+/bin/mkdir -p -m 755 ./run/logs
+/bin/mkdir -p -m 755 ./run/models
+echo -e "Reset directories : ./run/logs and ./run/models ."
+
+%% Cell type:code id: tags:
+
+``` python
+# ---- Output directories used by the callbacks below
+#      (ooo.mkdir is a fidle.pwk helper - presumably creates the directory if missing; TODO confirm)
+ooo.mkdir('./run/models')
+ooo.mkdir('./run/logs')
+
+# ---- Callback tensorboard
+#      Logs go to a per-run directory, suffixed with the value of ooo.tag_now()
+log_dir = "./run/logs/tb_" + ooo.tag_now()
+tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
+
+# ---- Callback ModelCheckpoint - Save best model
+#      NOTE(review): monitor='accuracy' is the training accuracy, so "best" is
+#      judged on the training data; 'val_accuracy' would judge it on the
+#      validation data - confirm which one is intended.
+save_dir = "./run/models/best-model.h5"
+bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)
+
+# ---- Callback ModelCheckpoint - Save model periodically
+#      The file name carries the epoch number ({epoch:04d}).
+#      NOTE(review): save_freq=2000*5 is an integer count (samples or batches,
+#      depending on the TensorFlow version), not "every epoch";
+#      save_freq='epoch' would save once per epoch - confirm the intent.
+save_dir = "./run/models/model-{epoch:04d}.h5"
+savemodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_freq=2000*5)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 6 - Train the model
+**Get the shape of my data :**
+
+%% Cell type:code id: tags:
+
+``` python
+(n,lx,ly,lz) = x_train.shape
+print("Images of the dataset have this folowing shape : ",(lx,ly,lz))
+```
+
+%% Cell type:markdown id: tags:
+
+**Get and compile a model, with the data shape :**
+
+%% Cell type:code id: tags:
+
+``` python
+model = get_model_v1(lx,ly,lz)
+
+# model.summary()
+
+model.compile(optimizer='adam',
+              loss='sparse_categorical_crossentropy',
+              metrics=['accuracy'])
+```
+
+%% Cell type:markdown id: tags:
+
+**Train it :**
+Note: The training curve is visible in real time with Tensorboard :
+`#tensorboard --logdir ./run/logs`
+
+%% Cell type:code id: tags:
+
+``` python
+%%time
+
+# ---- Training parameters
+batch_size = 64
+epochs     = 30
+
+# ---- Shuffle train data
+#      The shuffled pair returned by the helper replaces x_train / y_train
+x_train,y_train=ooo.shuffle_np_dataset(x_train,y_train)
+
+# ---- Train
+# Note: The whole training set is used here; to be faster, only a part of it
+#       (e.g. 2000 values) could be passed instead.
+#       The test set is passed as validation data.
+#
+history = model.fit(  x_train, y_train,
+                      batch_size=batch_size,
+                      epochs=epochs,
+                      verbose=1,
+                      validation_data=(x_test, y_test),
+                      callbacks=[tensorboard_callback, bestmodel_callback, savemodel_callback] )
+
+# ---- Save the model as it is at the end of the training
+model.save('./run/models/last-model.h5')
+```
+
+%% Cell type:markdown id: tags:
+
+**Evaluate it :**
+
+%% Cell type:code id: tags:
+
+``` python
+max_val_accuracy = max(history.history["val_accuracy"])
+print("Max validation accuracy is : {:.4f}".format(max_val_accuracy))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+score = model.evaluate(x_test, y_test, verbose=0)
+
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 7 - History
+model.fit() returns the learning history
+
+%% Cell type:code id: tags:
+
+``` python
+ooo.plot_history(history)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 8 - Evaluation and confusion
+
+%% Cell type:code id: tags:
+
+``` python
+y_pred   = model.predict_classes(x_test)
+conf_mat = confusion_matrix(y_test,y_pred, normalize="true", labels=range(43))
+
+ooo.plot_confusion_matrix(conf_mat)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 9 - Restore and evaluate
+### 9.1 - List saved models :
+
+%% Cell type:code id: tags:
+
+``` python
+!find ./run/models/
+```
+
+%% Cell type:markdown id: tags:
+
+### 9.2 - Restore a model :
+
+%% Cell type:code id: tags:
+
+``` python
+loaded_model = tf.keras.models.load_model('./run/models/best-model.h5')
+# loaded_model.summary()
+print("Loaded.")
+```
+
+%% Cell type:markdown id: tags:
+
+### 9.3 - Evaluate it :
+
+%% Cell type:code id: tags:
+
+``` python
+score = loaded_model.evaluate(x_test, y_test, verbose=0)
+
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+
+%% Cell type:markdown id: tags:
+
+### 9.4 - Make a prediction :
+
+%% Cell type:code id: tags:
+
+``` python
+# ---- Get a random image
+#
+i   = random.randint(1,len(x_test))
+x,y = x_test[i], y_test[i]
+
+# ---- Do prediction
+#
+predictions = loaded_model.predict( np.array([x]) )
+
+# ---- A prediction is just the output layer
+#
+print("\nOutput layer from model is (x100) :\n")
+with np.printoptions(precision=2, suppress=True, linewidth=95):
+    print(predictions*100)
+
+# ---- Graphic visualisation
+#
+print("\nGraphically :\n")
+plt.figure(figsize=(12,2))
+plt.bar(range(43), predictions[0], align='center', alpha=0.5)
+plt.ylabel('Probability')
+plt.ylim((0,1))
+plt.xlabel('Class')
+plt.title('Trafic Sign prediction')
+plt.show()
+
+# ---- Predict class
+#
+p = np.argmax(predictions)
+
+# ---- Show result
+#
+print("\nPrediction on the left, real stuff on the right :\n")
+ooo.plot_images([x,x_meta[y]], [p,y], range(2),  columns=3,  x_size=3, y_size=2)
+
+if p==y:
+    print("YEEES ! that's right!")
+else:
+    print("oups, that's wrong ;-(")
+```
+
+%% Cell type:markdown id: tags:
+
+---
+![](../fidle/img/00-Fidle-logo-01_s.png)
+%% Cell type:markdown id: tags:
+
+![Fidle](../fidle/img/00-Fidle-header-01.png)
+
+# <!-- TITLE --> CNN with GTSRB dataset - Monitoring
+<!-- DESC --> Episode 3: Monitoring and analysing training, managing checkpoints
+<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
+
+## Objectives :
+  - **Understand** what happens during the **training** process
+  - Implement **monitoring**, **backup** and **recovery** solutions
+
+The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
+The final aim is to recognise them !
+Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
+
+
+## What we're going to do :
+
+ - Monitoring and understanding our model training
+ - Add recovery points
+ - Analyze the results
+ - Restore and run recovery points
+
+## Step 1 - Import and init
+
+%% Cell type:code id: tags:
+
+``` python
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.callbacks import TensorBoard
+
+import numpy as np
+import h5py
+
+from sklearn.metrics import confusion_matrix
+
+import matplotlib.pyplot as plt
+import seaborn as sn
+import os, sys, time, random
+
+from importlib import reload
+
+sys.path.append('..')
+import fidle.pwk as ooo
+
+ooo.init()
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 2 - Load dataset
+The dataset is one of the saved datasets: RGB25, RGB35, L25, L35, etc.
+First of all, we're going to use a small dataset : **set-24x24-L**
+(with a GPU, it only takes 35 seconds, compared to more than 5 minutes with a CPU !)
+
+%% Cell type:code id: tags:
+
+``` python
+%%time
+
+def read_dataset(name):
+    '''Reads h5 dataset from ./data
+
+    Arguments:  dataset name, without .h5
+    Returns:    x_train,y_train,x_test,y_test data'''
+    # ---- Read dataset
+    filename='./data/'+name+'.h5'
+    with  h5py.File(filename) as f:
+        x_train = f['x_train'][:]
+        y_train = f['y_train'][:]
+        x_test  = f['x_test'][:]
+        y_test  = f['y_test'][:]
+        x_meta  = f['x_meta'][:]
+        y_meta  = f['y_meta'][:]
+
+    # ---- done
+    print('Dataset "{}" is loaded. ({:.1f} Mo)\n'.format(name,os.path.getsize(filename)/(1024*1024)))
+    return x_train,y_train,x_test,y_test,x_meta,y_meta
+
+x_train,y_train,x_test,y_test,x_meta,y_meta = read_dataset('set-24x24-L')
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 3 - Have a look at the dataset
+Note: Data must be reshaped for matplotlib
+
+%% Cell type:code id: tags:
+
+``` python
+print("x_train : ", x_train.shape)
+print("y_train : ", y_train.shape)
+print("x_test  : ", x_test.shape)
+print("y_test  : ", y_test.shape)
+
+ooo.plot_images(x_train, y_train, range(12), columns=6,  x_size=2, y_size=2)
+ooo.plot_images(x_train, y_train, range(36), columns=12, x_size=1, y_size=1)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 4 - Create model
+We will now build a model and train it...
+
+Some models...
+
+%% Cell type:code id: tags:
+
+``` python
+# A basic model
+#
+def get_model_v1(lx,ly,lz):
+
+    model = keras.models.Sequential()
+
+    model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(1500, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 5 - Prepare callbacks
+We will add 2 kinds of callbacks :
+ - **TensorBoard**
+Training logs, which can be visualised with Tensorboard.
+`#tensorboard --logdir ./run/logs`
+IMPORTANT : Restart tensorboard at each run
+ - **Model backup**
+ It is possible to save the model every xx epochs or at each improvement.
+ The model can be saved completely or partially (weights only).
+ For the full format, we can use the HDF5 format.
+
+%% Cell type:raw id: tags:
+
+%%bash
+# To clean old logs and saved model, run this cell
+#
+/bin/rm -r ./run/logs   2>/dev/null
+/bin/rm -r ./run/models 2>/dev/null
+/bin/mkdir -p -m 755 ./run/logs
+/bin/mkdir -p -m 755 ./run/models
+echo -e "Reset directories : ./run/logs and ./run/models ."
+
+%% Cell type:code id: tags:
+
+``` python
+# ---- Output directories used by the callbacks below
+#      (ooo.mkdir is a fidle.pwk helper - presumably creates the directory if missing; TODO confirm)
+ooo.mkdir('./run/models')
+ooo.mkdir('./run/logs')
+
+# ---- Callback tensorboard
+#      Logs go to a per-run directory, suffixed with the value of ooo.tag_now()
+log_dir = "./run/logs/tb_" + ooo.tag_now()
+tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
+
+# ---- Callback ModelCheckpoint - Save best model
+#      NOTE(review): monitor='accuracy' is the training accuracy, so "best" is
+#      judged on the training data; 'val_accuracy' would judge it on the
+#      validation data - confirm which one is intended.
+save_dir = "./run/models/best-model.h5"
+bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)
+
+# ---- Callback ModelCheckpoint - Save model periodically
+#      The file name carries the epoch number ({epoch:04d}).
+#      NOTE(review): save_freq=2000*5 is an integer count (samples or batches,
+#      depending on the TensorFlow version), not "every epoch";
+#      save_freq='epoch' would save once per epoch - confirm the intent.
+save_dir = "./run/models/model-{epoch:04d}.h5"
+savemodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_freq=2000*5)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 6 - Train the model
+**Get the shape of my data :**
+
+%% Cell type:code id: tags:
+
+``` python
+(n,lx,ly,lz) = x_train.shape
+print("Images of the dataset have this folowing shape : ",(lx,ly,lz))
+```
+
+%% Cell type:markdown id: tags:
+
+**Get and compile a model, with the data shape :**
+
+%% Cell type:code id: tags:
+
+``` python
+model = get_model_v1(lx,ly,lz)
+
+# model.summary()
+
+model.compile(optimizer='adam',
+              loss='sparse_categorical_crossentropy',
+              metrics=['accuracy'])
+```
+
+%% Cell type:markdown id: tags:
+
+**Train it :**
+Note: The training curve is visible in real time with Tensorboard :
+`#tensorboard --logdir ./run/logs`
+
+%% Cell type:code id: tags:
+
+``` python
+%%time
+
+# ---- Training parameters
+batch_size = 64
+epochs     = 30
+
+# ---- Shuffle train data
+#      The shuffled pair returned by the helper replaces x_train / y_train
+x_train,y_train=ooo.shuffle_np_dataset(x_train,y_train)
+
+# ---- Train
+# Note: The whole training set is used here; to be faster, only a part of it
+#       (e.g. 2000 values) could be passed instead.
+#       The test set is passed as validation data.
+#
+history = model.fit(  x_train, y_train,
+                      batch_size=batch_size,
+                      epochs=epochs,
+                      verbose=1,
+                      validation_data=(x_test, y_test),
+                      callbacks=[tensorboard_callback, bestmodel_callback, savemodel_callback] )
+
+# ---- Save the model as it is at the end of the training
+model.save('./run/models/last-model.h5')
+```
+
+%% Cell type:markdown id: tags:
+
+**Evaluate it :**
+
+%% Cell type:code id: tags:
+
+``` python
+max_val_accuracy = max(history.history["val_accuracy"])
+print("Max validation accuracy is : {:.4f}".format(max_val_accuracy))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+score = model.evaluate(x_test, y_test, verbose=0)
+
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 7 - History
+model.fit() returns the learning history
+
+%% Cell type:code id: tags:
+
+``` python
+ooo.plot_history(history)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 8 - Evaluation and confusion
+
+%% Cell type:code id: tags:
+
+``` python
+y_pred   = model.predict_classes(x_test)
+conf_mat = confusion_matrix(y_test,y_pred, normalize="true", labels=range(43))
+
+ooo.plot_confusion_matrix(conf_mat)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 9 - Restore and evaluate
+### 9.1 - List saved models :
+
+%% Cell type:code id: tags:
+
+``` python
+!find ./run/models/
+```
+
+%% Cell type:markdown id: tags:
+
+### 9.2 - Restore a model :
+
+%% Cell type:code id: tags:
+
+``` python
+loaded_model = tf.keras.models.load_model('./run/models/best-model.h5')
+# loaded_model.summary()
+print("Loaded.")
+```
+
+%% Cell type:markdown id: tags:
+
+### 9.3 - Evaluate it :
+
+%% Cell type:code id: tags:
+
+``` python
+score = loaded_model.evaluate(x_test, y_test, verbose=0)
+
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+
+%% Cell type:markdown id: tags:
+
+### 9.4 - Make a prediction :
+
+%% Cell type:code id: tags:
+
+``` python
+# ---- Get a random image
+#
+i   = random.randint(1,len(x_test))
+x,y = x_test[i], y_test[i]
+
+# ---- Do prediction
+#
+predictions = loaded_model.predict( np.array([x]) )
+
+# ---- A prediction is just the output layer
+#
+print("\nOutput layer from model is (x100) :\n")
+with np.printoptions(precision=2, suppress=True, linewidth=95):
+    print(predictions*100)
+
+# ---- Graphic visualisation
+#
+print("\nGraphically :\n")
+plt.figure(figsize=(12,2))
+plt.bar(range(43), predictions[0], align='center', alpha=0.5)
+plt.ylabel('Probability')
+plt.ylim((0,1))
+plt.xlabel('Class')
+plt.title('Trafic Sign prediction')
+plt.show()
+
+# ---- Predict class
+#
+p = np.argmax(predictions)
+
+# ---- Show result
+#
+print("\nPrediction on the left, real stuff on the right :\n")
+ooo.plot_images([x,x_meta[y]], [p,y], range(2),  columns=3,  x_size=3, y_size=2)
+
+if p==y:
+    print("YEEES ! that's right!")
+else:
+    print("oups, that's wrong ;-(")
+```
+
+%% Cell type:markdown id: tags:
+
+---
+![](../fidle/img/00-Fidle-logo-01_s.png)
--- a/GTSRB/04-Data-augmentation.ipynb
+++ b/GTSRB/04-Data-augmentation.ipynb
+%% Cell type:markdown id: tags:
+
+![Fidle](../fidle/img/00-Fidle-header-01.png)
+
+# <!-- TITLE --> CNN with GTSRB dataset - Data augmentation
+<!-- DESC --> Episode 4: Improving the results with data augmentation
+<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
+
+## Objectives :
+  - Trying to improve training by **enhancing the data**
+  - Using Keras' **data augmentation utilities**, finding their limits...
+
+The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
+The final aim is to recognise them !
+Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
+
+
+## What we're going to do :
+ - Increase and improve the training dataset
+ - Identify the limits of these tools
+
+## Step 1 - Import and init
+
+%% Cell type:code id: tags:
+
+``` python
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.callbacks import TensorBoard
+
+import numpy as np
+import h5py
+
+from sklearn.metrics import confusion_matrix
+
+import matplotlib.pyplot as plt
+import seaborn as sn
+import os, sys, time, random
+
+from importlib import reload
+
+sys.path.append('..')
+import fidle.pwk as ooo
+
+ooo.init()
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 2 - Dataset loader
+Dataset is one of the saved dataset: RGB25, RGB35, L25, L35, etc.
+First of all, we're going to use a smart dataset : **set-24x24-L**
+(with a GPU, it only takes 35'' compared to more than 5' with a CPU !)
+
+%% Cell type:code id: tags:
+
+``` python
+%%time
+
+def read_dataset(name):
+    '''Reads h5 dataset from ./data
+
+    Arguments:  dataset name, without .h5
+    Returns:    x_train,y_train,x_test,y_test data'''
+    # ---- Read dataset
+    filename='./data/'+name+'.h5'
+    with  h5py.File(filename) as f:
+        x_train = f['x_train'][:]
+        y_train = f['y_train'][:]
+        x_test  = f['x_test'][:]
+        y_test  = f['y_test'][:]
+
+    # ---- done
+    print('Dataset "{}" is loaded. ({:.1f} Mo)\n'.format(name,os.path.getsize(filename)/(1024*1024)))
+    return x_train,y_train,x_test,y_test
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 3 - Models
+We will now build a model and train it...
+
+This is my model ;-)
+
+%% Cell type:code id: tags:
+
+``` python
+# A basic model
+#
+def get_model_v1(lx,ly,lz):
+
+    model = keras.models.Sequential()
+
+    model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(1500, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 4 - Callbacks
+We prepare 2 kinds of callbacks :  TensorBoard and Model backup
+
+%% Cell type:code id: tags:
+
+``` python
+%%bash
+# To clean old logs and saved model, run this cell
+#
+/bin/rm -r ./run/logs   2>/dev/null
+/bin/rm -r ./run/models 2>/dev/null
+/bin/mkdir -p -m 755 ./run/logs
+/bin/mkdir -p -m 755 ./run/models
+echo -e "Reset directories : ./run/logs and ./run/models ."
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# ---- Callback tensorboard
+# One log directory per run, tagged by ooo.tag_now()
+log_dir = "./run/logs/tb_" + ooo.tag_now()
+tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
+
+# ---- Callback ModelCheckpoint - Save best model
+# NOTE(review): 'accuracy' is the training metric, while the notebook reports
+# val_accuracy afterwards - confirm which one should select the "best" model.
+save_dir = "./run/models/best-model.h5"
+bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)
+
+# ---- Callback ModelCheckpoint - Periodic snapshots of the model
+# NOTE(review): save_freq is an integer here (2000*5), so saving is driven by
+# that count and not by epoch boundaries - confirm the intended frequency.
+save_dir = "./run/models/model-{epoch:04d}.h5"
+savemodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_freq=2000*5)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 5 - Load and prepare dataset
+### 5.1 - Load
+
+%% Cell type:code id: tags:
+
+``` python
+x_train,y_train,x_test,y_test = read_dataset('set-48x48-L-LHE')
+```
+
+%% Cell type:markdown id: tags:
+
+### 5.2 - Data augmentation
+
+%% Cell type:code id: tags:
+
+``` python
+datagen = keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,
+                             featurewise_std_normalization=False,
+                             width_shift_range=0.1,
+                             height_shift_range=0.1,
+                             zoom_range=0.2,
+                             shear_range=0.1,
+                             rotation_range=10.)
+datagen.fit(x_train)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 6 - Train the model
+**Get the shape of my data :**
+
+%% Cell type:code id: tags:
+
+``` python
+(n,lx,ly,lz) = x_train.shape
+print("Images of the dataset have this folowing shape : ",(lx,ly,lz))
+```
+
+%% Cell type:markdown id: tags:
+
+**Get and compile a model, with the data shape :**
+
+%% Cell type:code id: tags:
+
+``` python
+model = get_model_v3(lx,ly,lz)
+
+# model.summary()
+
+model.compile(optimizer='adam',
+              loss='sparse_categorical_crossentropy',
+              metrics=['accuracy'])
+```
+
+%% Cell type:markdown id: tags:
+
+**Train it :**
+Note : The training curve is visible in real time with Tensorboard :
+`#tensorboard --logdir ./run/logs`
+
+%% Cell type:code id: tags:
+
+``` python
+%%time
+
+batch_size = 64
+epochs     = 30
+
+# ---- Shuffle train data
+# (disabled : batches are drawn by datagen.flow() below)
+#x_train,y_train=ooo.shuffle_np_dataset(x_train,y_train)
+
+# ---- Train
+# Batches come from the augmentation generator ; steps_per_epoch is the
+# number of full batches contained in x_train.
+#
+history = model.fit(  datagen.flow(x_train, y_train, batch_size=batch_size),
+                      steps_per_epoch = int(x_train.shape[0]/batch_size),
+                      epochs=epochs,
+                      verbose=1,
+                      validation_data=(x_test, y_test),
+                      callbacks=[tensorboard_callback, bestmodel_callback, savemodel_callback] )
+
+# ---- Keep the model as it is after the last epoch
+model.save('./run/models/last-model.h5')
+```
+
+%% Cell type:markdown id: tags:
+
+**Evaluate it :**
+
+%% Cell type:code id: tags:
+
+``` python
+max_val_accuracy = max(history.history["val_accuracy"])
+print("Max validation accuracy is : {:.4f}".format(max_val_accuracy))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+score = model.evaluate(x_test, y_test, verbose=0)
+
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 7 - History
+model.fit() returns the learning history.
+
+%% Cell type:code id: tags:
+
+``` python
+ooo.plot_history(history)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 8 - Evaluate best model
+
+%% Cell type:markdown id: tags:
+
+### 8.1 - Restore best model :
+
+%% Cell type:code id: tags:
+
+``` python
+# Reload the checkpoint written by bestmodel_callback
+loaded_model = tf.keras.models.load_model('./run/models/best-model.h5')
+# loaded_model.summary()
+print("Loaded.")
+```
+
+%% Cell type:markdown id: tags:
+
+### 8.2 - Evaluate it :
+
+%% Cell type:code id: tags:
+
+``` python
+score = loaded_model.evaluate(x_test, y_test, verbose=0)
+
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+
+%% Cell type:markdown id: tags:
+
+**Plot confusion matrix**
+
+%% Cell type:code id: tags:
+
+``` python
+y_pred   = model.predict_classes(x_test)
+conf_mat = confusion_matrix(y_test,y_pred, normalize="true", labels=range(43))
+
+ooo.plot_confusion_matrix(conf_mat)
+```
+
+%% Cell type:markdown id: tags:
+
+---
+![](../fidle/img/00-Fidle-logo-01_s.png)
+%% Cell type:markdown id: tags:
+
+![Fidle](../fidle/img/00-Fidle-header-01.png)
+
+# <!-- TITLE --> CNN with GTSRB dataset - Data augmentation
+<!-- DESC --> Episode 4: Improving the results with data augmentation
+<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
+
+## Objectives :
+  - Trying to improve training by **enhancing the data**
+  - Using Keras' **data augmentation utilities**, finding their limits...
+
+The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
+The final aim is to recognise them !
+Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
+
+
+## What we're going to do :
+ - Increase and improve the training dataset
+ - Identify the limits of these tools
+
+## Step 1 - Import and init
+
+%% Cell type:code id: tags:
+
+``` python
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.callbacks import TensorBoard
+
+import numpy as np
+import h5py
+
+from sklearn.metrics import confusion_matrix
+
+import matplotlib.pyplot as plt
+import seaborn as sn
+import os, sys, time, random
+
+from importlib import reload
+
+sys.path.append('..')
+import fidle.pwk as ooo
+
+ooo.init()
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 2 - Dataset loader
+Dataset is one of the saved dataset: RGB25, RGB35, L25, L35, etc.
+First of all, we're going to use a smart dataset : **set-24x24-L**
+(with a GPU, it only takes 35'' compared to more than 5' with a CPU !)
+
+%% Cell type:code id: tags:
+
+``` python
+%%time
+
+def read_dataset(name):
+    '''Reads h5 dataset from ./data
+
+    Arguments:  dataset name, without .h5
+    Returns:    x_train,y_train,x_test,y_test data'''
+    # ---- Read dataset
+    filename='./data/'+name+'.h5'
+    with  h5py.File(filename) as f:
+        x_train = f['x_train'][:]
+        y_train = f['y_train'][:]
+        x_test  = f['x_test'][:]
+        y_test  = f['y_test'][:]
+
+    # ---- done
+    print('Dataset "{}" is loaded. ({:.1f} Mo)\n'.format(name,os.path.getsize(filename)/(1024*1024)))
+    return x_train,y_train,x_test,y_test
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 3 - Models
+We will now build a model and train it...
+
+This is my model ;-)
+
+%% Cell type:code id: tags:
+
+``` python
+# A basic model
+#
+def get_model_v1(lx,ly,lz):
+
+    model = keras.models.Sequential()
+
+    model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(1500, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 4 - Callbacks
+We prepare 2 kinds of callbacks :  TensorBoard and Model backup
+
+%% Cell type:code id: tags:
+
+``` python
+%%bash
+# To clean old logs and saved model, run this cell
+#
+/bin/rm -r ./run/logs   2>/dev/null
+/bin/rm -r ./run/models 2>/dev/null
+/bin/mkdir -p -m 755 ./run/logs
+/bin/mkdir -p -m 755 ./run/models
+echo -e "Reset directories : ./run/logs and ./run/models ."
+```
+
+%% Cell type:code id: tags:
+
+``` python
+# ---- Callback tensorboard
+# One log directory per run, tagged by ooo.tag_now()
+log_dir = "./run/logs/tb_" + ooo.tag_now()
+tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
+
+# ---- Callback ModelCheckpoint - Save best model
+# NOTE(review): 'accuracy' is the training metric, while the notebook reports
+# val_accuracy afterwards - confirm which one should select the "best" model.
+save_dir = "./run/models/best-model.h5"
+bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)
+
+# ---- Callback ModelCheckpoint - Periodic snapshots of the model
+# NOTE(review): save_freq is an integer here (2000*5), so saving is driven by
+# that count and not by epoch boundaries - confirm the intended frequency.
+save_dir = "./run/models/model-{epoch:04d}.h5"
+savemodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_freq=2000*5)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 5 - Load and prepare dataset
+### 5.1 - Load
+
+%% Cell type:code id: tags:
+
+``` python
+x_train,y_train,x_test,y_test = read_dataset('set-48x48-L-LHE')
+```
+
+%% Cell type:markdown id: tags:
+
+### 5.2 - Data augmentation
+
+%% Cell type:code id: tags:
+
+``` python
+datagen = keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,
+                             featurewise_std_normalization=False,
+                             width_shift_range=0.1,
+                             height_shift_range=0.1,
+                             zoom_range=0.2,
+                             shear_range=0.1,
+                             rotation_range=10.)
+datagen.fit(x_train)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 6 - Train the model
+**Get the shape of my data :**
+
+%% Cell type:code id: tags:
+
+``` python
+(n,lx,ly,lz) = x_train.shape
+print("Images of the dataset have this folowing shape : ",(lx,ly,lz))
+```
+
+%% Cell type:markdown id: tags:
+
+**Get and compile a model, with the data shape :**
+
+%% Cell type:code id: tags:
+
+``` python
+model = get_model_v3(lx,ly,lz)
+
+# model.summary()
+
+model.compile(optimizer='adam',
+              loss='sparse_categorical_crossentropy',
+              metrics=['accuracy'])
+```
+
+%% Cell type:markdown id: tags:
+
+**Train it :**
+Note : The training curve is visible in real time with Tensorboard :
+`#tensorboard --logdir ./run/logs`
+
+%% Cell type:code id: tags:
+
+``` python
+%%time
+
+batch_size = 64
+epochs     = 30
+
+# ---- Shuffle train data
+# (disabled : batches are drawn by datagen.flow() below)
+#x_train,y_train=ooo.shuffle_np_dataset(x_train,y_train)
+
+# ---- Train
+# Batches come from the augmentation generator ; steps_per_epoch is the
+# number of full batches contained in x_train.
+#
+history = model.fit(  datagen.flow(x_train, y_train, batch_size=batch_size),
+                      steps_per_epoch = int(x_train.shape[0]/batch_size),
+                      epochs=epochs,
+                      verbose=1,
+                      validation_data=(x_test, y_test),
+                      callbacks=[tensorboard_callback, bestmodel_callback, savemodel_callback] )
+
+# ---- Keep the model as it is after the last epoch
+model.save('./run/models/last-model.h5')
+```
+
+%% Cell type:markdown id: tags:
+
+**Evaluate it :**
+
+%% Cell type:code id: tags:
+
+``` python
+max_val_accuracy = max(history.history["val_accuracy"])
+print("Max validation accuracy is : {:.4f}".format(max_val_accuracy))
+```
+
+%% Cell type:code id: tags:
+
+``` python
+score = model.evaluate(x_test, y_test, verbose=0)
+
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 7 - History
+model.fit() returns the learning history.
+
+%% Cell type:code id: tags:
+
+``` python
+ooo.plot_history(history)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 8 - Evaluate best model
+
+%% Cell type:markdown id: tags:
+
+### 8.1 - Restore best model :
+
+%% Cell type:code id: tags:
+
+``` python
+# Reload the checkpoint written by bestmodel_callback
+loaded_model = tf.keras.models.load_model('./run/models/best-model.h5')
+# loaded_model.summary()
+print("Loaded.")
+```
+
+%% Cell type:markdown id: tags:
+
+### 8.2 - Evaluate it :
+
+%% Cell type:code id: tags:
+
+``` python
+score = loaded_model.evaluate(x_test, y_test, verbose=0)
+
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+
+%% Cell type:markdown id: tags:
+
+**Plot confusion matrix**
+
+%% Cell type:code id: tags:
+
+``` python
+y_pred   = model.predict_classes(x_test)
+conf_mat = confusion_matrix(y_test,y_pred, normalize="true", labels=range(43))
+
+ooo.plot_confusion_matrix(conf_mat)
+```
+
+%% Cell type:markdown id: tags:
+
+---
+![](../fidle/img/00-Fidle-logo-01_s.png)
--- a/GTSRB/05-Full-convolutions.ipynb
+++ b/GTSRB/05-Full-convolutions.ipynb
+%% Cell type:markdown id: tags:
+
+![Fidle](../fidle/img/00-Fidle-header-01.png)
+
+# <!-- TITLE --> CNN with GTSRB dataset - Full convolutions
+<!-- DESC --> Episode 5: A lot of models, a lot of datasets and a lot of results.
+<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
+
+## Objectives :
+  - Try multiple solutions
+  - Design a generic and batch-usable code
+
+The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
+The final aim is to recognise them !
+Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
+
+
+## What we're going to do :
+
+Our main steps:
+ - Try n models with n datasets
+ - Save a JSON report
+ - Write code that can be run in batch mode
+
+## Step 1 - Import
+
+%% Cell type:code id: tags:
+
+``` python
+import tensorflow as tf
+from tensorflow import keras
+
+import numpy as np
+import h5py
+import os,time,json
+import random
+
+from IPython.display import display
+
+VERSION='1.6'
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 2 - Init and start
+
+%% Cell type:code id: tags:
+
+``` python
+# ---- Where I am ?
+now    = time.strftime("%A %d %B %Y - %Hh%Mm%Ss")
+here   = os.getcwd()
+random.seed(time.time())
+tag_id = '{:06}'.format(random.randint(0,99999))
+
+# ---- Who I am ?
+if 'OAR_JOB_ID' in os.environ:
+    oar_id=os.environ['OAR_JOB_ID']
+else:
+    oar_id='???'
+
+print('\nFull Convolutions Notebook')
+print('  Version            : {}'.format(VERSION))
+print('  Now is             : {}'.format(now))
+print('  OAR id             : {}'.format(oar_id))
+print('  Tag id             : {}'.format(tag_id))
+print('  Working directory  : {}'.format(here))
+print('  TensorFlow version :',tf.__version__)
+print('  Keras version      :',tf.keras.__version__)
+print('  for tensorboard    : --logdir {}/run/logs_{}'.format(here,tag_id))
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 3 - Dataset loading
+
+%% Cell type:code id: tags:
+
+``` python
+def read_dataset(name):
+    '''Reads h5 dataset from ./data
+
+    Arguments:  dataset name, without .h5
+    Returns:    x_train,y_train,x_test,y_test data'''
+    # ---- Read dataset
+    filename='./data/'+name+'.h5'
+    with  h5py.File(filename,'r') as f:
+        x_train = f['x_train'][:]
+        y_train = f['y_train'][:]
+        x_test  = f['x_test'][:]
+        y_test  = f['y_test'][:]
+
+    return x_train,y_train,x_test,y_test
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 4 - Models collection
+
+%% Cell type:code id: tags:
+
+``` python
+
+# A basic model
+#
+def get_model_v1(lx,ly,lz):
+
+    model = keras.models.Sequential()
+
+    model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(1500, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+
+# A more sophisticated model
+#
+def get_model_v2(lx,ly,lz):
+    model = keras.models.Sequential()
+
+    model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))
+    model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))
+    model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))
+    model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(512, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+
+def get_model_v3(lx,ly,lz):
+    model = keras.models.Sequential()
+    model.add(tf.keras.layers.Conv2D(32, (5, 5), padding='same',  activation='relu', input_shape=(lx,ly,lz)))
+    model.add(tf.keras.layers.BatchNormalization(axis=-1))
+    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add(tf.keras.layers.Dropout(0.2))
+
+    model.add(tf.keras.layers.Conv2D(64, (5, 5), padding='same',  activation='relu'))
+    model.add(tf.keras.layers.BatchNormalization(axis=-1))
+    model.add(tf.keras.layers.Conv2D(128, (5, 5), padding='same', activation='relu'))
+    model.add(tf.keras.layers.BatchNormalization(axis=-1))
+    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add(tf.keras.layers.Dropout(0.2))
+
+    model.add(tf.keras.layers.Flatten())
+    model.add(tf.keras.layers.Dense(512, activation='relu'))
+    model.add(tf.keras.layers.BatchNormalization())
+    model.add(tf.keras.layers.Dropout(0.4))
+
+    model.add(tf.keras.layers.Dense(43, activation='softmax'))
+    return model
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 5 - Multiple datasets, multiple models ;-)
+
+%% Cell type:code id: tags:
+
+``` python
+def multi_run(datasets, models, datagen=None,
+              train_size=1, test_size=1, batch_size=64, epochs=16,
+              verbose=0, extension_dir='last'):
+
+    # ---- Logs and models dir
+    #
+    os.makedirs('./run/logs_{}'.format(extension_dir),   mode=0o750, exist_ok=True)
+    os.makedirs('./run/models_{}'.format(extension_dir), mode=0o750, exist_ok=True)
+
+    # ---- Columns of output
+    #
+    output={}
+    output['Dataset']=[]
+    output['Size']   =[]
+    for m in models:
+        output[m+'_Accuracy'] = []
+        output[m+'_Duration'] = []
+
+    # ---- Let's go
+    #
+    for d_name in datasets:
+        print("\nDataset : ",d_name)
+
+        # ---- Read dataset
+        x_train,y_train,x_test,y_test = read_dataset(d_name)
+        d_size=os.path.getsize('./data/'+d_name+'.h5')/(1024*1024)
+        output['Dataset'].append(d_name)
+        output['Size'].append(d_size)
+
+        # ---- Get the shape
+        (n,lx,ly,lz) = x_train.shape
+        n_train = int(x_train.shape[0]*train_size)
+        n_test  = int(x_test.shape[0]*test_size)
+
+        # ---- For each model
+        for m_name,m_function in models.items():
+            print("    Run model {}  : ".format(m_name), end='')
+            # ---- get model
+            try:
+                model=m_function(lx,ly,lz)
+                # ---- Compile it
+                model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
+                # ---- Callbacks tensorboard
+                log_dir = "./run/logs_{}/tb_{}_{}".format(extension_dir, d_name, m_name)
+                tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
+                # ---- Callbacks bestmodel
+                save_dir = "./run/models_{}/model_{}_{}.h5".format(extension_dir, d_name, m_name)
+                bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)
+                # ---- Train
+                start_time = time.time()
+                if datagen==None:
+                    # ---- No data augmentation (datagen=None) --------------------------------------
+                    history = model.fit(x_train[:n_train], y_train[:n_train],
+                                        batch_size      = batch_size,
+                                        epochs          = epochs,
+                                        verbose         = verbose,
+                                        validation_data = (x_test[:n_test], y_test[:n_test]),
+                                        callbacks       = [tensorboard_callback, bestmodel_callback])
+                else:
+                    # ---- Data augmentation (datagen given) ----------------------------------------
+                    datagen.fit(x_train)
+                    history = model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),
+                                        steps_per_epoch = int(n_train/batch_size),
+                                        epochs          = epochs,
+                                        verbose         = verbose,
+                                        validation_data = (x_test[:n_test], y_test[:n_test]),
+                                        callbacks       = [tensorboard_callback, bestmodel_callback])
+
+                # ---- Result
+                end_time = time.time()
+                duration = end_time-start_time
+                accuracy = max(history.history["val_accuracy"])*100
+                #
+                output[m_name+'_Accuracy'].append(accuracy)
+                output[m_name+'_Duration'].append(duration)
+                print("Accuracy={:.2f} and Duration={:.2f})".format(accuracy,duration))
+            except:
+                output[m_name+'_Accuracy'].append('0')
+                output[m_name+'_Duration'].append('999')
+                print('-')
+    return output
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 6 - Run !
+
+%% Cell type:code id: tags:
+
+``` python
+# Wall-clock start of the whole run (all datasets, all models)
+start_time = time.time()
+
+print('\n---- Run','-'*50)
+
+# --------- Datasets, models, and more.. -----------------------------------
+# Exactly one of the configurations below should be left uncommented.
+#
+# ---- For tests
+# datasets   = ['set-24x24-L', 'set-24x24-RGB']
+# models     = {'v1':get_model_v1, 'v2':get_model_v2}
+# batch_size = 64
+# epochs     = 2
+# train_size = 0.1
+# test_size  = 0.1
+# with_datagen = False
+# verbose      = 0
+#
+# ---- All possibilities -> Run A
+# datasets     = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']
+# models       = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}
+# batch_size   = 64
+# epochs       = 16
+# train_size   = 1
+# test_size    = 1
+# with_datagen = False
+# verbose      = 0
+#
+# ---- Data augmentation -> Run B
+datasets     = ['set-48x48-RGB']
+models       = {'v2':get_model_v2}
+batch_size   = 64
+epochs       = 20
+train_size   = 1
+test_size    = 1
+with_datagen = True
+verbose      = 0
+#
+# ---------------------------------------------------------------------------
+
+# ---- Data augmentation
+# The generator is only built here ; multi_run() calls its fit() and flow().
+#
+if with_datagen :
+    datagen = keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,
+                                                           featurewise_std_normalization=False,
+                                                           width_shift_range=0.1,
+                                                           height_shift_range=0.1,
+                                                           zoom_range=0.2,
+                                                           shear_range=0.1,
+                                                           rotation_range=10.)
+else:
+    datagen=None
+
+# ---- Run
+# Logs and models of this run go to directories suffixed by tag_id
+#
+output = multi_run(datasets, models,
+                   datagen=datagen,
+                   train_size=train_size, test_size=test_size,
+                   batch_size=batch_size, epochs=epochs,
+                   verbose=verbose,
+                   extension_dir=tag_id)
+
+# ---- Save report
+# The report is a JSON file holding the output columns and the run settings
+#
+report={}
+report['output']=output
+report['description']='train_size={} test_size={} batch_size={} epochs={} data_aug={}'.format(train_size,test_size,batch_size,epochs,with_datagen)
+
+report_name='./run/report_{}.json'.format(tag_id)
+
+with open(report_name, 'w') as file:
+    json.dump(report, file)
+
+print('\nReport saved as ',report_name)
+end_time = time.time()
+duration = end_time-start_time
+print(f'Duration : {duration:.2f} s')
+print('-'*59)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 7 - That's all folks..
+
+%% Cell type:code id: tags:
+
+``` python
+print('\n{}'.format(time.strftime("%A %-d %B %Y, %H:%M:%S")))
+print("The work is done.\n")
+```
+
+%% Cell type:markdown id: tags:
+
+---
+![](../fidle/img/00-Fidle-logo-01_s.png)
+%% Cell type:markdown id: tags:
+
+![Fidle](../fidle/img/00-Fidle-header-01.png)
+
+# <!-- TITLE --> CNN with GTSRB dataset - Full convolutions
+<!-- DESC --> Episode 5: A lot of models, a lot of datasets and a lot of results.
+<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
+
+## Objectives :
+  - Try multiple solutions
+  - Design a generic and batch-usable code
+
+The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
+The final aim is to recognise them !
+Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
+
+
+## What we're going to do :
+
+Our main steps:
+ - Try n models with n datasets
+ - Save a JSON report
+ - Write code that can be run in batch mode
+
+## Step 1 - Import
+
+%% Cell type:code id: tags:
+
+``` python
+import tensorflow as tf
+from tensorflow import keras
+
+import numpy as np
+import h5py
+import os,time,json
+import random
+
+from IPython.display import display
+
+VERSION='1.6'
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 2 - Init and start
+
+%% Cell type:code id: tags:
+
+``` python
+# ---- Where I am ?
+now    = time.strftime("%A %d %B %Y - %Hh%Mm%Ss")
+here   = os.getcwd()
+random.seed(time.time())
+tag_id = '{:06}'.format(random.randint(0,99999))
+
+# ---- Who I am ?
+if 'OAR_JOB_ID' in os.environ:
+    oar_id=os.environ['OAR_JOB_ID']
+else:
+    oar_id='???'
+
+print('\nFull Convolutions Notebook')
+print('  Version            : {}'.format(VERSION))
+print('  Now is             : {}'.format(now))
+print('  OAR id             : {}'.format(oar_id))
+print('  Tag id             : {}'.format(tag_id))
+print('  Working directory  : {}'.format(here))
+print('  TensorFlow version :',tf.__version__)
+print('  Keras version      :',tf.keras.__version__)
+print('  for tensorboard    : --logdir {}/run/logs_{}'.format(here,tag_id))
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 3 - Dataset loading
+
+%% Cell type:code id: tags:
+
+``` python
+def read_dataset(name):
+    '''Reads h5 dataset from ./data
+
+    Arguments:  dataset name, without .h5
+    Returns:    x_train,y_train,x_test,y_test data'''
+    # ---- Read dataset
+    filename='./data/'+name+'.h5'
+    with  h5py.File(filename,'r') as f:
+        x_train = f['x_train'][:]
+        y_train = f['y_train'][:]
+        x_test  = f['x_test'][:]
+        y_test  = f['y_test'][:]
+
+    return x_train,y_train,x_test,y_test
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 4 - Models collection
+
+%% Cell type:code id: tags:
+
+``` python
+
+# A basic model
+#
+def get_model_v1(lx,ly,lz):
+
+    model = keras.models.Sequential()
+
+    model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(1500, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+
+# A more sophisticated model
+#
+def get_model_v2(lx,ly,lz):
+    model = keras.models.Sequential()
+
+    model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))
+    model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))
+    model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))
+    model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(512, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+
+def get_model_v3(lx,ly,lz):
+    model = keras.models.Sequential()
+    model.add(tf.keras.layers.Conv2D(32, (5, 5), padding='same',  activation='relu', input_shape=(lx,ly,lz)))
+    model.add(tf.keras.layers.BatchNormalization(axis=-1))
+    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add(tf.keras.layers.Dropout(0.2))
+
+    model.add(tf.keras.layers.Conv2D(64, (5, 5), padding='same',  activation='relu'))
+    model.add(tf.keras.layers.BatchNormalization(axis=-1))
+    model.add(tf.keras.layers.Conv2D(128, (5, 5), padding='same', activation='relu'))
+    model.add(tf.keras.layers.BatchNormalization(axis=-1))
+    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add(tf.keras.layers.Dropout(0.2))
+
+    model.add(tf.keras.layers.Flatten())
+    model.add(tf.keras.layers.Dense(512, activation='relu'))
+    model.add(tf.keras.layers.BatchNormalization())
+    model.add(tf.keras.layers.Dropout(0.4))
+
+    model.add(tf.keras.layers.Dense(43, activation='softmax'))
+    return model
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 5 - Multiple datasets, multiple models ;-)
+
+%% Cell type:code id: tags:
+
+``` python
+def multi_run(datasets, models, datagen=None,
+              train_size=1, test_size=1, batch_size=64, epochs=16,
+              verbose=0, extension_dir='last'):
+
+    # ---- Logs and models dir
+    #
+    os.makedirs('./run/logs_{}'.format(extension_dir),   mode=0o750, exist_ok=True)
+    os.makedirs('./run/models_{}'.format(extension_dir), mode=0o750, exist_ok=True)
+
+    # ---- Columns of output
+    #
+    output={}
+    output['Dataset']=[]
+    output['Size']   =[]
+    for m in models:
+        output[m+'_Accuracy'] = []
+        output[m+'_Duration'] = []
+
+    # ---- Let's go
+    #
+    for d_name in datasets:
+        print("\nDataset : ",d_name)
+
+        # ---- Read dataset
+        x_train,y_train,x_test,y_test = read_dataset(d_name)
+        d_size=os.path.getsize('./data/'+d_name+'.h5')/(1024*1024)
+        output['Dataset'].append(d_name)
+        output['Size'].append(d_size)
+
+        # ---- Get the shape
+        (n,lx,ly,lz) = x_train.shape
+        n_train = int(x_train.shape[0]*train_size)
+        n_test  = int(x_test.shape[0]*test_size)
+
+        # ---- For each model
+        for m_name,m_function in models.items():
+            print("    Run model {}  : ".format(m_name), end='')
+            # ---- get model
+            try:
+                model=m_function(lx,ly,lz)
+                # ---- Compile it
+                model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
+                # ---- Callbacks tensorboard
+                log_dir = "./run/logs_{}/tb_{}_{}".format(extension_dir, d_name, m_name)
+                tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
+                # ---- Callbacks bestmodel
+                save_dir = "./run/models_{}/model_{}_{}.h5".format(extension_dir, d_name, m_name)
+                bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)
+                # ---- Train
+                start_time = time.time()
+                if datagen==None:
+                    # ---- No data augmentation (datagen=None) --------------------------------------
+                    history = model.fit(x_train[:n_train], y_train[:n_train],
+                                        batch_size      = batch_size,
+                                        epochs          = epochs,
+                                        verbose         = verbose,
+                                        validation_data = (x_test[:n_test], y_test[:n_test]),
+                                        callbacks       = [tensorboard_callback, bestmodel_callback])
+                else:
+                    # ---- Data augmentation (datagen given) ----------------------------------------
+                    datagen.fit(x_train)
+                    history = model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),
+                                        steps_per_epoch = int(n_train/batch_size),
+                                        epochs          = epochs,
+                                        verbose         = verbose,
+                                        validation_data = (x_test[:n_test], y_test[:n_test]),
+                                        callbacks       = [tensorboard_callback, bestmodel_callback])
+
+                # ---- Result
+                end_time = time.time()
+                duration = end_time-start_time
+                accuracy = max(history.history["val_accuracy"])*100
+                #
+                output[m_name+'_Accuracy'].append(accuracy)
+                output[m_name+'_Duration'].append(duration)
+                print("Accuracy={:.2f} and Duration={:.2f})".format(accuracy,duration))
+            except:
+                output[m_name+'_Accuracy'].append('0')
+                output[m_name+'_Duration'].append('999')
+                print('-')
+    return output
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 6 - Run !
+
+%% Cell type:code id: tags:
+
+``` python
+start_time = time.time()
+
+print('\n---- Run','-'*50)
+
+# --------- Datasets, models, and more.. -----------------------------------
+#
+# ---- For tests
+# datasets   = ['set-24x24-L', 'set-24x24-RGB']
+# models     = {'v1':get_model_v1, 'v4':get_model_v2}
+# batch_size = 64
+# epochs     = 2
+# train_size = 0.1
+# test_size  = 0.1
+# with_datagen = False
+# verbose      = 0
+#
+# ---- All possibilities -> Run A
+# datasets     = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']
+# models       = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}
+# batch_size   = 64
+# epochs       = 16
+# train_size   = 1
+# test_size    = 1
+# with_datagen = False
+# verbose      = 0
+#
+# ---- Data augmentation -> Run B
+datasets     = ['set-48x48-RGB']
+models       = {'v2':get_model_v2}
+batch_size   = 64
+epochs       = 20
+train_size   = 1
+test_size    = 1
+with_datagen = True
+verbose      = 0
+#
+# ---------------------------------------------------------------------------
+
+# ---- Data augmentation
+#
+if with_datagen :
+    datagen = keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,
+                                                           featurewise_std_normalization=False,
+                                                           width_shift_range=0.1,
+                                                           height_shift_range=0.1,
+                                                           zoom_range=0.2,
+                                                           shear_range=0.1,
+                                                           rotation_range=10.)
+else:
+    datagen=None
+
+# ---- Run
+#
+output = multi_run(datasets, models,
+                   datagen=datagen,
+                   train_size=train_size, test_size=test_size,
+                   batch_size=batch_size, epochs=epochs,
+                   verbose=verbose,
+                   extension_dir=tag_id)
+
+# ---- Save report
+#
+report={}
+report['output']=output
+report['description']='train_size={} test_size={} batch_size={} epochs={} data_aug={}'.format(train_size,test_size,batch_size,epochs,with_datagen)
+
+report_name='./run/report_{}.json'.format(tag_id)
+
+with open(report_name, 'w') as file:
+    json.dump(report, file)
+
+print('\nReport saved as ',report_name)
+end_time = time.time()
+duration = end_time-start_time
+print(f'Duration : {duration:.2f} s')
+print('-'*59)
+```
+
+%% Cell type:markdown id: tags:
+
+## Step 7 - That's all folks..
+
+%% Cell type:code id: tags:
+
+``` python
+print('\n{}'.format(time.strftime("%A %-d %B %Y, %H:%M:%S")))
+print("The work is done.\n")
+```
+
+%% Cell type:markdown id: tags:
+
+---
+![](../fidle/img/00-Fidle-logo-01_s.png)
--- a/GTSRB/05.2-Full-convolutions-reports.ipynb
+++ b/GTSRB/05.2-Full-convolutions-reports.ipynb
--- a/GTSRB/06-Full-convolutions-batch.ipynb
+++ b/GTSRB/06-Full-convolutions-batch.ipynb
+%% Cell type:markdown id: tags:
+
+![Fidle](../fidle/img/00-Fidle-header-01.png)
+
+# <!-- TITLE --> CNN with GTSRB dataset - Full convolutions as a batch
+<!-- DESC --> Episode 6 : Run Full convolution notebook as a batch
+<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
+
+## Objectives :
+  - Run a notebook code as a **job**
+  - Follow up with Tensorboard
+
+The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
+The final aim is to recognise them !
+A description is available here: http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
+
+
+## What we're going to do :
+Our main steps:
+ - Run Full-convolution.ipynb as a batch :
+    - Notebook mode
+    - Script mode
+ - Tensorboard follow up
+
+## Step 1 - Run a notebook as a batch
+To run a notebook in a command line :
+```jupyter nbconvert (...) --to notebook --execute <notebook>```
+
+%% Cell type:raw id: tags:
+
+%%bash
+
+# ---- This will execute and save a notebook
+#
+jupyter nbconvert --ExecutePreprocessor.timeout=-1 --to notebook --output='./run/full_convolutions' --execute '05-Full-convolutions.ipynb'
+
+%% Cell type:markdown id: tags:
+
+## Step 2 - Export as a script (What we're going to do !)
+To export a notebook as a script :
+```jupyter nbconvert --to script <notebook>```
+To run the script :
+```ipython <script>```
+
+%% Cell type:code id: tags:
+
+``` python
+%%bash
+
+# ---- This will convert a notebook to a notebook.py script
+#
+jupyter nbconvert --to script --output='./run/full_convolutions_B' '05-Full-convolutions.ipynb'
+```
+
+%% Output
+
+    [NbConvertApp] Converting notebook 05-Full-convolutions.ipynb to script
+    [NbConvertApp] Writing 11305 bytes to ./run/full_convolutions_B.py
+
+%% Cell type:code id: tags:
+
+``` python
+!ls -l ./run/*.py
+```
+
+%% Output
+
+    -rw-r--r-- 1 pjluc pjluc 11305 Jan 21 00:13 ./run/full_convolutions_B.py
+
+%% Cell type:markdown id: tags:
+
+## Step 3 - Batch submission
+Create batch script :
+
+%% Cell type:code id: tags:
+
+``` python
+%%writefile "./run/batch_full_convolutions_B.sh"
+#!/bin/bash
+#OAR -n Full convolutions
+#OAR -t gpu
+#OAR -l /nodes=1/gpudevice=1,walltime=01:00:00
+#OAR --stdout full_convolutions_%jobid%.out
+#OAR --stderr full_convolutions_%jobid%.err
+#OAR --project fidle
+
+#---- With cpu
+# use :
+# OAR -l /nodes=1/core=32,walltime=01:00:00
+# and add a 2>/dev/null to ipython xxx
+
+# ----------------------------------
+#   _           _       _
+#  | |__   __ _| |_ ___| |__
+#  | '_ \ / _` | __/ __| '_ \
+#  | |_) | (_| | || (__| | | |
+#  |_.__/ \__,_|\__\___|_| |_|
+#                  Full convolutions
+# ----------------------------------
+#
+# Batch job : activates the conda environment, moves to RUN_DIR
+# and runs RUN_SCRIPT with ipython.
+
+CONDA_ENV=deeplearning2
+RUN_DIR=~/fidle/GTSRB
+RUN_SCRIPT=./run/full_convolutions_B.py
+
+# ---- Cuda Conda initialization
+#
+echo '------------------------------------------------------------'
+echo "Start : $0"
+echo '------------------------------------------------------------'
+#
+source /applis/environments/cuda_env.sh dahu 10.0
+source /applis/environments/conda.sh
+#
+conda activate "$CONDA_ENV"
+
+# ---- Run it...
+#      Stop here if RUN_DIR is not reachable : RUN_SCRIPT is a relative
+#      path and must not be looked up from another directory.
+#
+cd "$RUN_DIR" || { echo "Cannot cd to $RUN_DIR" >&2; exit 1; }
+ipython "$RUN_SCRIPT"
+exit
+```
+
+%% Output
+
+    Writing ./run/batch_full_convolutions_B.sh
+
+%% Cell type:code id: tags:
+
+``` python
+%%bash
+chmod 755 ./run/*.sh
+chmod 755 ./run/*.py
+ls -l ./run/*full_convolutions*
+```
+
+%% Output
+
+    -rwxr-xr-x 1 pjluc pjluc  1045 Jan 21 00:15 ./run/batch_full_convolutions_B.sh
+    -rwxr-xr-x 1 pjluc pjluc   611 Jan 19 15:53 ./run/batch_full_convolutions.sh
+    -rwxr-xr-x 1 pjluc pjluc 11305 Jan 21 00:13 ./run/full_convolutions_B.py
+
+%% Cell type:raw id: tags:
+
+%%bash
+./run/batch_full_convolutions.sh
+oarsub (...)
+
+%% Cell type:markdown id: tags:
+
+---
+![](../fidle/img/00-Fidle-logo-01_s.png)
+%% Cell type:markdown id: tags:
+
+![Fidle](../fidle/img/00-Fidle-header-01.png)
+
+# <!-- TITLE --> CNN with GTSRB dataset - Full convolutions as a batch
+<!-- DESC --> Episode 6 : Run Full convolution notebook as a batch
+<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
+
+## Objectives :
+  - Run a notebook code as a **job**
+  - Follow up with Tensorboard
+
+The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
+The final aim is to recognise them !
+A description is available here: http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
+
+
+## What we're going to do :
+Our main steps:
+ - Run Full-convolution.ipynb as a batch :
+    - Notebook mode
+    - Script mode
+ - Tensorboard follow up
+
+## Step 1 - Run a notebook as a batch
+To run a notebook in a command line :
+```jupyter nbconvert (...) --to notebook --execute <notebook>```
+
+%% Cell type:raw id: tags:
+
+%%bash
+
+# ---- This will execute and save a notebook
+#
+jupyter nbconvert --ExecutePreprocessor.timeout=-1 --to notebook --output='./run/full_convolutions' --execute '05-Full-convolutions.ipynb'
+
+%% Cell type:markdown id: tags:
+
+## Step 2 - Export as a script (What we're going to do !)
+To export a notebook as a script :
+```jupyter nbconvert --to script <notebook>```
+To run the script :
+```ipython <script>```
+
+%% Cell type:code id: tags:
+
+``` python
+%%bash
+
+# ---- This will convert a notebook to a notebook.py script
+#
+jupyter nbconvert --to script --output='./run/full_convolutions_B' '05-Full-convolutions.ipynb'
+```
+
+%% Output
+
+    [NbConvertApp] Converting notebook 05-Full-convolutions.ipynb to script
+    [NbConvertApp] Writing 11305 bytes to ./run/full_convolutions_B.py
+
+%% Cell type:code id: tags:
+
+``` python
+!ls -l ./run/*.py
+```
+
+%% Output
+
+    -rw-r--r-- 1 pjluc pjluc 11305 Jan 21 00:13 ./run/full_convolutions_B.py
+
+%% Cell type:markdown id: tags:
+
+## Step 3 - Batch submission
+Create batch script :
+
+%% Cell type:code id: tags:
+
+``` python
+%%writefile "./run/batch_full_convolutions_B.sh"
+#!/bin/bash
+#OAR -n Full convolutions
+#OAR -t gpu
+#OAR -l /nodes=1/gpudevice=1,walltime=01:00:00
+#OAR --stdout full_convolutions_%jobid%.out
+#OAR --stderr full_convolutions_%jobid%.err
+#OAR --project fidle
+
+#---- With cpu
+# use :
+# OAR -l /nodes=1/core=32,walltime=01:00:00
+# and add a 2>/dev/null to ipython xxx
+
+# ----------------------------------
+#   _           _       _
+#  | |__   __ _| |_ ___| |__
+#  | '_ \ / _` | __/ __| '_ \
+#  | |_) | (_| | || (__| | | |
+#  |_.__/ \__,_|\__\___|_| |_|
+#                  Full convolutions
+# ----------------------------------
+#
+# Batch job : activates the conda environment, moves to RUN_DIR
+# and runs RUN_SCRIPT with ipython.
+
+CONDA_ENV=deeplearning2
+RUN_DIR=~/fidle/GTSRB
+RUN_SCRIPT=./run/full_convolutions_B.py
+
+# ---- Cuda Conda initialization
+#
+echo '------------------------------------------------------------'
+echo "Start : $0"
+echo '------------------------------------------------------------'
+#
+source /applis/environments/cuda_env.sh dahu 10.0
+source /applis/environments/conda.sh
+#
+conda activate "$CONDA_ENV"
+
+# ---- Run it...
+#      Stop here if RUN_DIR is not reachable : RUN_SCRIPT is a relative
+#      path and must not be looked up from another directory.
+#
+cd "$RUN_DIR" || { echo "Cannot cd to $RUN_DIR" >&2; exit 1; }
+ipython "$RUN_SCRIPT"
+exit
+```
+
+%% Output
+
+    Writing ./run/batch_full_convolutions_B.sh
+
+%% Cell type:code id: tags:
+
+``` python
+%%bash
+chmod 755 ./run/*.sh
+chmod 755 ./run/*.py
+ls -l ./run/*full_convolutions*
+```
+
+%% Output
+
+    -rwxr-xr-x 1 pjluc pjluc  1045 Jan 21 00:15 ./run/batch_full_convolutions_B.sh
+    -rwxr-xr-x 1 pjluc pjluc   611 Jan 19 15:53 ./run/batch_full_convolutions.sh
+    -rwxr-xr-x 1 pjluc pjluc 11305 Jan 21 00:13 ./run/full_convolutions_B.py
+
+%% Cell type:raw id: tags:
+
+%%bash
+./run/batch_full_convolutions.sh
+oarsub (...)
+
+%% Cell type:markdown id: tags:
+
+---
+![](../fidle/img/00-Fidle-logo-01_s.png)
--- a/GTSRB/99-Scripts-Tensorboard.ipynb
+++ b/GTSRB/99-Scripts-Tensorboard.ipynb
--- a/GTSRB/GTSRB-01-Read-dataset.ipynb
+++ b/GTSRB/GTSRB-01-Read-dataset.ipynb
--- a/IMDB/01-Embedding-Keras.ipynb
+++ b/IMDB/01-Embedding-Keras.ipynb
--- a/IMDB/02-Prediction.ipynb
+++ b/IMDB/02-Prediction.ipynb
--- a/IMDB/03-LSTM-Keras.ipynb
+++ b/IMDB/03-LSTM-Keras.ipynb
--- a/LinearReg/01-Linear-Regression.ipynb
+++ b/LinearReg/01-Linear-Regression.ipynb
--- a/LinearReg/02-Gradient-descent.ipynb
+++ b/LinearReg/02-Gradient-descent.ipynb
--- a/LinearReg/03-Polynomial-Regression.ipynb
+++ b/LinearReg/03-Polynomial-Regression.ipynb
--- a/LinearReg/04-Logistic-Regression.ipynb
+++ b/LinearReg/04-Logistic-Regression.ipynb
No results found