Compare revisions

Jean-Luc Parouty · Jean-Luc Parouty · Jean-Luc Parouty · Jean-Luc Parouty · Jean-Luc Parouty · Jean-Luc Parouty Jean-Luc.Parouty@simap.grenoble-inp.fr
--- a/.gitignore
+++ b/.gitignore
@@ -2,5 +2,6 @@
 */.ipynb_checkpoints/*
 __pycache__
 */__pycache__/*
-/run/**
+run/
 */data/*
+!/GTSRB/data/dataset.tar.gz
--- a/GTSRB/01-Preparation-of-data.ipynb
+++ b/GTSRB/01-Preparation-of-data.ipynb
+%% Cell type:markdown id: tags:
+German Traffic Sign Recognition Benchmark (GTSRB)
+=================================================
+---
+Introduction au Deep Learning  (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020
+Version: 1.12
+## Episode 1 : Preparation of data
+ - Understanding the dataset
+ - Preparing and formatting enhanced data
+ - Save enhanced datasets in h5 file format
+%% Cell type:markdown id: tags:
+## 1/ Import and init
+%% Cell type:code id: tags:
+``` python
+import os, time, sys
+import csv
+import math, random
+import numpy as np
+import matplotlib.pyplot as plt
+import h5py
+from skimage.morphology import disk
+from skimage.filters import rank
+from skimage import io, color, exposure, transform
+import idle.pwk as ooo
+from importlib import reload
+ooo.init()
+```
+%% Cell type:markdown id: tags:
+## 2/ Read the dataset
+Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
+ - Each directory contains one CSV file with annotations ("GT-<ClassID>.csv") and the training images
+ - First line is fieldnames: Filename;Width;Height;Roi.X1;Roi.Y1;Roi.X2;Roi.Y2;ClassId
+### 2.1/ Useful functions
+%% Cell type:code id: tags:
+``` python
+def read_dataset_dir(csv_filename):
+    '''Reads traffic sign data from German Traffic Sign Recognition Benchmark dataset.
+    Arguments:  csv filename
+                Example /data/GTSRB/Train.csv
+    Returns:   np array of images, np array of corresponding labels'''
+    # ---- csv filename and path
+    #
+    name=os.path.basename(csv_filename)
+    path=os.path.dirname(csv_filename)
+    # ---- Read csv file
+    #
+    f,x,y = [],[],[]
+    with open(csv_filename) as csv_file:
+        reader = csv.DictReader(csv_file, delimiter=',')
+        for row in reader:
+            f.append( path+'/'+row['Path'] )
+            y.append( int(row['ClassId'])  )
+        csv_file.close()
+    nb_images = len(f)
+    # ---- Read images
+    #
+    for filename in f:
+        image=io.imread(filename)
+        x.append(image)
+        ooo.update_progress(name,len(x),nb_images)
+    # ---- Return
+    #
+    return np.array(x),np.array(y)
+```
+%% Cell type:markdown id: tags:
+### 2.2/ Read the data
+We will read the following datasets:
+ - **x_train, y_train** : Learning data
+ - **x_test, y_test** : Validation or test data
+ - x_meta, y_meta : Illustration data
+The learning data will be randomly shuffled and the illustration data sorted by class.
+Will take about 2-3'
+%% Cell type:code id: tags:
+``` python
+%%time
+# ---- Read datasets
+#      Each csv index file gives the path and the class of the images to load
+(x_train,y_train) = read_dataset_dir('./data/origine/Train.csv')
+(x_test ,y_test)  = read_dataset_dir('./data/origine/Test.csv')
+(x_meta ,y_meta)  = read_dataset_dir('./data/origine/Meta.csv')
+# ---- Shuffle train set
+#      Images and labels are zipped, so that each image keeps its own label
+combined = list(zip(x_train,y_train))
+random.shuffle(combined)
+#      Note : zip(*...) gives tuples, x_train and y_train are no longer np arrays
+x_train,y_train = zip(*combined)
+# ---- Sort Meta
+#      Sorted by class id (second element of each pair)
+combined = list(zip(x_meta,y_meta))
+combined.sort(key=lambda x: x[1])
+x_meta,y_meta = zip(*combined)
+```
+%% Cell type:markdown id: tags:
+## 3/ Few statistics about train dataset
+We want to know if our images are homogeneous in terms of size, ratio, width or height.
+### 3.1/ Do statistics
+%% Cell type:code id: tags:
+``` python
+train_size  = []
+train_ratio = []
+train_lx    = []
+train_ly    = []
+test_size   = []
+test_ratio  = []
+test_lx     = []
+test_ly     = []
+for image in x_train:
+    (lx,ly,lz) = image.shape
+    train_size.append(lx*ly/1024)
+    train_ratio.append(lx/ly)
+    train_lx.append(lx)
+    train_ly.append(ly)
+for image in x_test:
+    (lx,ly,lz) = image.shape
+    test_size.append(lx*ly/1024)
+    test_ratio.append(lx/ly)
+    test_lx.append(lx)
+    test_ly.append(ly)
+```
+%% Cell type:markdown id: tags:
+### 3.2/ Show statistics
+%% Cell type:code id: tags:
+``` python
+# ------ Global stuff
+print("x_train size : ",len(x_train))
+print("y_train size : ",len(y_train))
+print("x_test size  : ",len(x_test))
+print("y_test size  : ",len(y_test))
+# ------ Statistics / sizes
+plt.figure(figsize=(16,6))
+plt.hist([train_size,test_size], bins=100)
+plt.gca().set(title='Sizes in Kpixels - Train=[{:5.2f}, {:5.2f}]'.format(min(train_size),max(train_size)),
+              ylabel='Population',
+              xlim=[0,30])
+plt.legend(['Train','Test'])
+plt.show()
+# ------ Statistics / ratio lx/ly
+plt.figure(figsize=(16,6))
+plt.hist([train_ratio,test_ratio], bins=100)
+plt.gca().set(title='Ratio lx/ly - Train=[{:5.2f}, {:5.2f}]'.format(min(train_ratio),max(train_ratio)),
+              ylabel='Population',
+              xlim=[0.8,1.2])
+plt.legend(['Train','Test'])
+plt.show()
+# ------ Statistics / lx
+plt.figure(figsize=(16,6))
+plt.hist([train_lx,test_lx], bins=100)
+plt.gca().set(title='Images lx - Train=[{:5.2f}, {:5.2f}]'.format(min(train_lx),max(train_lx)),
+              ylabel='Population',
+              xlim=[20,150])
+plt.legend(['Train','Test'])
+plt.show()
+# ------ Statistics / ly
+plt.figure(figsize=(16,6))
+plt.hist([train_ly,test_ly], bins=100)
+plt.gca().set(title='Images ly - Train=[{:5.2f}, {:5.2f}]'.format(min(train_ly),max(train_ly)),
+              ylabel='Population',
+              xlim=[20,150])
+plt.legend(['Train','Test'])
+plt.show()
+# ------ Statistics / classId
+plt.figure(figsize=(16,6))
+plt.hist([y_train,y_test], bins=43)
+plt.gca().set(title='ClassesId',
+              ylabel='Population',
+              xlim=[0,43])
+plt.legend(['Train','Test'])
+plt.show()
+```
+%% Cell type:markdown id: tags:
+## 4/ List of classes
+What are the 43 classes of our images...
+%% Cell type:code id: tags:
+``` python
+ooo.plot_images(x_meta,y_meta, range(43), columns=8, x_size=2, y_size=2,
+                                colorbar=False, y_pred=None, cm='binary')
+```
+%% Cell type:markdown id: tags:
+## 5/ What does it really look like
+%% Cell type:code id: tags:
+``` python
+# ---- Get and show few images
+samples = [ random.randint(0,len(x_train)-1) for i in range(32)]
+ooo.plot_images(x_train,y_train, samples, columns=8, x_size=2, y_size=2, colorbar=False, y_pred=None, cm='binary')
+```
+%% Cell type:markdown id: tags:
+## 6/ dataset cooking...
+Images must have the **same size** to match the size of the network.
+It is possible to work on **rgb** or **monochrome** images and **equalize** the histograms.
+The data must be **normalized**.
+See : [Exposure with scikit-image](https://scikit-image.org/docs/dev/api/skimage.exposure.html)
+See : [Local histogram equalization](https://scikit-image.org/docs/dev/api/skimage.filters.rank.html#skimage.filters.rank.equalize)
+See : [Histogram equalization](https://scikit-image.org/docs/dev/api/skimage.exposure.html#skimage.exposure.equalize_hist)
+### 6.1/ Enhancement cook
+%% Cell type:code id: tags:
+``` python
+def images_enhancement(images, width=25, height=25, mode='RGB'):
+    '''
+    Resize and convert images - doesn't change originals.
+    input images must be RGBA or RGB.
+    args:
+        images :         images list
+        width,height :   new images size (25,25)
+        mode :           RGB | RGB-HE | L | L-HE | L-LHE | L-CLAHE
+    return:
+        numpy array of enhanced images
+    '''
+    modes = { 'RGB':3, 'RGB-HE':3, 'L':1, 'L-HE':1, 'L-LHE':1, 'L-CLAHE':1}
+    lz=modes[mode]
+    out=[]
+    for img in images:
+        # ---- if RGBA, convert to RGB
+        if img.shape[2]==4:
+            img=color.rgba2rgb(img)
+        # ---- Resize
+        img = transform.resize(img, (width,height))
+        # ---- RGB / Histogram Equalization
+        if mode=='RGB-HE':
+            hsv = color.rgb2hsv(img.reshape(width,height,3))
+            hsv[:, :, 2] = exposure.equalize_hist(hsv[:, :, 2])
+            img = color.hsv2rgb(hsv)
+        # ---- Grayscale
+        if mode=='L':
+            img=color.rgb2gray(img)
+        # ---- Grayscale / Histogram Equalization
+        if mode=='L-HE':
+            img=color.rgb2gray(img)
+            img=exposure.equalize_hist(img)
+        # ---- Grayscale / Local Histogram Equalization
+        if mode=='L-LHE':
+            img=color.rgb2gray(img)
+            img=rank.equalize(img, disk(10))/255.
+        # ---- Grayscale / Contrast Limited Adaptive Histogram Equalization (CLAHE)
+        if mode=='L-CLAHE':
+            img=color.rgb2gray(img)
+            img=exposure.equalize_adapthist(img)
+        # ---- Add image in list of list
+        out.append(img)
+        ooo.update_progress('Enhancement: ',len(out),len(images))
+    # ---- Reshape images
+    #     (-1, width,height,1) for L
+    #     (-1, width,height,3) for RGB
+    #
+    out = np.array(out,dtype='float64')
+    out = out.reshape(-1,width,height,lz)
+    return out
+```
+%% Cell type:markdown id: tags:
+### 6.2/ To get an idea of the different recipes
+%% Cell type:code id: tags:
+``` python
+i=random.randint(0,len(x_train)-16)
+x_samples = x_train[i:i+16]
+y_samples = y_train[i:i+16]
+datasets  = {}
+datasets['RGB']      = images_enhancement( x_samples, width=25, height=25, mode='RGB'  )
+datasets['RGB-HE']   = images_enhancement( x_samples, width=25, height=25, mode='RGB-HE'  )
+datasets['L']        = images_enhancement( x_samples, width=25, height=25, mode='L'  )
+datasets['L-HE']     = images_enhancement( x_samples, width=25, height=25, mode='L-HE'  )
+datasets['L-LHE']    = images_enhancement( x_samples, width=25, height=25, mode='L-LHE'  )
+datasets['L-CLAHE']  = images_enhancement( x_samples, width=25, height=25, mode='L-CLAHE'  )
+print('\nEXPECTED (Meta) :\n')
+x_expected=[ x_meta[i] for i in y_samples]
+ooo.plot_images(x_expected, y_samples, range(16), columns=16, x_size=1, y_size=1, colorbar=False, y_pred=None, cm='binary')
+print('\nORIGINAL IMAGES :\n')
+ooo.plot_images(x_samples,  y_samples, range(16), columns=16, x_size=1, y_size=1, colorbar=False, y_pred=None, cm='binary')
+print('\nENHANCED :\n')
+for k,d in datasets.items():
+    print("dataset : {}  min,max=[{:.3f},{:.3f}]  shape={}".format(k,d.min(),d.max(), d.shape))
+    ooo.plot_images(d, y_samples, range(16), columns=16, x_size=1, y_size=1, colorbar=False, y_pred=None, cm='binary')
+```
+%% Cell type:markdown id: tags:
+### 6.3/ Cook and save
+A function to save a dataset
+%% Cell type:code id: tags:
+``` python
+def save_h5_dataset(x_train, y_train, x_test, y_test, x_meta,y_meta, h5name):
+    # ---- Filename
+    filename='./data/'+h5name
+    # ---- Create h5 file
+    with h5py.File(filename, "w") as f:
+        f.create_dataset("x_train", data=x_train)
+        f.create_dataset("y_train", data=y_train)
+        f.create_dataset("x_test",  data=x_test)
+        f.create_dataset("y_test",  data=y_test)
+        f.create_dataset("x_meta",  data=x_meta)
+        f.create_dataset("y_meta",  data=y_meta)
+    # ---- done
+    size=os.path.getsize(filename)/(1024*1024)
+    print('Dataset : {:24s}  shape : {:22s} size : {:6.1f} Mo   (saved)\n'.format(filename, str(x_train.shape),size))
+```
+%% Cell type:markdown id: tags:
+Create enhanced datasets, and save them...
+Will take about 7-8'
+%% Cell type:code id: tags:
+``` python
+%%time
+# ---- One h5 file for each (size, mode) pair : 2 sizes x 4 modes = 8 datasets
+for s in [24, 48]:
+    for m in ['RGB', 'RGB-HE', 'L', 'L-LHE']:
+        # ---- A nice dataset name
+        name='set-{}x{}-{}.h5'.format(s,s,m)
+        print("\nDataset : ",name)
+        # ---- Enhancement
+        #      Train and test sets use the mode m, meta images are always enhanced as 'RGB'
+        x_train_new = images_enhancement( x_train, width=s, height=s, mode=m )
+        x_test_new  = images_enhancement( x_test,  width=s, height=s, mode=m )
+        x_meta_new  = images_enhancement( x_meta,  width=s, height=s, mode='RGB' )
+        # ---- Save
+        save_h5_dataset( x_train_new, y_train, x_test_new, y_test, x_meta_new,y_meta, name)
+# ---- Drop the references to the last enhanced train and test arrays
+x_train_new,x_test_new=0,0
+```
+%% Cell type:markdown id: tags:
+## 7/ Reload data to be sure ;-)
+%% Cell type:code id: tags:
+``` python
+%%time
+dataset='set-48x48-L'
+samples=range(24)
+with  h5py.File('./data/'+dataset+'.h5') as f:
+    x_tmp = f['x_train'][:]
+    y_tmp = f['y_train'][:]
+    print("dataset loaded from h5 file.")
+ooo.plot_images(x_tmp,y_tmp, samples, columns=8, x_size=2, y_size=2, colorbar=False, y_pred=None, cm='binary')
+x_tmp,y_tmp=0,0
+```
+%% Cell type:markdown id: tags:
+----
+That's all folks !
+%% Cell type:markdown id: tags:
+German Traffic Sign Recognition Benchmark (GTSRB)
+=================================================
+---
+Introduction au Deep Learning  (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020
+Version: 1.12
+## Episode 1 : Preparation of data
+ - Understanding the dataset
+ - Preparing and formatting enhanced data
+ - Save enhanced datasets in h5 file format
+%% Cell type:markdown id: tags:
+## 1/ Import and init
+%% Cell type:code id: tags:
+``` python
+import os, time, sys
+import csv
+import math, random
+import numpy as np
+import matplotlib.pyplot as plt
+import h5py
+from skimage.morphology import disk
+from skimage.filters import rank
+from skimage import io, color, exposure, transform
+import idle.pwk as ooo
+from importlib import reload
+ooo.init()
+```
+%% Cell type:markdown id: tags:
+## 2/ Read the dataset
+Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
+ - Each directory contains one CSV file with annotations ("GT-<ClassID>.csv") and the training images
+ - First line is fieldnames: Filename;Width;Height;Roi.X1;Roi.Y1;Roi.X2;Roi.Y2;ClassId
+### 2.1/ Useful functions
+%% Cell type:code id: tags:
+``` python
+def read_dataset_dir(csv_filename):
+    '''Reads traffic sign data from German Traffic Sign Recognition Benchmark dataset.
+    Arguments:  csv filename
+                Example /data/GTSRB/Train.csv
+    Returns:   np array of images, np array of corresponding labels'''
+    # ---- csv filename and path
+    #
+    name=os.path.basename(csv_filename)
+    path=os.path.dirname(csv_filename)
+    # ---- Read csv file
+    #
+    f,x,y = [],[],[]
+    with open(csv_filename) as csv_file:
+        reader = csv.DictReader(csv_file, delimiter=',')
+        for row in reader:
+            f.append( path+'/'+row['Path'] )
+            y.append( int(row['ClassId'])  )
+        csv_file.close()
+    nb_images = len(f)
+    # ---- Read images
+    #
+    for filename in f:
+        image=io.imread(filename)
+        x.append(image)
+        ooo.update_progress(name,len(x),nb_images)
+    # ---- Return
+    #
+    return np.array(x),np.array(y)
+```
+%% Cell type:markdown id: tags:
+### 2.2/ Read the data
+We will read the following datasets:
+ - **x_train, y_train** : Learning data
+ - **x_test, y_test** : Validation or test data
+ - x_meta, y_meta : Illustration data
+The learning data will be randomly shuffled and the illustration data sorted by class.
+Will take about 2-3'
+%% Cell type:code id: tags:
+``` python
+%%time
+# ---- Read datasets
+(x_train,y_train) = read_dataset_dir('./data/origine/Train.csv')
+(x_test ,y_test)  = read_dataset_dir('./data/origine/Test.csv')
+(x_meta ,y_meta)  = read_dataset_dir('./data/origine/Meta.csv')
+# ---- Shuffle train set
+combined = list(zip(x_train,y_train))
+random.shuffle(combined)
+x_train,y_train = zip(*combined)
+# ---- Sort Meta
+combined = list(zip(x_meta,y_meta))
+combined.sort(key=lambda x: x[1])
+x_meta,y_meta = zip(*combined)
+```
+%% Cell type:markdown id: tags:
+## 3/ Few statistics about train dataset
+We want to know if our images are homogeneous in terms of size, ratio, width or height.
+### 3.1/ Do statistics
+%% Cell type:code id: tags:
+``` python
+train_size  = []
+train_ratio = []
+train_lx    = []
+train_ly    = []
+test_size   = []
+test_ratio  = []
+test_lx     = []
+test_ly     = []
+for image in x_train:
+    (lx,ly,lz) = image.shape
+    train_size.append(lx*ly/1024)
+    train_ratio.append(lx/ly)
+    train_lx.append(lx)
+    train_ly.append(ly)
+for image in x_test:
+    (lx,ly,lz) = image.shape
+    test_size.append(lx*ly/1024)
+    test_ratio.append(lx/ly)
+    test_lx.append(lx)
+    test_ly.append(ly)
+```
+%% Cell type:markdown id: tags:
+### 3.2/ Show statistics
+%% Cell type:code id: tags:
+``` python
+# ------ Global stuff
+print("x_train size : ",len(x_train))
+print("y_train size : ",len(y_train))
+print("x_test size  : ",len(x_test))
+print("y_test size  : ",len(y_test))
+# ------ Statistics / sizes
+plt.figure(figsize=(16,6))
+plt.hist([train_size,test_size], bins=100)
+plt.gca().set(title='Sizes in Kpixels - Train=[{:5.2f}, {:5.2f}]'.format(min(train_size),max(train_size)),
+              ylabel='Population',
+              xlim=[0,30])
+plt.legend(['Train','Test'])
+plt.show()
+# ------ Statistics / ratio lx/ly
+plt.figure(figsize=(16,6))
+plt.hist([train_ratio,test_ratio], bins=100)
+plt.gca().set(title='Ratio lx/ly - Train=[{:5.2f}, {:5.2f}]'.format(min(train_ratio),max(train_ratio)),
+              ylabel='Population',
+              xlim=[0.8,1.2])
+plt.legend(['Train','Test'])
+plt.show()
+# ------ Statistics / lx
+plt.figure(figsize=(16,6))
+plt.hist([train_lx,test_lx], bins=100)
+plt.gca().set(title='Images lx - Train=[{:5.2f}, {:5.2f}]'.format(min(train_lx),max(train_lx)),
+              ylabel='Population',
+              xlim=[20,150])
+plt.legend(['Train','Test'])
+plt.show()
+# ------ Statistics / ly
+plt.figure(figsize=(16,6))
+plt.hist([train_ly,test_ly], bins=100)
+plt.gca().set(title='Images ly - Train=[{:5.2f}, {:5.2f}]'.format(min(train_ly),max(train_ly)),
+              ylabel='Population',
+              xlim=[20,150])
+plt.legend(['Train','Test'])
+plt.show()
+# ------ Statistics / classId
+plt.figure(figsize=(16,6))
+plt.hist([y_train,y_test], bins=43)
+plt.gca().set(title='ClassesId',
+              ylabel='Population',
+              xlim=[0,43])
+plt.legend(['Train','Test'])
+plt.show()
+```
+%% Cell type:markdown id: tags:
+## 4/ List of classes
+What are the 43 classes of our images...
+%% Cell type:code id: tags:
+``` python
+ooo.plot_images(x_meta,y_meta, range(43), columns=8, x_size=2, y_size=2,
+                                colorbar=False, y_pred=None, cm='binary')
+```
+%% Cell type:markdown id: tags:
+## 5/ What does it really look like
+%% Cell type:code id: tags:
+``` python
+# ---- Get and show few images
+samples = [ random.randint(0,len(x_train)-1) for i in range(32)]
+ooo.plot_images(x_train,y_train, samples, columns=8, x_size=2, y_size=2, colorbar=False, y_pred=None, cm='binary')
+```
+%% Cell type:markdown id: tags:
+## 6/ dataset cooking...
+Images must have the **same size** to match the size of the network.
+It is possible to work on **rgb** or **monochrome** images and **equalize** the histograms.
+The data must be **normalized**.
+See : [Exposure with scikit-image](https://scikit-image.org/docs/dev/api/skimage.exposure.html)
+See : [Local histogram equalization](https://scikit-image.org/docs/dev/api/skimage.filters.rank.html#skimage.filters.rank.equalize)
+See : [Histogram equalization](https://scikit-image.org/docs/dev/api/skimage.exposure.html#skimage.exposure.equalize_hist)
+### 6.1/ Enhancement cook
+%% Cell type:code id: tags:
+``` python
+def images_enhancement(images, width=25, height=25, mode='RGB'):
+    '''
+    Resize and convert images - doesn't change originals.
+    input images must be RGBA or RGB.
+    args:
+        images :         images list
+        width,height :   new images size (25,25)
+        mode :           RGB | RGB-HE | L | L-HE | L-LHE | L-CLAHE
+    return:
+        numpy array of enhanced images
+    '''
+    modes = { 'RGB':3, 'RGB-HE':3, 'L':1, 'L-HE':1, 'L-LHE':1, 'L-CLAHE':1}
+    lz=modes[mode]
+    out=[]
+    for img in images:
+        # ---- if RGBA, convert to RGB
+        if img.shape[2]==4:
+            img=color.rgba2rgb(img)
+        # ---- Resize
+        img = transform.resize(img, (width,height))
+        # ---- RGB / Histogram Equalization
+        if mode=='RGB-HE':
+            hsv = color.rgb2hsv(img.reshape(width,height,3))
+            hsv[:, :, 2] = exposure.equalize_hist(hsv[:, :, 2])
+            img = color.hsv2rgb(hsv)
+        # ---- Grayscale
+        if mode=='L':
+            img=color.rgb2gray(img)
+        # ---- Grayscale / Histogram Equalization
+        if mode=='L-HE':
+            img=color.rgb2gray(img)
+            img=exposure.equalize_hist(img)
+        # ---- Grayscale / Local Histogram Equalization
+        if mode=='L-LHE':
+            img=color.rgb2gray(img)
+            img=rank.equalize(img, disk(10))/255.
+        # ---- Grayscale / Contrast Limited Adaptive Histogram Equalization (CLAHE)
+        if mode=='L-CLAHE':
+            img=color.rgb2gray(img)
+            img=exposure.equalize_adapthist(img)
+        # ---- Add image in list of list
+        out.append(img)
+        ooo.update_progress('Enhancement: ',len(out),len(images))
+    # ---- Reshape images
+    #     (-1, width,height,1) for L
+    #     (-1, width,height,3) for RGB
+    #
+    out = np.array(out,dtype='float64')
+    out = out.reshape(-1,width,height,lz)
+    return out
+```
+%% Cell type:markdown id: tags:
+### 6.2/ To get an idea of the different recipes
+%% Cell type:code id: tags:
+``` python
+i=random.randint(0,len(x_train)-16)
+x_samples = x_train[i:i+16]
+y_samples = y_train[i:i+16]
+datasets  = {}
+datasets['RGB']      = images_enhancement( x_samples, width=25, height=25, mode='RGB'  )
+datasets['RGB-HE']   = images_enhancement( x_samples, width=25, height=25, mode='RGB-HE'  )
+datasets['L']        = images_enhancement( x_samples, width=25, height=25, mode='L'  )
+datasets['L-HE']     = images_enhancement( x_samples, width=25, height=25, mode='L-HE'  )
+datasets['L-LHE']    = images_enhancement( x_samples, width=25, height=25, mode='L-LHE'  )
+datasets['L-CLAHE']  = images_enhancement( x_samples, width=25, height=25, mode='L-CLAHE'  )
+print('\nEXPECTED (Meta) :\n')
+x_expected=[ x_meta[i] for i in y_samples]
+ooo.plot_images(x_expected, y_samples, range(16), columns=16, x_size=1, y_size=1, colorbar=False, y_pred=None, cm='binary')
+print('\nORIGINAL IMAGES :\n')
+ooo.plot_images(x_samples,  y_samples, range(16), columns=16, x_size=1, y_size=1, colorbar=False, y_pred=None, cm='binary')
+print('\nENHANCED :\n')
+for k,d in datasets.items():
+    print("dataset : {}  min,max=[{:.3f},{:.3f}]  shape={}".format(k,d.min(),d.max(), d.shape))
+    ooo.plot_images(d, y_samples, range(16), columns=16, x_size=1, y_size=1, colorbar=False, y_pred=None, cm='binary')
+```
+%% Cell type:markdown id: tags:
+### 6.3/ Cook and save
+A function to save a dataset
+%% Cell type:code id: tags:
+``` python
+def save_h5_dataset(x_train, y_train, x_test, y_test, x_meta,y_meta, h5name):
+    # ---- Filename
+    filename='./data/'+h5name
+    # ---- Create h5 file
+    with h5py.File(filename, "w") as f:
+        f.create_dataset("x_train", data=x_train)
+        f.create_dataset("y_train", data=y_train)
+        f.create_dataset("x_test",  data=x_test)
+        f.create_dataset("y_test",  data=y_test)
+        f.create_dataset("x_meta",  data=x_meta)
+        f.create_dataset("y_meta",  data=y_meta)
+    # ---- done
+    size=os.path.getsize(filename)/(1024*1024)
+    print('Dataset : {:24s}  shape : {:22s} size : {:6.1f} Mo   (saved)\n'.format(filename, str(x_train.shape),size))
+```
+%% Cell type:markdown id: tags:
+Create enhanced datasets, and save them...
+Will take about 7-8'
+%% Cell type:code id: tags:
+``` python
+%%time
+for s in [24, 48]:
+    for m in ['RGB', 'RGB-HE', 'L', 'L-LHE']:
+        # ---- A nice dataset name
+        name='set-{}x{}-{}.h5'.format(s,s,m)
+        print("\nDataset : ",name)
+        # ---- Enhancement
+        x_train_new = images_enhancement( x_train, width=s, height=s, mode=m )
+        x_test_new  = images_enhancement( x_test,  width=s, height=s, mode=m )
+        x_meta_new  = images_enhancement( x_meta,  width=s, height=s, mode='RGB' )
+        # ---- Save
+        save_h5_dataset( x_train_new, y_train, x_test_new, y_test, x_meta_new,y_meta, name)
+x_train_new,x_test_new=0,0
+```
+%% Cell type:markdown id: tags:
+## 7/ Reload data to be sure ;-)
+%% Cell type:code id: tags:
+``` python
+%%time
+dataset='set-48x48-L'
+samples=range(24)
+with  h5py.File('./data/'+dataset+'.h5') as f:
+    x_tmp = f['x_train'][:]
+    y_tmp = f['y_train'][:]
+    print("dataset loaded from h5 file.")
+ooo.plot_images(x_tmp,y_tmp, samples, columns=8, x_size=2, y_size=2, colorbar=False, y_pred=None, cm='binary')
+x_tmp,y_tmp=0,0
+```
+%% Cell type:markdown id: tags:
+----
+That's all folks !
--- a/GTSRB/02-First-convolutions.ipynb
+++ b/GTSRB/02-First-convolutions.ipynb
--- a/GTSRB/03-Tracking-and-visualizing.ipynb
+++ b/GTSRB/03-Tracking-and-visualizing.ipynb
+%% Cell type:markdown id: tags:
+German Traffic Sign Recognition Benchmark (GTSRB)
+=================================================
+---
+Introduction au Deep Learning  (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020
+## Episode 3 : Tracking, visualizing and save models
+Our main steps:
+ - Monitoring and understanding our model training
+ - Add recovery points
+ - Analyze the results
+ - Restore and run a recovery point
+## 1/ Import and init
+%% Cell type:code id: tags:
+``` python
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.callbacks import TensorBoard
+import numpy as np
+import h5py
+from sklearn.metrics import confusion_matrix
+import matplotlib.pyplot as plt
+import seaborn as sn
+import os, time, random
+import idle.pwk as ooo
+from importlib import reload
+ooo.init()
+```
+%% Cell type:markdown id: tags:
+## 2/ Load dataset
+Dataset is one of the datasets saved in episode 1: set-24x24-RGB, set-24x24-L, set-48x48-L, etc.
+First of all, we're going to use a smart dataset : **set-24x24-L**
+(with a GPU, it only takes 35'' compared to more than 5' with a CPU !)
+%% Cell type:code id: tags:
+``` python
+%%time
+dataset ='set-24x24-RGB'
+def read_dataset(name):
+    '''Reads h5 dataset from ./data
+    Arguments:  dataset name, without .h5
+    Returns:    x_train,y_train,x_test,y_test data'''
+    # ---- Read dataset
+    filename='./data/'+name+'.h5'
+    with  h5py.File(filename) as f:
+        x_train = f['x_train'][:]
+        y_train = f['y_train'][:]
+        x_test  = f['x_test'][:]
+        y_test  = f['y_test'][:]
+    # ---- done
+    print('Dataset "{}" is loaded. ({:.1f} Mo)\n'.format(name,os.path.getsize(filename)/(1024*1024)))
+    return x_train,y_train,x_test,y_test
+x_train,y_train,x_test,y_test = read_dataset('set-48x48-L')
+```
+%% Cell type:markdown id: tags:
+## 3/ Have a look at the dataset
+Note: Data must be reshaped for matplotlib
+%% Cell type:code id: tags:
+``` python
+print("x_train : ", x_train.shape)
+print("y_train : ", y_train.shape)
+print("x_test  : ", x_test.shape)
+print("y_test  : ", y_test.shape)
+ooo.plot_images(x_train, y_train, range(12), columns=6,  x_size=2, y_size=2)
+ooo.plot_images(x_train, y_train, range(36), columns=12, x_size=1, y_size=1)
+```
+%% Cell type:markdown id: tags:
+## 4/ Create model
+We will now build a model and train it...
+Some models :
+%% Cell type:code id: tags:
+``` python
+# A basic model
+#
+def get_model_v1(lx,ly,lz):
+    model = keras.models.Sequential()
+    model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(1500, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+```
+%% Cell type:markdown id: tags:
+## 5/ Prepare callbacks
+We will add 2 callbacks :
+ - **TensorBoard**
+Training logs, which can be visualised with Tensorboard.
+`#tensorboard --logdir ./run/logs`
+IMPORTANT : Restart tensorboard at each run
+ - **Model backup**
+ It is possible to save the model each xx epoch or at each improvement.
+ The model can be saved completely or partially (weight).
+ For full format, we can use HDF5 format.
+%% Cell type:code id: tags:
+``` python
+# To clean old logs and saved model, run this cell
+#
+!/bin/rm -r ./run/logs ./run/models 2>/dev/null
+!/bin/ls -l ./run  2>/dev/null
+```
+%% Cell type:code id: tags:
+``` python
+# ---- Directories for the saved models and for the tensorboard logs
+ooo.mkdir('./run/models')
+ooo.mkdir('./run/logs')
+# ---- Callback tensorboard
+#      One log directory per run, tagged by ooo.tag_now()
+log_dir = "./run/logs/tb_" + ooo.tag_now()
+tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
+# ---- Callback ModelCheckpoint - Save best model
+#      The monitored quantity is 'accuracy', not 'val_accuracy'
+save_dir = "./run/models/best-model.h5"
+bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)
+# ---- Callback ModelCheckpoint - Save model each epochs
+#      NOTE(review): an integer save_freq is a number of samples or of batches,
+#      depending on the TensorFlow version - check that 2000*5 really
+#      means "each epoch" with the version in use
+save_dir = "./run/models/model-{epoch:04d}.h5"
+savemodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_freq=2000*5)
+```
+%% Cell type:markdown id: tags:
+## 5/ Train the model
+**Get the shape of my data :**
+%% Cell type:code id: tags:
+``` python
+(n,lx,ly,lz) = x_train.shape
+print("Images of the dataset have this folowing shape : ",(lx,ly,lz))
+```
+%% Cell type:markdown id: tags:
+**Get and compile a model, with the data shape :**
+%% Cell type:code id: tags:
+``` python
+model = get_model_v1(lx,ly,lz)
+# model.summary()
+model.compile(optimizer='adam',
+              loss='sparse_categorical_crossentropy',
+              metrics=['accuracy'])
+```
+%% Cell type:markdown id: tags:
+**Train it :**
+Note : The learning curve can be followed in real time with Tensorboard :
+`#tensorboard --logdir ./run/logs`
+%% Cell type:code id: tags:
+``` python
+%%time
+batch_size = 64
+epochs     = 5
+# ---- Shuffle train data
+x_train,y_train=ooo.shuffle_np_dataset(x_train,y_train)
+# ---- Train
+# Note: To be faster in our example, we take only 2000 values
+#       but in the real world, we'd take the whole dataset!
+#
+history = model.fit(  x_train[:2000], y_train[:2000],
+                      batch_size=batch_size,
+                      epochs=epochs,
+                      verbose=1,
+                      validation_data=(x_test[:200], y_test[:200]),
+                      callbacks=[tensorboard_callback, bestmodel_callback, savemodel_callback] )
+model.save('./run/models/last-model.h5')
+```
+%% Cell type:markdown id: tags:
+**Evaluate it :**
+%% Cell type:code id: tags:
+``` python
+max_val_accuracy = max(history.history["val_accuracy"])
+print("Max validation accuracy is : {:.4f}".format(max_val_accuracy))
+```
+%% Cell type:code id: tags:
+``` python
+score = model.evaluate(x_test, y_test, verbose=0)
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+%% Cell type:markdown id: tags:
+## 6/ History
+model.fit() returns the learning history
+%% Cell type:code id: tags:
+``` python
+ooo.plot_history(history)
+```
+%% Cell type:markdown id: tags:
+## 7/ Evaluation and confusion
+%% Cell type:code id: tags:
+``` python
+reload(ooo)
+y_pred   = model.predict_classes(x_test)
+conf_mat = confusion_matrix(y_test,y_pred, normalize="true", labels=range(43))
+ooo.plot_confusion_matrix(conf_mat)
+```
+%% Cell type:markdown id: tags:
+## 8/ Restore and evaluate
+### 8.1/ List saved models :
+%% Cell type:code id: tags:
+``` python
+!find ./run/models/
+```
+%% Cell type:markdown id: tags:
+### 8.2/ Restore a model :
+%% Cell type:code id: tags:
+``` python
+# ---- Reload the model saved by the "best model" checkpoint callback
+loaded_model = tf.keras.models.load_model('./run/models/best-model.h5')
+# loaded_model.summary()
+print("Loaded.")
+```
+%% Cell type:markdown id: tags:
+### 8.3/ Evaluate it :
+%% Cell type:code id: tags:
+``` python
+score = loaded_model.evaluate(x_test, y_test, verbose=0)
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+%% Cell type:markdown id: tags:
+### 8.4/ Make a prediction :
+%% Cell type:code id: tags:
+``` python
+# ---- Get a random image
+#
+i   = random.randint(1,len(x_test))
+x,y = x_test[i], y_test[i]
+# ---- Do prediction
+#
+predictions = loaded_model.predict( np.array([x]) )
+# ---- A prediction is just the output layer
+#
+print("\nOutput layer from model is (x100) :\n")
+with np.printoptions(precision=2, suppress=True, linewidth=95):
+    print(predictions*100)
+# ---- Graphic visualisation
+#
+print("\nGraphically :\n")
+plt.figure(figsize=(12,2))
+plt.bar(range(43), predictions[0], align='center', alpha=0.5)
+plt.ylabel('Probability')
+plt.ylim((0,1))
+plt.xlabel('Class')
+plt.title('Trafic Sign prediction')
+plt.show()
+# ---- Predict class
+#
+p = np.argmax(predictions)
+# ---- Show result
+#
+print("\nPrediction on the left, real stuff on the right :\n")
+ooo.plot_images([x,x_meta[y]], [p,y], range(2),  columns=3,  x_size=3, y_size=2)
+if p==y:
+    print("YEEES ! that's right!")
+else:
+    print("oups, that's wrong ;-(")
+```
+%% Cell type:markdown id: tags:
+---
+That's all folks !
+%% Cell type:code id: tags:
+``` python
+!kill $(ps ax | grep 'tensorboard --port 18529' | grep -v grep | awk '{print $1}')
+```
+%% Cell type:code id: tags:
+``` python
+%load_ext tensorboard
+```
+%% Cell type:code id: tags:
+``` python
+%tensorboard --host 0.0.0.0 --port 18529 --logdir ./run/logs
+```
+%% Cell type:code id: tags:
+``` python
+```
+%% Cell type:code id: tags:
+``` python
+```
+%% Cell type:code id: tags:
+``` python
+```
+%% Cell type:markdown id: tags:
+German Traffic Sign Recognition Benchmark (GTSRB)
+=================================================
+---
+Introduction au Deep Learning  (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020
+## Episode 3 : Tracking, visualizing and save models
+Our main steps:
+ - Monitoring and understanding our model training
+ - Add recovery points
+ - Analyze the results
+ - Restore and run recovery points
+## 1/ Import and init
+%% Cell type:code id: tags:
+``` python
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.callbacks import TensorBoard
+import numpy as np
+import h5py
+from sklearn.metrics import confusion_matrix
+import matplotlib.pyplot as plt
+import seaborn as sn
+import os, time, random
+import idle.pwk as ooo
+from importlib import reload
+ooo.init()
+```
+%% Cell type:markdown id: tags:
+## 2/ Load dataset
+Dataset is one of the saved dataset: RGB25, RGB35, L25, L35, etc.
+First of all, we're going to use a smart dataset : **set-24x24-L**
+(with a GPU, it only takes 35'' compared to more than 5' with a CPU !)
+%% Cell type:code id: tags:
+``` python
+%%time
+dataset ='set-24x24-RGB'
+def read_dataset(name):
+    '''Reads h5 dataset from ./data
+    Arguments:  dataset name, without .h5
+    Returns:    x_train,y_train,x_test,y_test data'''
+    # ---- Read dataset
+    filename='./data/'+name+'.h5'
+    with  h5py.File(filename) as f:
+        x_train = f['x_train'][:]
+        y_train = f['y_train'][:]
+        x_test  = f['x_test'][:]
+        y_test  = f['y_test'][:]
+    # ---- done
+    print('Dataset "{}" is loaded. ({:.1f} Mo)\n'.format(name,os.path.getsize(filename)/(1024*1024)))
+    return x_train,y_train,x_test,y_test
+x_train,y_train,x_test,y_test = read_dataset('set-48x48-L')
+```
+%% Cell type:markdown id: tags:
+## 3/ Have a look to the dataset
+Note: Data must be reshaped for matplotlib
+%% Cell type:code id: tags:
+``` python
+print("x_train : ", x_train.shape)
+print("y_train : ", y_train.shape)
+print("x_test  : ", x_test.shape)
+print("y_test  : ", y_test.shape)
+ooo.plot_images(x_train, y_train, range(12), columns=6,  x_size=2, y_size=2)
+ooo.plot_images(x_train, y_train, range(36), columns=12, x_size=1, y_size=1)
+```
+%% Cell type:markdown id: tags:
+## 4/ Create model
+We will now build a model and train it...
+Some models :
+%% Cell type:code id: tags:
+``` python
+# A basic model
+#
+def get_model_v1(lx,ly,lz):
+    model = keras.models.Sequential()
+    model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
+    model.add( keras.layers.MaxPooling2D((2, 2)))
+    model.add( keras.layers.Dropout(0.2))
+    model.add( keras.layers.Flatten())
+    model.add( keras.layers.Dense(1500, activation='relu'))
+    model.add( keras.layers.Dropout(0.5))
+    model.add( keras.layers.Dense(43, activation='softmax'))
+    return model
+```
+%% Cell type:markdown id: tags:
+## 5/ Prepare callbacks
+We will add 2 callbacks :
+ - **TensorBoard**
+Training logs, which can be visualised with Tensorboard.
+`#tensorboard --logdir ./run/logs`
+IMPORTANT : Relancer tensorboard à chaque run
+ - **Model backup**
+ It is possible to save the model each xx epoch or at each improvement.
+ The model can be saved completely or partially (weight).
+ For full format, we can use HDF5 format.
+%% Cell type:code id: tags:
+``` python
+# To clean old logs and saved model, run this cell
+#
+!/bin/rm -r ./run/logs ./run/models 2>/dev/null
+!/bin/ls -l ./run  2>/dev/null
+```
+%% Cell type:code id: tags:
+``` python
+ooo.mkdir('./run/models')
+ooo.mkdir('./run/logs')
+# ---- Callback tensorboard
+log_dir = "./run/logs/tb_" + ooo.tag_now()
+tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
+# ---- Callback ModelCheckpoint - Save best model
+save_dir = "./run/models/best-model.h5"
+bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)
+# ---- Callback ModelCheckpoint - Save model each epochs
+save_dir = "./run/models/model-{epoch:04d}.h5"
+savemodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_freq=2000*5)
+```
+%% Cell type:markdown id: tags:
+## 5/ Train the model
+**Get the shape of my data :**
+%% Cell type:code id: tags:
+``` python
+(n,lx,ly,lz) = x_train.shape
+print("Images of the dataset have this folowing shape : ",(lx,ly,lz))
+```
+%% Cell type:markdown id: tags:
+**Get and compile a model, with the data shape :**
+%% Cell type:code id: tags:
+``` python
+model = get_model_v1(lx,ly,lz)
+# model.summary()
+model.compile(optimizer='adam',
+              loss='sparse_categorical_crossentropy',
+              metrics=['accuracy'])
+```
+%% Cell type:markdown id: tags:
+**Train it :**
+Note : La courbe d'apprentissage est visible en temps réel avec Tensorboard :
+`#tensorboard --logdir ./run/logs`
+%% Cell type:code id: tags:
+``` python
+%%time
+batch_size = 64
+epochs     = 5
+# ---- Shuffle train data
+x_train,y_train=ooo.shuffle_np_dataset(x_train,y_train)
+# ---- Train
+# Note: To be faster in our example, we take only 2000 values
+#       but in the real world, we'd take the whole dataset!
+#
+history = model.fit(  x_train[:2000], y_train[:2000],
+                      batch_size=batch_size,
+                      epochs=epochs,
+                      verbose=1,
+                      validation_data=(x_test[:200], y_test[:200]),
+                      callbacks=[tensorboard_callback, bestmodel_callback, savemodel_callback] )
+model.save('./run/models/last-model.h5')
+```
+%% Cell type:markdown id: tags:
+**Evaluate it :**
+%% Cell type:code id: tags:
+``` python
+max_val_accuracy = max(history.history["val_accuracy"])
+print("Max validation accuracy is : {:.4f}".format(max_val_accuracy))
+```
+%% Cell type:code id: tags:
+``` python
+score = model.evaluate(x_test, y_test, verbose=0)
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+%% Cell type:markdown id: tags:
+## 6/ History
+model.fit() returns the learning history.
+%% Cell type:code id: tags:
+``` python
+ooo.plot_history(history)
+```
+%% Cell type:markdown id: tags:
+## 7/ Evaluation and confusion
+%% Cell type:code id: tags:
+``` python
+reload(ooo)
+y_pred   = model.predict_classes(x_test)
+conf_mat = confusion_matrix(y_test,y_pred, normalize="true", labels=range(43))
+ooo.plot_confusion_matrix(conf_mat)
+```
+%% Cell type:markdown id: tags:
+## 8/ Restore and evaluate
+### 8.1/ List saved models :
+%% Cell type:code id: tags:
+``` python
+!find ./run/models/
+```
+%% Cell type:markdown id: tags:
+### 8.2/ Restore a model :
+%% Cell type:code id: tags:
+``` python
+loaded_model = tf.keras.models.load_model('./run/models/best-model.h5')
+# best_model.summary()
+print("Loaded.")
+```
+%% Cell type:markdown id: tags:
+### 8.3/ Evaluate it :
+%% Cell type:code id: tags:
+``` python
+score = loaded_model.evaluate(x_test, y_test, verbose=0)
+print('Test loss      : {:5.4f}'.format(score[0]))
+print('Test accuracy  : {:5.4f}'.format(score[1]))
+```
+%% Cell type:markdown id: tags:
+### 8.4/ Make a prediction :
+%% Cell type:code id: tags:
+``` python
+# ---- Get a random image
+#
+i   = random.randint(1,len(x_test))
+x,y = x_test[i], y_test[i]
+# ---- Do prediction
+#
+predictions = loaded_model.predict( np.array([x]) )
+# ---- A prediction is just the output layer
+#
+print("\nOutput layer from model is (x100) :\n")
+with np.printoptions(precision=2, suppress=True, linewidth=95):
+    print(predictions*100)
+# ---- Graphic visualisation
+#
+print("\nGraphically :\n")
+plt.figure(figsize=(12,2))
+plt.bar(range(43), predictions[0], align='center', alpha=0.5)
+plt.ylabel('Probability')
+plt.ylim((0,1))
+plt.xlabel('Class')
+plt.title('Trafic Sign prediction')
+plt.show()
+# ---- Predict class
+#
+p = np.argmax(predictions)
+# ---- Show result
+#
+print("\nPrediction on the left, real stuff on the right :\n")
+ooo.plot_images([x,x_meta[y]], [p,y], range(2),  columns=3,  x_size=3, y_size=2)
+if p==y:
+    print("YEEES ! that's right!")
+else:
+    print("oups, that's wrong ;-(")
+```
+%% Cell type:markdown id: tags:
+---
+That's all folks !
+%% Cell type:code id: tags:
+``` python
+!kill $(ps ax | grep 'tensorboard --port 18529' | grep -v grep | awk '{print $1}')
+```
+%% Cell type:code id: tags:
+``` python
+%load_ext tensorboard
+```
+%% Cell type:code id: tags:
+``` python
+%tensorboard --host 0.0.0.0 --port 18529 --logdir ./run/logs
+```
+%% Cell type:code id: tags:
+``` python
+```
+%% Cell type:code id: tags:
+``` python
+```
+%% Cell type:code id: tags:
+``` python
+```
--- a/GTSRB/04-Data-augmentation.ipynb
+++ b/GTSRB/04-Data-augmentation.ipynb
--- a/GTSRB/GTSRB-01-Read-dataset.ipynb
+++ b/GTSRB/GTSRB-01-Read-dataset.ipynb
--- a/GTSRB/README.ipynb
+++ b/GTSRB/README.ipynb
+%% Cell type:markdown id: tags:
+German Traffic Sign Recognition Benchmark (GTSRB)
+=================================================
+---
+Introduction au Deep Learning  (IDLE)
+S. Arias, E. Maldonado, JL. Parouty
+CNRS/SARI/DEVLOG - 2020
+Objectives of this practical work
+---------------------------------
+Traffic sign classification with **CNN**, using Tensorflow and **Keras**
+Prerequisite
+------------
+Environment, with the following packages :
+ - Python 3.6
+ - numpy
+ - Tensorflow 2.0
+ - scikit-image
+ - scikit-learn
+ - Matplotlib
+ - seaborn
+You can create it from the `environment.yml` file :
+```
+# conda env create -f environment.yml
+```
+To manage conda environment see [there](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#)
+About the dataset
+-----------------
+Name : [German Traffic Sign Recognition Benchmark (GTSRB)](http://benchmark.ini.rub.de/?section=gtsrb)
+Available [here](https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/published-archive.html)
+or on **[kaggle](https://www.kaggle.com/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign)**
+A nice example from : [Alex Staravoitau](https://navoshta.com/traffic-signs-classification/)
+In few words :
+ - Images : Variable dimensions, rgb
+ - Train set : 39209 images
+ - Test set : 12630 images
+ - Classes : 0 to 42
+Episodes
+--------
+**[01 - Preparation of data](01-Preparation-of-data.ipynb)**
+ - Understanding the dataset
+ - Preparing and formatting data
+ - Organize and backup data
+**[02 - First convolutions](02-First-convolutions.ipynb)**
+ - Read dataset
+ - Build a model
+ - Train the model
+ - Model evaluation
+%% Cell type:code id: tags:
+``` python
+```
+%% Cell type:markdown id: tags:
+German Traffic Sign Recognition Benchmark (GTSRB)
+=================================================
+---
+Introduction au Deep Learning  (IDLE)
+S. Arias, E. Maldonado, JL. Parouty
+CNRS/SARI/DEVLOG - 2020
+Objectives of this practical work
+---------------------------------
+Traffic sign classification with **CNN**, using Tensorflow and **Keras**
+Prerequisite
+------------
+Environment, with the following packages :
+ - Python 3.6
+ - numpy
+ - Tensorflow 2.0
+ - scikit-image
+ - scikit-learn
+ - Matplotlib
+ - seaborn
+You can create it from the `environment.yml` file :
+```
+# conda env create -f environment.yml
+```
+To manage conda environment see [there](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#)
+About the dataset
+-----------------
+Name : [German Traffic Sign Recognition Benchmark (GTSRB)](http://benchmark.ini.rub.de/?section=gtsrb)
+Available [here](https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/published-archive.html)
+or on **[kaggle](https://www.kaggle.com/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign)**
+A nice example from : [Alex Staravoitau](https://navoshta.com/traffic-signs-classification/)
+In few words :
+ - Images : Variable dimensions, rgb
+ - Train set : 39209 images
+ - Test set : 12630 images
+ - Classes : 0 to 42
+Episodes
+--------
+**[01 - Preparation of data](01-Preparation-of-data.ipynb)**
+ - Understanding the dataset
+ - Preparing and formatting data
+ - Organize and backup data
+**[02 - First convolutions](02-First-convolutions.ipynb)**
+ - Read dataset
+ - Build a model
+ - Train the model
+ - Model evaluation
+%% Cell type:code id: tags:
+``` python
+```
--- a/GTSRB/README.md
+++ b/GTSRB/README.md
+German Traffic Sign Recognition Benchmark (GTSRB)
+=================================================
+---
+FIDLE - Formation Introduction au Deep Learning
+1/ Objectives
+   ----------
+Traffic sign classification with **CNN**, using Tensorflow and **Keras**  
+2/ About the dataset
+   -----------------
+Name : [German Traffic Sign Recognition Benchmark (GTSRB)](http://benchmark.ini.rub.de/?section=gtsrb)  
+Available [here](https://sid.erda.dk/public/archives/daaeac0d7ce1152aea9b61d9f1e19370/published-archive.html) 
+or on **[kaggle](https://www.kaggle.com/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign)**  
+A nice example from : [Alex Staravoitau](https://navoshta.com/traffic-signs-classification/)  
+In few words :
+ - Images : Variable dimensions, rgb
+ - Train set : 39209 images  
+ - Test set : 12630 images
+ - Classes : 0 to 42
+3/ Episodes
+   --------
+   01 - Dataset preparation  
+   - Understand the data
+   02 - First convolutions  
--- a/GTSRB/data/dataset.tar.gz
+++ b/GTSRB/data/dataset.tar.gz
--- a/GTSRB/idle/__init__.py
+++ b/GTSRB/idle/__init__.py
+VERSION='0.1a'
\ No newline at end of file
--- a/GTSRB/idle/pwk.py
+++ b/GTSRB/idle/pwk.py
+# ==================================================================
+#  ____                 _   _           _  __        __         _
+# |  _ \ _ __ __ _  ___| |_(_) ___ __ _| | \ \      / /__  _ __| | __
+# | |_) | '__/ _` |/ __| __| |/ __/ _` | |  \ \ /\ / / _ \| '__| |/ /
+# |  __/| | | (_| | (__| |_| | (_| (_| | |   \ V  V / (_) | |  |   <
+# |_|   |_|  \__,_|\___|\__|_|\___\__,_|_|    \_/\_/ \___/|_|  |_|\_\
+#                                                        module pwk                                   
+# ==================================================================
+# A simple module to host some common functions for practical work
+# pjluc 2019
+import os
+import glob
+from datetime import datetime
+import itertools
+import datetime
+import math
+import numpy as np
+import tensorflow as tf
+from tensorflow import keras
+import matplotlib
+import matplotlib.pyplot as plt
+import seaborn as sn
+VERSION='0.1.4'
+# -------------------------------------------------------------
+# init_all
+# -------------------------------------------------------------
+#
+def init(mplstyle='idle/talk.mplstyle'):
+    global VERSION
+    # ---- matplotlib
+    matplotlib.style.use(mplstyle)
+    # ---- Hello world
+    now = datetime.datetime.now()
+    print('IDLE 2020 - Practical Work Module')
+    print('  Version            :', VERSION)
+    print('  Run time           : {}'.format(now.strftime("%A %-d %B %Y, %H:%M:%S")))
+    print('  Matplotlib style   :', mplstyle)
+    print('  TensorFlow version :',tf.__version__)
+    print('  Keras version      :',tf.keras.__version__)
+# -------------------------------------------------------------
+# Folder cooking
+# -------------------------------------------------------------
+#
+def tag_now():
+    return datetime.datetime.now().strftime("%Y-%m-%d_%Hh%Mm%Ss")
+def mkdir(path):
+    os.makedirs(path, mode=0o750, exist_ok=True)
+def get_directory_size(path):
+    """
+    Return the directory size, but only 1 level
+    args:
+        path : directory path
+    return:
+        size in Mo
+    """
+    size=0
+    for f in os.listdir(path):
+        if os.path.isfile(path+'/'+f):
+            size+=os.path.getsize(path+'/'+f)
+    return size/(1024*1024)
+# -------------------------------------------------------------
+# shuffle_dataset
+# -------------------------------------------------------------
+#
+def shuffle_np_dataset(x, y):
+    assert (len(x) == len(y)), "x and y must have same size"
+    p = np.random.permutation(len(x))
+    return x[p], y[p]
+def update_progress(what,i,imax):
+    bar_length = min(40,imax)
+    if (i%int(imax/bar_length))!=0 and i<imax:
+        return
+    progress  = float(i/imax)
+    block     = int(round(bar_length * progress))
+    endofline = '\r' if progress<1 else '\n'
+    text = "{:16s} [{}] {:>5.1f}% of {}".format( what, "#"*block+"-"*(bar_length-block), progress*100, imax)
+    print(text, end=endofline)
+# -------------------------------------------------------------
+# show_images
+# -------------------------------------------------------------
+#
+def plot_images(x,y, indices, columns=12, x_size=1, y_size=1, colorbar=False, y_pred=None, cm='binary'):
+    """
+    Show some images in a grid, with legends
+    args:
+        X: images - Shapes must be (-1 lx,ly,1) or (-1 lx,ly,3)
+        y: real classes
+        indices: indices of images to show
+        columns: number of columns (12)
+        x_size,y_size: figure size
+        colorbar: show colorbar (False)
+        y_pred: predicted classes (None)
+        cm: Matplotlib olor map
+    returns: 
+        nothing
+    """
+    rows    = math.ceil(len(indices)/columns)
+    fig=plt.figure(figsize=(columns*x_size, rows*(y_size+0.35)))
+    n=1
+    errors=0 
+    if np.any(y_pred)==None:
+        y_pred=y
+    for i in indices:
+        axs=fig.add_subplot(rows, columns, n)
+        n+=1
+        # Shapes must be differents for RGB and L
+        (lx,ly,lz)=x[i].shape
+        if lz==1:
+            img=axs.imshow(x[i].reshape(lx,ly),   cmap = cm, interpolation='lanczos')
+        else:
+            img=axs.imshow(x[i].reshape(lx,ly,lz),cmap = cm, interpolation='lanczos')
+        axs.spines['right'].set_visible(True)
+        axs.spines['left'].set_visible(True)
+        axs.spines['top'].set_visible(True)
+        axs.spines['bottom'].set_visible(True)
+        axs.set_yticks([])
+        axs.set_xticks([])
+        if y[i]!=y_pred[i]:
+            axs.set_xlabel('{} ({})'.format(y_pred[i],y[i]))
+            axs.xaxis.label.set_color('red')
+            errors+=1
+        else:
+            axs.set_xlabel(y[i])
+        if colorbar:
+            fig.colorbar(img,orientation="vertical", shrink=0.65)
+    plt.show()
+def plot_image(x,cm='binary', figsize=(4,4)):
+    (lx,ly,lz)=x.shape
+    plt.figure(figsize=figsize)
+    if lz==1:
+        plt.imshow(x.reshape(lx,ly),   cmap = cm, interpolation='lanczos')
+    else:
+        plt.imshow(x.reshape(lx,ly,lz),cmap = cm, interpolation='lanczos')
+    plt.show()
+# -------------------------------------------------------------
+# show_history
+# -------------------------------------------------------------
+#
+def plot_history(history, figsize=(8,6)):
+    """
+    Show history
+    args:
+        history: history
+        save_as: filename to save or None
+    """
+    # Accuracy 
+    plt.figure(figsize=figsize)
+    plt.plot(history.history['accuracy'])
+    plt.plot(history.history['val_accuracy'])
+    plt.title('Model accuracy')
+    plt.ylabel('Accuracy')
+    plt.xlabel('Epoch')
+    plt.legend(['Train', 'Test'], loc='upper left')
+    plt.show()
+    # Loss values
+    plt.figure(figsize=figsize)
+    plt.plot(history.history['loss'])
+    plt.plot(history.history['val_loss'])
+    plt.title('Model loss')
+    plt.ylabel('Loss')
+    plt.xlabel('Epoch')
+    plt.legend(['Train', 'Test'], loc='upper left')
+    plt.show()    
+# -------------------------------------------------------------
+# plot_confusion_matrix
+# -------------------------------------------------------------
+#
+def plot_confusion_matrix(cm,
+                          title='Confusion matrix',
+                          figsize=(12,8),
+                          cmap="gist_heat_r",
+                          vmin=0,
+                          vmax=1,
+                          xticks=5,yticks=5):
+    """
+    given a sklearn confusion matrix (cm), make a nice plot
+    Args:
+        cm:           confusion matrix from sklearn.metrics.confusion_matrix
+        title:        the text to display at the top of the matrix
+        figsize:      Figure size (12,8)
+        cmap:         color map (gist_heat_r)
+        vmi,vmax:     Min/max 0 and 1
+    """
+    accuracy = np.trace(cm) / float(np.sum(cm))
+    misclass = 1 - accuracy
+    plt.figure(figsize=figsize)
+    sn.heatmap(cm, linewidths=1, linecolor="#ffffff",square=True, 
+               cmap=cmap, xticklabels=xticks, yticklabels=yticks,
+               vmin=vmin,vmax=vmax)
+    plt.ylabel('True label')
+    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
+    plt.show()
--- a/GTSRB/idle/talk.mplstyle
+++ b/GTSRB/idle/talk.mplstyle
+# See : https://matplotlib.org/users/customizing.html
+axes.titlesize : 24
+axes.labelsize : 20
+axes.edgecolor      : dimgrey
+axes.labelcolor     : dimgrey
+axes.linewidth      : 2
+axes.grid           : False
+axes.prop_cycle    : cycler('color', ['steelblue', 'tomato', '2ca02c', 'd62728', '9467bd', '8c564b', 'e377c2', '7f7f7f', 'bcbd22', '17becf'])
+lines.linewidth     : 3
+lines.markersize    : 10
+xtick.color         : black
+xtick.labelsize     : 18
+ytick.color         : black
+ytick.labelsize     : 18
+axes.spines.left   : True
+axes.spines.bottom : True
+axes.spines.top    : False
+axes.spines.right  : False
+savefig.dpi         : 300      # figure dots per inch or 'figure'
+savefig.facecolor   : white    # figure facecolor when saving
+savefig.edgecolor   : white    # figure edgecolor when saving
+savefig.format      : svg
+savefig.bbox        : tight
+savefig.pad_inches  : 0.1
+savefig.transparent : True
+savefig.jpeg_quality: 95
--- a/environment.yml
+++ b/environment.yml
+name: deeplearning2
+channels:
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _tflow_select=2.1.0=gpu
+  - absl-py=0.8.1=py37_0
+  - astor=0.8.0=py37_0
+  - attrs=19.3.0=py_0
+  - backcall=0.1.0=py37_0
+  - blas=1.0=mkl
+  - bleach=3.1.0=py_0
+  - c-ares=1.15.0=h7b6447c_1001
+  - ca-certificates=2019.11.27=0
+  - certifi=2019.11.28=py37_0
+  - cloudpickle=1.2.2=py_0
+  - cudatoolkit=10.0.130=0
+  - cudnn=7.6.4=cuda10.0_0
+  - cupti=10.0.130=0
+  - cycler=0.10.0=py37_0
+  - cytoolz=0.10.1=py37h7b6447c_0
+  - dask-core=2.9.0=py_0
+  - dbus=1.13.12=h746ee38_0
+  - decorator=4.4.1=py_0
+  - defusedxml=0.6.0=py_0
+  - entrypoints=0.3=py37_0
+  - expat=2.2.6=he6710b0_0
+  - fontconfig=2.13.0=h9420a91_0
+  - freetype=2.9.1=h8a8886c_1
+  - gast=0.2.2=py37_0
+  - glib=2.63.1=h5a9c865_0
+  - gmp=6.1.2=h6c8ec71_1
+  - google-pasta=0.1.8=py_0
+  - grpcio=1.16.1=py37hf8bcb03_1
+  - gst-plugins-base=1.14.0=hbbd80ab_1
+  - gstreamer=1.14.0=hb453b48_1
+  - h5py=2.9.0=py37h7918eee_0
+  - hdf5=1.10.4=hb1b8bf9_0
+  - icu=58.2=h9c2bf20_1
+  - imageio=2.6.1=py37_0
+  - importlib_metadata=1.3.0=py37_0
+  - intel-openmp=2019.4=243
+  - ipykernel=5.1.3=py37h39e3cac_0
+  - ipython=7.10.2=py37h39e3cac_0
+  - ipython_genutils=0.2.0=py37_0
+  - jedi=0.15.1=py37_0
+  - jinja2=2.10.3=py_0
+  - joblib=0.14.1=py_0
+  - jpeg=9b=h024ee3a_2
+  - json5=0.8.5=py_0
+  - jsonschema=3.2.0=py37_0
+  - jupyter_client=5.3.4=py37_0
+  - jupyter_core=4.6.1=py37_0
+  - jupyterlab=1.2.4=pyhf63ae98_0
+  - jupyterlab_server=1.0.6=py_0
+  - keras-applications=1.0.8=py_0
+  - keras-preprocessing=1.1.0=py_1
+  - kiwisolver=1.1.0=py37he6710b0_0
+  - libedit=3.1.20181209=hc058e9b_0
+  - libffi=3.2.1=hd88cf55_4
+  - libgcc-ng=9.1.0=hdf63c60_0
+  - libgfortran-ng=7.3.0=hdf63c60_0
+  - libpng=1.6.37=hbc83047_0
+  - libprotobuf=3.11.2=hd408876_0
+  - libsodium=1.0.16=h1bed415_0
+  - libstdcxx-ng=9.1.0=hdf63c60_0
+  - libtiff=4.1.0=h2733197_0
+  - libuuid=1.0.3=h1bed415_2
+  - libxcb=1.13=h1bed415_1
+  - libxml2=2.9.9=hea5a465_1
+  - markdown=3.1.1=py37_0
+  - markupsafe=1.1.1=py37h7b6447c_0
+  - matplotlib=3.1.1=py37h5429711_0
+  - mistune=0.8.4=py37h7b6447c_0
+  - mkl=2019.4=243
+  - mkl-service=2.3.0=py37he904b0f_0
+  - mkl_fft=1.0.15=py37ha843d7b_0
+  - mkl_random=1.1.0=py37hd6b4f25_0
+  - more-itertools=8.0.2=py_0
+  - nbconvert=5.6.1=py37_0
+  - nbformat=4.4.0=py37_0
+  - ncurses=6.1=he6710b0_1
+  - networkx=2.4=py_0
+  - notebook=6.0.2=py37_0
+  - numpy=1.17.4=py37hc1035e2_0
+  - numpy-base=1.17.4=py37hde5b4d6_0
+  - olefile=0.46=py_0
+  - openssl=1.1.1d=h7b6447c_3
+  - opt_einsum=3.1.0=py_0
+  - pandas=0.25.3=py37he6710b0_0
+  - pandoc=2.2.3.2=0
+  - pandocfilters=1.4.2=py37_1
+  - parso=0.5.2=py_0
+  - patsy=0.5.1=py37_0
+  - pcre=8.43=he6710b0_0
+  - pexpect=4.7.0=py37_0
+  - pickleshare=0.7.5=py37_0
+  - pillow=6.2.1=py37h34e0f95_0
+  - pip=19.3.1=py37_0
+  - prometheus_client=0.7.1=py_0
+  - prompt_toolkit=3.0.2=py_0
+  - protobuf=3.11.2=py37he6710b0_0
+  - ptyprocess=0.6.0=py37_0
+  - pygments=2.5.2=py_0
+  - pyparsing=2.4.5=py_0
+  - pyqt=5.9.2=py37h05f1152_2
+  - pyrsistent=0.15.6=py37h7b6447c_0
+  - python=3.7.5=h0371630_0
+  - python-dateutil=2.8.1=py_0
+  - pytz=2019.3=py_0
+  - pywavelets=1.1.1=py37h7b6447c_0
+  - pyzmq=18.1.0=py37he6710b0_0
+  - qt=5.9.7=h5867ecd_1
+  - readline=7.0=h7b6447c_5
+  - scikit-image=0.15.0=py37he6710b0_0
+  - scikit-learn=0.22=py37hd81dba3_0
+  - scipy=1.3.2=py37h7c811a0_0
+  - seaborn=0.9.0=pyh91ea838_1
+  - send2trash=1.5.0=py37_0
+  - setuptools=42.0.2=py37_0
+  - sip=4.19.8=py37hf484d3e_0
+  - six=1.13.0=py37_0
+  - sqlite=3.30.1=h7b6447c_0
+  - statsmodels=0.10.1=py37hdd07704_0
+  - tensorboard=2.0.0=pyhb38c66f_1
+  - tensorflow=2.0.0=gpu_py37h768510d_0
+  - tensorflow-base=2.0.0=gpu_py37h0ec5d1f_0
+  - tensorflow-estimator=2.0.0=pyh2649769_0
+  - tensorflow-gpu=2.0.0=h0d30ee6_0
+  - termcolor=1.1.0=py37_1
+  - terminado=0.8.3=py37_0
+  - testpath=0.4.4=py_0
+  - tk=8.6.8=hbc83047_0
+  - toolz=0.10.0=py_0
+  - tornado=6.0.3=py37h7b6447c_0
+  - traitlets=4.3.3=py37_0
+  - wcwidth=0.1.7=py37_0
+  - webencodings=0.5.1=py37_1
+  - werkzeug=0.16.0=py_0
+  - wheel=0.33.6=py37_0
+  - wrapt=1.11.2=py37h7b6447c_0
+  - xz=5.2.4=h14c3975_4
+  - zeromq=4.3.1=he6710b0_3
+  - zipp=0.6.0=py_0
+  - zlib=1.2.11=h7b6447c_3
+  - zstd=1.3.7=h0b5b093_0
+  - pip:
+    - dask==2.9.0
+prefix: /home/paroutyj/.conda/envs/deeplearning2
No results found