German Traffic Sign Recognition Benchmark (GTSRB)
=================================================
---
Introduction au Deep Learning (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020 
Vesion : 1.2.1

## Episode 7 : Full Convolutions

Our main steps:
 - Try n models with n datasets

## 1/ Import and init

In [18]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import TensorBoard

import numpy as np
import matplotlib.pyplot as plt
import h5py
import os,time

import pandas as pd
import idle.pwk as ooo
from importlib import reload
from IPython.display import display

ooo.init()

IDLE 2020 - Practical Work Module
 Version : 0.1.4
 Run time : Friday 17 January 2020, 21:38:34
 Matplotlib style : idle/talk.mplstyle
 TensorFlow version : 2.0.0
 Keras version : 2.2.4-tf


## 2/ Load dataset functions

In [23]:
def read_dataset(name):
 '''Reads h5 dataset from ./data

 Arguments: dataset name, without .h5
 Returns: x_train,y_train,x_test,y_test data'''
 # ---- Read dataset
 filename='./data/'+name+'.h5'
 with h5py.File(filename) as f:
 x_train = f['x_train'][:]
 y_train = f['y_train'][:]
 x_test = f['x_test'][:]
 y_test = f['y_test'][:]

 return x_train,y_train,x_test,y_test

## 3/ Models collection

In [3]:

# A basic model
#
def get_model_v1(lx,ly,lz):
 
 model = keras.models.Sequential()
 
 model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))
 model.add( keras.layers.MaxPooling2D((2, 2)))
 model.add( keras.layers.Dropout(0.2))

 model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
 model.add( keras.layers.MaxPooling2D((2, 2)))
 model.add( keras.layers.Dropout(0.2))

 model.add( keras.layers.Flatten()) 
 model.add( keras.layers.Dense(1500, activation='relu'))
 model.add( keras.layers.Dropout(0.5))

 model.add( keras.layers.Dense(43, activation='softmax'))
 return model
 
# A more sophisticated model
#
def get_model_v2(lx,ly,lz):
 model = keras.models.Sequential()

 model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))
 model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
 model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
 model.add( keras.layers.Dropout(0.2))

 model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))
 model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
 model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
 model.add( keras.layers.Dropout(0.2))

 model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))
 model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
 model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
 model.add( keras.layers.Dropout(0.2))

 model.add( keras.layers.Flatten())
 model.add( keras.layers.Dense(512, activation='relu'))
 model.add( keras.layers.Dropout(0.5))
 model.add( keras.layers.Dense(43, activation='softmax'))
 return model

# My sphisticated model, but small and fast
#
def get_model_v3(lx,ly,lz):
 model = keras.models.Sequential()
 model.add( keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(lx,ly,lz)))
 model.add( keras.layers.MaxPooling2D((2, 2)))
 model.add( keras.layers.Dropout(0.5))

 model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
 model.add( keras.layers.MaxPooling2D((2, 2)))
 model.add( keras.layers.Dropout(0.5))

 model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
 model.add( keras.layers.MaxPooling2D((2, 2)))
 model.add( keras.layers.Dropout(0.5))

 model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
 model.add( keras.layers.MaxPooling2D((2, 2)))
 model.add( keras.layers.Dropout(0.5))

 model.add( keras.layers.Flatten()) 
 model.add( keras.layers.Dense(1152, activation='relu'))
 model.add( keras.layers.Dropout(0.5))

 model.add( keras.layers.Dense(43, activation='softmax'))
 return model


## 4/ Callbacks 

In [4]:
%%bash
# To clean old logs and saved model, run this cell
#
/bin/rm -r ./run/logs 2>/dev/null
/bin/rm -r ./run/models 2>/dev/null
/bin/ls -l ./run 2>/dev/null

total 0


In [5]:
ooo.mkdir('./run/models')
ooo.mkdir('./run/logs')

# ---- Callback tensorboard
log_dir = "./run/logs/tb_" + ooo.tag_now()
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# ---- Callback ModelCheckpoint - Save best model
save_dir = "./run/models/best-model.h5"
bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)

# ---- Callback ModelCheckpoint - Save model each epochs
save_dir = "./run/models/model-{epoch:04d}.h5"
savemodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_freq=2000*5)

## 6/ Multiple datasets, multiple models ;-)

In [32]:
def multi_run(datasets, models, batch_size=64, epochs=16):

 # ---- Columns of report
 #
 report={}
 report['Dataset']=[]
 report['Size'] =[]
 for m in models:
 report[m+' Accuracy'] = []
 report[m+' Duration'] = []

 # ---- Let's go
 #
 for dname in datasets:
 print("\nDataset : ",dname)

 # ---- Read dataset
 x_train,y_train,x_test,y_test = read_dataset(dname)
 dsize=os.path.getsize('./data/'+dname+'.h5')/(1024*1024)
 report['Dataset'].append(dname)
 report['Size'].append(dname)
 
 # ---- Get the shape
 (n,lx,ly,lz) = x_train.shape

 # ---- For each model
 for kmodel,fmodel in models.items():
 print(" Run model {} : ".format(kmodel), end='')
 # ---- get model
 try:
 model=fmodel(lx,ly,lz)
 # ---- Compile it
 model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
 # ---- Train
 start_time = time.time()
 history = model.fit( x_train[:1000], y_train[:1000],
 batch_size = batch_size,
 epochs = epochs,
 verbose = 0,
 validation_data = (x_test, y_test),
 callbacks = [tensorboard_callback, bestmodel_callback, savemodel_callback])
 # ---- Result
 end_time = time.time()
 duration = end_time-start_time
 accuracy = max(history.history["val_accuracy"])*100
 #
 report[kmodel+' Accuracy'].append(accuracy)
 report[kmodel+' Duration'].append(duration)
 print("Accuracy={:.2f} and Duration={:.2f})".format(accuracy,duration))
 except:
 report[kmodel+' Accuracy'].append('-')
 report[kmodel+' Duration'].append('-')
 print('-')
 print("\n")
 return report

In [33]:
%%time

# datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']
# models = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}

datasets = ['set-24x24-L', 'set-24x24-RGB']
models = {'v1':get_model_v1, 'v3':get_model_v3}

out = multi_run(datasets, models, batch_size=64, epochs=2)
report = pd.DataFrame (out)



Dataset : set-24x24-L
 Run model v1 : Accuracy=9.46 and Duration=7.51)
 Run model v3 : -

Dataset : set-24x24-RGB
 Run model v1 : Accuracy=15.95 and Duration=7.95)
 Run model v3 : -


CPU times: user 1min 35s, sys: 3.31 s, total: 1min 38s
Wall time: 17 s


In [38]:
display(report)
df.to_hdf('foo.h5', 'df')

Unnamed: 0,Dataset,Size,v1 Accuracy,v1 Duration,v3 Accuracy,v3 Duration
0,set-24x24-L,set-24x24-L,9.4616,7.514726,-,-
1,set-24x24-RGB,set-24x24-RGB,15.94616,7.946994,-,-


In [41]:
df=pd.read_hdf('foo.h5', 'df')
display(df)

Unnamed: 0,Dataset,Size,v1 Accuracy,v1 Duration,v3 Accuracy,v3 Duration
0,set-24x24-L,set-24x24-L,11.89232,8.730333,-,-
1,set-24x24-RGB,set-24x24-RGB,12.707838,8.308997,-,-


---


### Some results : 



| Datasets | Size | Model : v1 | Model : v2 | Model : v3 |
|:------------------------:|:---------------:|:------------------:|:------------------:|:------------------:|
| set-24x24-L | 229 Mo | 95.91% 75.04s | 96.86% 102.28s | - - |
| set-24x24-RGB | 684 Mo | 96.60% 77.24s | 97.32% 103.93s | - - |
| set-48x48-L | 914 Mo | **96.71%** 123.94s | 97.68% 149.57s | 97.60% 91.53s |
| set-48x48-RGB | 2736 Mo | 96.36% 117.74s | **98.20%** 142.63s | 97.28% 91.29s |
| set-24x24-L-LHE | 229 Mo | 95.95% 66.12s | 96.75% 89.45s | - - |
| set-24x24-RGB-HE | 684 Mo | 95.30% 68.89s | 96.28% 92.15s | - - |
| set-48x48-L-LHE | 914 Mo | 96.69% 109.28s | 97.94% 135.17s | **97.97%** 83.80s |
| set-48x48-RGB-HE | 2736 Mo | 95.29% 117.70s | **98.13%** 141.56s | 97.00% 89.38s |