<img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>

# <!-- TITLE --> [GTS5] - CNN with GTSRB dataset - Full convolutions 
<!-- DESC --> Episode 5 : A lot of models, a lot of datasets and a lot of results.
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->

## Objectives :
 - Try multiple solutions
 - Design a generic and batch-usable code
 
The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes. 
The final aim is to recognise them ! 
A description of the dataset is available here: http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset


## What we're going to do :

Our main steps:
 - Try n models with n datasets
 - Save a JSON report (a dict of columns that can be loaded into Pandas)
 - Write the code so that it can be run in batch mode

## Step 1 - Import
### 1.1 - Python

In [1]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import h5py
import os,time,json
import random

from IPython.display import display

VERSION='1.6'

### 1.2 - Where are we ? 

In [5]:
# Select the directory that holds the GTSRB .h5 datasets.
# One assignment per site is listed below; keep exactly one active and leave
# the others commented out.

# At GRICAD
dataset_dir = '/bettik/PROJECTS/pr-fidle/datasets/GTSRB/'

# At IDRIS (path built from the WORK environment variable, '' if unset)
# dataset_dir = f'{os.getenv("WORK","")}/datasets/GTSRB'

# At Home (path built from the HOME environment variable, '' if unset)
# dataset_dir = f'{os.getenv("HOME","")}/datasets/GTSRB'

# Echo the choice so it shows up in the notebook / batch log.
print(f'We will use : dataset_dir={dataset_dir}')


We will use : dataset_dir=/bettik/PROJECTS/pr-fidle/datasets/GTSRB


## Step 2 - Init and start

In [11]:
# ---- Where I am ?
# Capture the start date, the working directory and a random run tag.
# The tag is used later to name the logs, models and report of this run.
now = time.strftime("%A %d %B %Y - %Hh%Mm%Ss")
here = os.getcwd()
random.seed(time.time())
tag_id = f'{random.randint(0,99999):06}'

# ---- Who I am ?
# Job identifiers set by the batch scheduler, or "??" when not running under it.
oar_id = os.getenv("OAR_JOB_ID", "??")
slurm_id = os.getenv("SLURM_JOBID", "??")

# ---- Banner describing this run
print('\nFull Convolutions Notebook')
print(f' Version : {VERSION}')
print(f' Now is : {now}')
print(f' OAR id : {oar_id}')
print(f' SLURM id : {slurm_id}')
print(f' Tag id : {tag_id}')
print(f' Working directory : {here}')
print(f' Dataset_dir : {dataset_dir}')
print(f' TensorFlow version : {tf.__version__}')
print(f' Keras version : {tf.keras.__version__}')
print(f' for tensorboard : --logdir {here}/run/logs_{tag_id}')


Full Convolutions Notebook
 Version : 1.6
 Now is : Friday 28 February 2020 - 15h06m25s
 OAR id : 5878410
 SLURM id : ??
 Tag id : 083052
 Working directory : /home/paroutyj/fidle/GTSRB
 Dataset_dir : /bettik/PROJECTS/pr-fidle/datasets/GTSRB
 TensorFlow version : 2.0.0
 Keras version : 2.2.4-tf
 for tensorboard : --logdir /home/paroutyj/fidle/GTSRB/run/logs_083052


## Step 3 - Dataset loading

In [20]:
def read_dataset(dataset_dir, name):
    '''Load the train/test arrays stored in an h5 dataset file.
    Args:
        dataset_dir : directory containing the datasets
        name : dataset name, without the .h5 extension
    Returns:
        x_train, y_train, x_test, y_test, read from the file
        {dataset_dir}/{name}.h5'''
    h5_path = f'{dataset_dir}/{name}.h5'
    keys = ('x_train', 'y_train', 'x_test', 'y_test')

    # [:] copies each dataset into memory, so the arrays remain usable
    # after the file is closed.
    with h5py.File(h5_path, 'r') as h5_file:
        arrays = [h5_file[key][:] for key in keys]

    train_x, train_y, test_x, test_y = arrays
    return train_x, train_y, test_x, test_y

## Step 4 - Models collection

In [13]:

# A basic model
#
def get_model_v1(lx,ly,lz):
    '''Build the basic CNN: two conv / max-pool / dropout stages and a dense head.
    Args:
        lx, ly, lz : values used as input_shape=(lx,ly,lz) of the first Conv2D
    Returns:
        an uncompiled keras Sequential model ending in a 43-unit softmax layer'''
    layers = keras.layers

    stack = [
        # ---- Convolutional stage 1
        layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.2),
        # ---- Convolutional stage 2
        layers.Conv2D(192, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.2),
        # ---- Dense head
        layers.Flatten(),
        layers.Dense(1500, activation='relu'),
        layers.Dropout(0.5),
        # ---- Output layer
        layers.Dense(43, activation='softmax'),
    ]
    return keras.models.Sequential(stack)
 
# A more sophisticated model
#
def get_model_v2(lx,ly,lz):
    '''Build the deeper CNN: three double-convolution stages and a dense head.
    Each stage is Conv2D(padding='same') + Conv2D + MaxPooling2D + Dropout(0.2),
    with 64, 128 and 256 filters respectively.
    Args:
        lx, ly, lz : values used as input_shape=(lx,ly,lz) of the first Conv2D
    Returns:
        an uncompiled keras Sequential model ending in a 43-unit softmax layer'''
    layers = keras.layers

    stack = [
        # ---- Stage 1 : 64 filters
        layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.2),
        # ---- Stage 2 : 128 filters
        layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.2),
        # ---- Stage 3 : 256 filters
        layers.Conv2D(256, (3, 3), padding='same',activation='relu'),
        layers.Conv2D(256, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.2),
        # ---- Dense head and output layer
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(43, activation='softmax'),
    ]
    return keras.models.Sequential(stack)

def get_model_v3(lx,ly,lz):
    '''Build the batch-normalized CNN: 5x5 'same' convolutions, each followed by
    BatchNormalization, then a batch-normalized dense head.
    Args:
        lx, ly, lz : values used as input_shape=(lx,ly,lz) of the first Conv2D
    Returns:
        an uncompiled keras Sequential model ending in a 43-unit softmax layer'''
    layers = tf.keras.layers

    stack = [
        # ---- Stage 1 : one convolution, 32 filters
        layers.Conv2D(32, (5, 5), padding='same', activation='relu', input_shape=(lx,ly,lz)),
        layers.BatchNormalization(axis=-1),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.2),
        # ---- Stage 2 : two convolutions, 64 then 128 filters
        layers.Conv2D(64, (5, 5), padding='same', activation='relu'),
        layers.BatchNormalization(axis=-1),
        layers.Conv2D(128, (5, 5), padding='same', activation='relu'),
        layers.BatchNormalization(axis=-1),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.2),
        # ---- Dense head
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        # ---- Output layer
        layers.Dense(43, activation='softmax'),
    ]
    return keras.models.Sequential(stack)

## Step 5 - Multiple datasets, multiple models ;-)

In [15]:
def multi_run(dataset_dir, datasets, models, datagen=None,
              train_size=1, test_size=1, batch_size=64, epochs=16,
              verbose=0, extension_dir='last'):
    """
    Train and evaluate every model on every dataset, and collect the results.

    For each dataset/model pair, the model is built, compiled (adam optimizer,
    sparse categorical cross-entropy loss), trained with a TensorBoard callback
    and a best-model checkpoint callback, and its best validation accuracy and
    its training duration are recorded.
    A failing pair does not stop the batch: the error is printed and
    placeholder values are recorded for that pair.

    Side effects: creates ./run/logs_{extension_dir} and
    ./run/models_{extension_dir}, and writes TensorBoard logs and model
    checkpoints into them.

    args:
        dataset_dir   : Directory of the datasets
        datasets      : List of dataset names (without .h5)
        models        : Dict of model builders, like {"model name":get_model, ...}.
                        Each builder is called as builder(lx,ly,lz), where
                        lx,ly,lz are the last three dimensions of x_train.
        datagen       : Data generator or None (None)
        train_size    : Fraction of train dataset to use. 1 means all. (1)
        test_size     : Fraction of test dataset to use. 1 means all. (1)
        batch_size    : Batch size (64)
        epochs        : Number of epochs (16)
        verbose       : Verbose level (0)
        extension_dir : Suffix for logs and models dirs (last)
    return:
        report : Report as a dict of columns for Pandas, with keys 'Dataset',
                 'Size' (dataset file size, in MiB) and, for each model name,
                 '<name>_Accuracy' (best validation accuracy, in %) and
                 '<name>_Duration' (training time, in seconds).
    """
    # ---- Logs and models dir
    #
    os.makedirs(f'./run/logs_{extension_dir}', mode=0o750, exist_ok=True)
    os.makedirs(f'./run/models_{extension_dir}', mode=0o750, exist_ok=True)

    # ---- Columns of output
    #
    output = {'Dataset': [], 'Size': []}
    for m in models:
        output[m+'_Accuracy'] = []
        output[m+'_Duration'] = []

    # ---- Let's go
    #
    for d_name in datasets:
        print("\nDataset : ",d_name)

        # ---- Read dataset
        x_train,y_train,x_test,y_test = read_dataset(dataset_dir, d_name)
        d_size = os.path.getsize(f'{dataset_dir}/{d_name}.h5')/(1024*1024)
        output['Dataset'].append(d_name)
        output['Size'].append(d_size)

        # ---- Get the shape
        _, lx, ly, lz = x_train.shape
        n_train = int(x_train.shape[0] * train_size)
        n_test = int(x_test.shape[0] * test_size)

        # ---- Keep only the requested part of each set.
        #      The same subsets are used with and without data augmentation,
        #      so that train_size is honoured in both cases.
        x_fit, y_fit = x_train[:n_train], y_train[:n_train]
        x_val, y_val = x_test[:n_test], y_test[:n_test]

        # ---- For each model
        for m_name,m_function in models.items():
            print(" Run model {} : ".format(m_name), end='')
            try:
                # ---- get model
                model = m_function(lx,ly,lz)
                # ---- Compile it
                model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
                # ---- Callbacks tensorboard
                log_dir = f"./run/logs_{extension_dir}/tb_{d_name}_{m_name}"
                tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
                # ---- Callbacks bestmodel
                # NOTE(review): the checkpoint monitors the training 'accuracy',
                # while the report below uses 'val_accuracy' - confirm this is intended.
                save_dir = f"./run/models_{extension_dir}/model_{d_name}_{m_name}.h5"
                bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)
                callbacks = [tensorboard_callback, bestmodel_callback]
                # ---- Train
                start_time = time.time()
                if datagen is None:
                    # ---- No data augmentation (datagen=None) --------------------------------------
                    history = model.fit(x_fit, y_fit,
                                        batch_size      = batch_size,
                                        epochs          = epochs,
                                        verbose         = verbose,
                                        validation_data = (x_val, y_val),
                                        callbacks       = callbacks)
                else:
                    # ---- Data augmentation (datagen given) ----------------------------------------
                    datagen.fit(x_fit)
                    history = model.fit(datagen.flow(x_fit, y_fit, batch_size=batch_size),
                                        # At least one step, even if n_train < batch_size
                                        steps_per_epoch = max(1, n_train // batch_size),
                                        epochs          = epochs,
                                        verbose         = verbose,
                                        validation_data = (x_val, y_val),
                                        callbacks       = callbacks)

                # ---- Result
                duration = time.time() - start_time
                accuracy = max(history.history["val_accuracy"])*100
            except Exception as err:
                # Best-effort batch: a failing dataset/model pair must not stop
                # the other runs, but the error is reported instead of hidden.
                # Only Exception is caught, so Ctrl-C still interrupts the batch.
                # NOTE(review): placeholders are strings, as in previous reports,
                # whereas successful runs store floats.
                output[m_name+'_Accuracy'].append('0')
                output[m_name+'_Duration'].append('999')
                print(f'- ({type(err).__name__}: {err})')
            else:
                # Both columns are filled together, so they always stay aligned.
                output[m_name+'_Accuracy'].append(accuracy)
                output[m_name+'_Duration'].append(duration)
                print(f"Accuracy={accuracy:.2f} and Duration={duration:.2f}")
    return output

## Step 6 - Run !

In [22]:
# Start the timer for the whole run; the total duration is printed at the
# end of this cell.
start_time = time.time()

print('\n---- Run','-'*50)

# --------- Datasets, models, and more.. -----------------------------------
#
# Three parameter sets are provided below. Only one must be active; the two
# others stay commented out. Meaning of the parameters:
#   datasets     : dataset names (without .h5), read from dataset_dir
#   models       : {name: model builder}; the name is used in report columns
#   batch_size   : batch size given to model.fit
#   epochs       : number of epochs given to model.fit
#   train_size   : fraction of the train set to use (1 means all)
#   test_size    : fraction of the test set to use (1 means all)
#   with_datagen : True to train with an ImageDataGenerator (data augmentation)
#   verbose      : verbosity level given to model.fit
#
# ---- For tests
# datasets = ['set-24x24-L', 'set-24x24-RGB']
# models = {'v1':get_model_v1, 'v4':get_model_v2}
# batch_size = 64
# epochs = 2
# train_size = 0.1
# test_size = 0.1
# with_datagen = False
# verbose = 0
#
# ---- All possibilities
# datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']
# models = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}
# batch_size = 64
# epochs = 16
# train_size = 1
# test_size = 1
# with_datagen = False
# verbose = 0
#
# ---- Data augmentation (active parameter set)
datasets = ['set-48x48-RGB']
models = {'v2':get_model_v2}
batch_size = 64
epochs = 20
train_size = 1
test_size = 1
with_datagen = True
verbose = 0
#
# ---------------------------------------------------------------------------

# ---- Data augmentation
#
# Build an augmenting image generator only when requested; with None,
# multi_run trains directly on the arrays.
datagen = None
if with_datagen:
    datagen = keras.preprocessing.image.ImageDataGenerator(
        featurewise_center=False,
        featurewise_std_normalization=False,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.2,
        shear_range=0.1,
        rotation_range=10.,
    )

# ---- Run
#
# Logs and models of this run go to directories suffixed with tag_id.
output = multi_run(
    dataset_dir,
    datasets,
    models,
    datagen=datagen,
    train_size=train_size,
    test_size=test_size,
    batch_size=batch_size,
    epochs=epochs,
    verbose=verbose,
    extension_dir=tag_id,
)

# ---- Save report
#
# The report holds the result columns and a one-line summary of the settings.
report = {
    'output': output,
    'description': f'train_size={train_size} test_size={test_size} batch_size={batch_size} epochs={epochs} data_aug={with_datagen}',
}

report_name = f'./run/report_{tag_id}.json'

with open(report_name, 'w') as file:
    json.dump(report, file)

print('\nReport saved as ',report_name)

# ---- Total duration of this cell, measured from start_time
end_time = time.time()
duration = end_time - start_time
print(f'Duration : {duration:.2f} s')
print('-'*59)



---- Run --------------------------------------------------

Dataset : set-24x24-L
 Run model v1 : Accuracy=39.98 and Duration=2.23
 Run model v4 : Accuracy=6.18 and Duration=2.17

Dataset : set-24x24-RGB
 Run model v1 : Accuracy=53.52 and Duration=2.20
 Run model v4 : Accuracy=11.80 and Duration=2.01

Report saved as ./run/report_083052.json
Duration : 10.37 s
-----------------------------------------------------------


## Step 7 - That's all folks..

In [None]:
# Print a closing timestamp, e.g. "Friday 28 February 2020, 15:06:25".
# The day of month (without leading zero) is formatted by Python itself:
# the "%-d" strftime directive is a platform-specific extension and raises
# ValueError where the C library does not support it.
end_stamp = time.localtime()
print('\n{}'.format(time.strftime("%A {} %B %Y, %H:%M:%S".format(end_stamp.tm_mday), end_stamp)))
print("The work is done.\n")

---
<img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>