German Traffic Sign Recognition Benchmark (GTSRB)
=================================================
---
Introduction au Deep Learning  (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020  

## Episode 5 : Full Convolutions

Our main steps:
 - Try n models with n datasets
 - Save a report (JSON file)
 - Written so that it can be run in batch mode

## 1/ Import

In [None]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import h5py
import os,time,json
import random

from IPython.display import display

# Version string of this notebook, printed in the start-up banner.
VERSION='1.6'

## 2/ Init and start

In [None]:
# ---- Where and when are we running ?
#      tag_id is a random, zero-padded number used to name the run directories
random.seed(time.time())
tag_id = format(random.randint(0,99999), '06')
here   = os.getcwd()
now    = time.strftime("%A %d %B %Y - %Hh%Mm%Ss")

# ---- Which batch job are we ?
#      '???' is shown when the OAR_JOB_ID environment variable is not set
oar_id = os.environ.get('OAR_JOB_ID', '???')

# ---- Start-up banner
print('\nFull Convolutions Notebook')
print('  Version            : {}'.format(VERSION))
print('  Now is             : {}'.format(now))
print('  OAR id             : {}'.format(oar_id))
print('  Tag id             : {}'.format(tag_id))
print('  Working directory  : {}'.format(here))
print('  TensorFlow version :',tf.__version__)
print('  Keras version      :',tf.keras.__version__)
print('  for tensorboard    : --logdir {}/run/logs_{}'.format(here,tag_id))

## 3/ Dataset loading

In [None]:
def read_dataset(name):
    '''Load one dataset stored as an h5 file in ./data

    Arguments:
        name : dataset name, i.e. the file name without its .h5 extension
    Returns:
        x_train, y_train, x_test, y_test : the four entries of the same
        names, each one read in full from the file
    '''
    path    = ''.join(('./data/', name, '.h5'))
    entries = ('x_train', 'y_train', 'x_test', 'y_test')

    # ---- Every entry is read with [:] while the file is still open
    with h5py.File(path, 'r') as h5file:
        x_train, y_train, x_test, y_test = [h5file[entry][:] for entry in entries]

    return x_train, y_train, x_test, y_test

## 4/ Models collection

In [None]:

# Basic model : two convolution stages, then one dense hidden layer
#
def get_model_v1(lx,ly,lz):
    '''Build the v1 model (not compiled).

    Arguments:
        lx,ly,lz : the three dimensions given as input_shape to the first layer
    Returns:
        a keras Sequential model ending with a 43 units softmax layer
    '''
    layers = keras.layers

    return keras.models.Sequential([
        # ---- Convolution stage 1
        layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.2),
        # ---- Convolution stage 2
        layers.Conv2D(192, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.2),
        # ---- Dense part
        layers.Flatten(),
        layers.Dense(1500, activation='relu'),
        layers.Dropout(0.5),
        # ---- Output
        layers.Dense(43, activation='softmax'),
    ])
    
# More sophisticated model : three stages of two convolutions each (64, 128, 256 filters)
#
def get_model_v2(lx,ly,lz):
    '''Build the v2 model (not compiled).

    Arguments:
        lx,ly,lz : the three dimensions given as input_shape to the first layer
    Returns:
        a keras Sequential model ending with a 43 units softmax layer
    '''
    layers = keras.layers

    return keras.models.Sequential([
        # ---- Stage 1 : 64 filters
        layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.2),
        # ---- Stage 2 : 128 filters
        layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.2),
        # ---- Stage 3 : 256 filters
        layers.Conv2D(256, (3, 3), padding='same',activation='relu'),
        layers.Conv2D(256, (3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.2),
        # ---- Dense part and output
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(43, activation='softmax'),
    ])

def get_model_v3(lx,ly,lz):
    '''Build the v3 model (not compiled) : 5x5 convolutions with batch normalization.

    Arguments:
        lx,ly,lz : the three dimensions given as input_shape to the first layer
    Returns:
        a keras Sequential model ending with a 43 units softmax layer
    '''
    layers = tf.keras.layers

    return keras.models.Sequential([
        # ---- Stage 1 : one convolution
        layers.Conv2D(32, (5, 5), padding='same',  activation='relu', input_shape=(lx,ly,lz)),
        layers.BatchNormalization(axis=-1),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.2),
        # ---- Stage 2 : two convolutions
        layers.Conv2D(64, (5, 5), padding='same',  activation='relu'),
        layers.BatchNormalization(axis=-1),
        layers.Conv2D(128, (5, 5), padding='same', activation='relu'),
        layers.BatchNormalization(axis=-1),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.2),
        # ---- Dense part
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        # ---- Output
        layers.Dense(43, activation='softmax'),
    ])

## 5/ Multiple datasets, multiple models ;-)

In [None]:
def multi_run(datasets, models, datagen=None,
              train_size=1, test_size=1, batch_size=64, epochs=16, 
              verbose=0, extension_dir='last'):
    '''Train every given model on every given dataset and collect the results.

    Arguments:
        datasets      : list of dataset names (h5 files of ./data, without .h5)
        models        : dict {model name : function(lx,ly,lz) returning a model}
        datagen       : data augmentation generator offering fit() and flow(),
                        or None (default) to train without data augmentation
        train_size    : fraction of the train set to use (1 = all)
        test_size     : fraction of the test set to use  (1 = all)
        batch_size    : batch size
        epochs        : number of epochs
        verbose       : verbose level given to model.fit
        extension_dir : suffix of the ./run/logs_<suffix> and ./run/models_<suffix>
                        directories, which are created if needed
    Returns:
        a dict of columns (lists), one row per dataset :
          'Dataset'         : dataset name
          'Size'            : size of the h5 file, in bytes/(1024*1024)
          '<model>_Accuracy': best val_accuracy over the epochs, in %
          '<model>_Duration': training duration, in seconds
        A run that fails is recorded with an accuracy of 0.0 and a
        duration of 999.0, so that the columns only hold numbers.
    '''

    # ---- Logs and models dir
    #
    os.makedirs('./run/logs_{}'.format(extension_dir),   mode=0o750, exist_ok=True)
    os.makedirs('./run/models_{}'.format(extension_dir), mode=0o750, exist_ok=True)
    
    # ---- Columns of output
    #
    output = {'Dataset':[], 'Size':[]}
    for m_name in models:
        output[m_name+'_Accuracy'] = []
        output[m_name+'_Duration'] = []

    # ---- Let's go
    #
    for d_name in datasets:
        print("\nDataset : ",d_name)

        # ---- Read dataset
        x_train,y_train,x_test,y_test = read_dataset(d_name)
        d_size = os.path.getsize('./data/'+d_name+'.h5')/(1024*1024)
        output['Dataset'].append(d_name)
        output['Size'].append(d_size)
        
        # ---- Get the shape (number of samples is not needed here)
        _,lx,ly,lz = x_train.shape
        n_train = int(x_train.shape[0]*train_size)
        n_test  = int(x_test.shape[0]*test_size)

        # ---- Parts of the data really used, the same ones with or without
        #      data augmentation, so that train_size is always honoured
        x_fit, y_fit = x_train[:n_train], y_train[:n_train]
        validation   = (x_test[:n_test], y_test[:n_test])

        # ---- For each model
        for m_name,m_function in models.items():
            print("    Run model {}  : ".format(m_name), end='')
            try:
                # ---- Get model and compile it
                model = m_function(lx,ly,lz)
                model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
                # ---- Callbacks tensorboard
                log_dir = "./run/logs_{}/tb_{}_{}".format(extension_dir, d_name, m_name)
                tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
                # ---- Callbacks bestmodel
                # NOTE(review): the saved model is selected on the training
                # 'accuracy' whereas the reported result is the best
                # 'val_accuracy' - confirm that this is intended.
                save_dir = "./run/models_{}/model_{}_{}.h5".format(extension_dir, d_name, m_name)
                bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)
                callbacks = [tensorboard_callback, bestmodel_callback]
                # ---- Train
                start_time = time.time()
                if datagen is None:
                    # ---- No data augmentation (datagen=None) --------------------------------------
                    history = model.fit(x_fit, y_fit,
                                        batch_size      = batch_size,
                                        epochs          = epochs,
                                        verbose         = verbose,
                                        validation_data = validation,
                                        callbacks       = callbacks)
                else:
                    # ---- Data augmentation (datagen given) ----------------------------------------
                    datagen.fit(x_fit)
                    history = model.fit(datagen.flow(x_fit, y_fit, batch_size=batch_size),
                                        # at least one step, even with less than one batch of data
                                        steps_per_epoch = max(1, int(n_train/batch_size)),
                                        epochs          = epochs,
                                        verbose         = verbose,
                                        validation_data = validation,
                                        callbacks       = callbacks)
                # ---- Result
                duration = time.time()-start_time
                accuracy = max(history.history["val_accuracy"])*100
            except Exception as error:
                # ---- A failing run must not stop the whole batch : record numeric
                #      placeholders and show the reason. Only Exception is caught,
                #      so that KeyboardInterrupt / SystemExit still stop the run.
                output[m_name+'_Accuracy'].append(0.0)
                output[m_name+'_Duration'].append(999.0)
                print('- ({}: {})'.format(type(error).__name__, error))
            else:
                output[m_name+'_Accuracy'].append(accuracy)
                output[m_name+'_Duration'].append(duration)
                print("Accuracy={:.2f} and Duration={:.2f}".format(accuracy,duration))
    return output

## 6/ Run !

In [None]:
start_time = time.time()

print('\n---- Run','-'*50)

# --------- Datasets, models, and more.. -----------------------------------
#
# Three groups of settings are given below; only the last one (Run B) is
# active, the two others are kept commented out for reference.
#
# ---- For tests
# datasets   = ['set-24x24-L', 'set-24x24-RGB']
# models     = {'v1':get_model_v1, 'v4':get_model_v2}
# batch_size = 64
# epochs     = 2
# train_size = 0.1
# test_size  = 0.1
# with_datagen = False
# verbose      = 0
#
# ---- All possibilities -> Run A
# datasets     = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']
# models       = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}
# batch_size   = 64
# epochs       = 16
# train_size   = 1
# test_size    = 1
# with_datagen = False
# verbose      = 0
#
# ---- Data augmentation -> Run B
datasets                 = ['set-48x48-RGB']
models                   = {'v2':get_model_v2}
batch_size, epochs       = 64, 20
train_size, test_size    = 1, 1
with_datagen, verbose    = True, 0
#
# ---------------------------------------------------------------------------

# ---- Data augmentation : a generator when asked for, None otherwise
#
augmentation = dict(featurewise_center=False,
                    featurewise_std_normalization=False,
                    width_shift_range=0.1,
                    height_shift_range=0.1,
                    zoom_range=0.2,
                    shear_range=0.1,
                    rotation_range=10.)
datagen = keras.preprocessing.image.ImageDataGenerator(**augmentation) if with_datagen else None

# ---- Run, with the tag of this session as directories suffix
#
output = multi_run(datasets, models,
                   datagen       = datagen,
                   train_size    = train_size,
                   test_size     = test_size,
                   batch_size    = batch_size,
                   epochs        = epochs,
                   verbose       = verbose,
                   extension_dir = tag_id)

# ---- Save report : results and a one-line reminder of the settings
#
report = {
    'output'      : output,
    'description' : f'train_size={train_size} test_size={test_size} batch_size={batch_size} epochs={epochs} data_aug={with_datagen}',
}
report_name = f'./run/report_{tag_id}.json'

with open(report_name, 'w') as report_file:
    json.dump(report, report_file)

print('\nReport saved as ',report_name)

# ---- Total duration of this cell
end_time = time.time()
duration = end_time-start_time
print(f'Duration : {duration:.2f} s')
print('-'*59)


## 7/ That's all folks..

In [None]:
# ---- Closing timestamp
#      '%-d' (day of month without zero padding) is not a portable strftime
#      directive, so the day number is taken from the struct_time instead.
finished = time.localtime()
print('\n{} {} {}'.format(time.strftime("%A", finished),
                          finished.tm_mday,
                          time.strftime("%B %Y, %H:%M:%S", finished)))
print("The work is done.\n")