{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "German Traffic Sign Recognition Benchmark (GTSRB)\n", "=================================================\n", "---\n", "Introduction au Deep Learning (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020 \n", "\n", "## Episode 5 : Full Convolutions\n", "\n", "Our main steps:\n", " - Try n models with n datasets\n", " - Save a Pandas/h5 report\n", " - Write to be run in batch mode\n", "\n", "## 1/ Import" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", "from tensorflow import keras\n", "\n", "import numpy as np\n", "import h5py\n", "import os,time,json\n", "import random\n", "\n", "from IPython.display import display\n", "\n", "VERSION='1.6'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2/ Init and start" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Full Convolutions Notebook\n", " Version : 1.6\n", " Now is : Tuesday 21 January 2020 - 00h11m24s\n", " OAR id : ???\n", " Tag id : 077605\n", " Working directory : /home/pjluc/dev/fidle/GTSRB\n", " TensorFlow version : 2.0.0\n", " Keras version : 2.2.4-tf\n", " for tensorboard : --logdir /home/pjluc/dev/fidle/GTSRB/run/logs_077605\n" ] } ], "source": [ "# ---- Where I am ?\n", "now = time.strftime(\"%A %d %B %Y - %Hh%Mm%Ss\")\n", "here = os.getcwd()\n", "random.seed(time.time())\n", "tag_id = '{:06}'.format(random.randint(0,99999))\n", "\n", "# ---- Who I am ?\n", "if 'OAR_JOB_ID' in os.environ:\n", " oar_id=os.environ['OAR_JOB_ID']\n", "else:\n", " oar_id='???'\n", "\n", "print('\\nFull Convolutions Notebook')\n", "print(' Version : {}'.format(VERSION))\n", "print(' Now is : {}'.format(now))\n", "print(' OAR id : {}'.format(oar_id))\n", "print(' Tag id : {}'.format(tag_id))\n", "print(' Working directory : {}'.format(here))\n", "print(' TensorFlow version :',tf.__version__)\n", "print(' Keras version :',tf.keras.__version__)\n", "print(' for tensorboard : --logdir {}/run/logs_{}'.format(here,tag_id))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3/ Dataset loading" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def read_dataset(name):\n", " '''Reads h5 dataset from ./data\n", "\n", " Arguments: dataset name, without .h5\n", " Returns: x_train,y_train,x_test,y_test data'''\n", " # ---- Read dataset\n", " filename='./data/'+name+'.h5'\n", " with h5py.File(filename,'r') as f:\n", " x_train = f['x_train'][:]\n", " y_train = f['y_train'][:]\n", " x_test = f['x_test'][:]\n", " y_test = f['y_test'][:]\n", "\n", " return x_train,y_train,x_test,y_test" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4/ Models collection" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "\n", "# A basic model\n", "#\n", "def get_model_v1(lx,ly,lz):\n", " \n", " model = keras.models.Sequential()\n", " \n", " model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))\n", " model.add( keras.layers.MaxPooling2D((2, 2)))\n", " model.add( keras.layers.Dropout(0.2))\n", "\n", " model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))\n", " model.add( keras.layers.MaxPooling2D((2, 2)))\n", " model.add( keras.layers.Dropout(0.2))\n", "\n", " model.add( keras.layers.Flatten()) \n", " model.add( keras.layers.Dense(1500, activation='relu'))\n", " 
{ "cell_type": "markdown", "metadata": {}, "source": [ "## 4/ Models collection" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# A basic model\n", "#\n", "def get_model_v1(lx,ly,lz):\n", "\n", "    model = keras.models.Sequential()\n", "\n", "    model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))\n", "    model.add( keras.layers.MaxPooling2D((2, 2)))\n", "    model.add( keras.layers.Dropout(0.2))\n", "\n", "    model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))\n", "    model.add( keras.layers.MaxPooling2D((2, 2)))\n", "    model.add( keras.layers.Dropout(0.2))\n", "\n", "    model.add( keras.layers.Flatten())\n", "    model.add( keras.layers.Dense(1500, activation='relu'))\n", "    model.add( keras.layers.Dropout(0.5))\n", "\n", "    model.add( keras.layers.Dense(43, activation='softmax'))\n", "    return model\n", "\n", "# A more sophisticated model\n", "#\n", "def get_model_v2(lx,ly,lz):\n", "    model = keras.models.Sequential()\n", "\n", "    model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))\n", "    model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))\n", "    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n", "    model.add( keras.layers.Dropout(0.2))\n", "\n", "    model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))\n", "    model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))\n", "    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n", "    model.add( keras.layers.Dropout(0.2))\n", "\n", "    model.add( keras.layers.Conv2D(256, (3, 3), padding='same', activation='relu'))\n", "    model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))\n", "    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n", "    model.add( keras.layers.Dropout(0.2))\n", "\n", "    model.add( keras.layers.Flatten())\n", "    model.add( keras.layers.Dense(512, activation='relu'))\n", "    model.add( keras.layers.Dropout(0.5))\n", "    model.add( keras.layers.Dense(43, activation='softmax'))\n", "    return model\n", "\n", "# A model with batch normalization\n", "#\n", "def get_model_v3(lx,ly,lz):\n", "    model = keras.models.Sequential()\n", "    model.add( keras.layers.Conv2D(32, (5, 5), padding='same', activation='relu', input_shape=(lx,ly,lz)))\n", "    model.add( keras.layers.BatchNormalization(axis=-1))\n", "    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n", "    model.add( keras.layers.Dropout(0.2))\n", "\n", "    model.add( keras.layers.Conv2D(64, (5, 5), padding='same', activation='relu'))\n", "    model.add( keras.layers.BatchNormalization(axis=-1))\n", "    model.add( keras.layers.Conv2D(128, (5, 5), padding='same', activation='relu'))\n", "    model.add( keras.layers.BatchNormalization(axis=-1))\n", "    model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n", "    model.add( keras.layers.Dropout(0.2))\n", "\n", "    model.add( keras.layers.Flatten())\n", "    model.add( keras.layers.Dense(512, activation='relu'))\n", "    model.add( keras.layers.BatchNormalization())\n", "    model.add( keras.layers.Dropout(0.4))\n", "\n", "    model.add( keras.layers.Dense(43, activation='softmax'))\n", "    return model" ] },
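{ "cell_type": "markdown", "metadata": {}, "source": [ "To compare the three architectures at a glance, we can instantiate each one and count its parameters. This is only a sketch: the (24,24,3) input shape is an assumption for illustration; `multi_run()` below infers the real shape from each dataset." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# ---- Illustrative only: build each model for an assumed 24x24 RGB input\n", "#      and show its total number of parameters\n", "for m_name,m_function in {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}.items():\n", "    model = m_function(24,24,3)\n", "    print('Model {} : {:,} parameters'.format(m_name, model.count_params()))" ] },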
{ "cell_type": "markdown", "metadata": {}, "source": [ "## 5/ Multiple datasets, multiple models ;-)" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def multi_run(datasets, models, datagen=None,\n", "              train_size=1, test_size=1, batch_size=64, epochs=16,\n", "              verbose=0, extension_dir='last'):\n", "\n", "    # ---- Logs and models dir\n", "    #\n", "    os.makedirs('./run/logs_{}'.format(extension_dir), mode=0o750, exist_ok=True)\n", "    os.makedirs('./run/models_{}'.format(extension_dir), mode=0o750, exist_ok=True)\n", "\n", "    # ---- Columns of output\n", "    #\n", "    output={}\n", "    output['Dataset']=[]\n", "    output['Size'] =[]\n", "    for m in models:\n", "        output[m+'_Accuracy'] = []\n", "        output[m+'_Duration'] = []\n", "\n", "    # ---- Let's go\n", "    #\n", "    for d_name in datasets:\n", "        print(\"\\nDataset : \",d_name)\n", "\n", "        # ---- Read dataset\n", "        x_train,y_train,x_test,y_test = read_dataset(d_name)\n", "        d_size=os.path.getsize('./data/'+d_name+'.h5')/(1024*1024)   # dataset file size (MB)\n", "        output['Dataset'].append(d_name)\n", "        output['Size'].append(d_size)\n", "\n", "        # ---- Get the shape\n", "        (n,lx,ly,lz) = x_train.shape\n", "        n_train = int(x_train.shape[0]*train_size)\n", "        n_test  = int(x_test.shape[0]*test_size)\n", "\n", "        # ---- For each model\n", "        for m_name,m_function in models.items():\n", "            print(\" Run model {} : \".format(m_name), end='')\n", "            # ---- get model\n", "            try:\n", "                model=m_function(lx,ly,lz)\n", "                # ---- Compile it\n", "                model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n", "                # ---- Callbacks tensorboard\n", "                log_dir = \"./run/logs_{}/tb_{}_{}\".format(extension_dir, d_name, m_name)\n", "                tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)\n", "                # ---- Callbacks bestmodel : keep the best model, according to validation accuracy\n", "                save_dir = \"./run/models_{}/model_{}_{}.h5\".format(extension_dir, d_name, m_name)\n", "                bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='val_accuracy', save_best_only=True)\n", "                # ---- Train\n", "                start_time = time.time()\n", "                if datagen is None:\n", "                    # ---- No data augmentation (datagen=None) --------------------------------------\n", "                    history = model.fit(x_train[:n_train], y_train[:n_train],\n", "                                        batch_size = batch_size,\n", "                                        epochs = epochs,\n", "                                        verbose = verbose,\n", "                                        validation_data = (x_test[:n_test], y_test[:n_test]),\n", "                                        callbacks = [tensorboard_callback, bestmodel_callback])\n", "                else:\n", "                    # ---- Data augmentation (datagen given) ----------------------------------------\n", "                    datagen.fit(x_train)   # only needed for featurewise statistics\n", "                    history = model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),\n", "                                        steps_per_epoch = int(n_train/batch_size),\n", "                                        epochs = epochs,\n", "                                        verbose = verbose,\n", "                                        validation_data = (x_test[:n_test], y_test[:n_test]),\n", "                                        callbacks = [tensorboard_callback, bestmodel_callback])\n", "\n", "                # ---- Result\n", "                end_time = time.time()\n", "                duration = end_time-start_time\n", "                accuracy = max(history.history[\"val_accuracy\"])*100\n", "                #\n", "                output[m_name+'_Accuracy'].append(accuracy)\n", "                output[m_name+'_Duration'].append(duration)\n", "                print(\"Accuracy={:.2f} - Duration={:.2f} s\".format(accuracy,duration))\n", "            except Exception:\n", "                # ---- Something went wrong : keep numeric sentinels and move on\n", "                output[m_name+'_Accuracy'].append(0)\n", "                output[m_name+'_Duration'].append(999)\n", "                print('-')\n", "    return output" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## 6/ Run !" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "start_time = time.time()\n", "\n", "print('\\n---- Run','-'*50)\n", "\n", "# --------- Datasets, models, and more.. -----------------------------------\n", "#\n", "# ---- For tests\n", "# datasets = ['set-24x24-L', 'set-24x24-RGB']\n", "# models = {'v1':get_model_v1, 'v2':get_model_v2}\n", "# batch_size = 64\n", "# epochs = 2\n", "# train_size = 0.1\n", "# test_size = 0.1\n", "# with_datagen = False\n", "# verbose = 0\n", "#\n", "# ---- All possibilities -> Run A\n", "# datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']\n", "# models = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}\n", "# batch_size = 64\n", "# epochs = 16\n", "# train_size = 1\n", "# test_size = 1\n", "# with_datagen = False\n", "# verbose = 0\n", "#\n", "# ---- Data augmentation -> Run B\n", "datasets = ['set-48x48-RGB']\n", "models = {'v2':get_model_v2}\n", "batch_size = 64\n", "epochs = 20\n", "train_size = 1\n", "test_size = 1\n", "with_datagen = True\n", "verbose = 0\n", "#\n", "# ---------------------------------------------------------------------------\n", "\n", "# ---- Data augmentation\n", "#\n", "if with_datagen:\n", "    datagen = keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,\n", "                                                           featurewise_std_normalization=False,\n", "                                                           width_shift_range=0.1,\n", "                                                           height_shift_range=0.1,\n", "                                                           zoom_range=0.2,\n", "                                                           shear_range=0.1,\n", "                                                           rotation_range=10.)\n", "else:\n", "    datagen=None\n", "\n", "# ---- Run\n", "#\n", "output = multi_run(datasets, models,\n", "                   datagen=datagen,\n", "                   train_size=train_size, test_size=test_size,\n", "                   batch_size=batch_size, epochs=epochs,\n", "                   verbose=verbose,\n", "                   extension_dir=tag_id)\n", "\n", "# ---- Save report\n", "#\n", "report={}\n", "report['output']=output\n", "report['description']='train_size={} test_size={} batch_size={} epochs={} data_aug={}'.format(train_size,test_size,batch_size,epochs,with_datagen)\n", "\n", "report_name='./run/report_{}.json'.format(tag_id)\n", "\n", "with open(report_name, 'w') as file:\n", "    json.dump(report, file)\n", "\n", "print('\\nReport saved as ',report_name)\n", "end_time = time.time()\n", "duration = end_time-start_time\n", "print('Duration : {:.2f} s'.format(duration))\n", "print('-'*59)" ] },
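{ "cell_type": "markdown", "metadata": {}, "source": [ "The json report can be reloaded later for analysis, for example as a Pandas DataFrame. A minimal sketch (assumes the run above completed and that Pandas is installed):" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# ---- Illustrative: reload the saved report as a Pandas DataFrame\n", "#      (assumes the previous cell wrote report_<tag_id>.json)\n", "import pandas as pd\n", "\n", "with open(report_name) as f:\n", "    report_back = json.load(f)\n", "\n", "df = pd.DataFrame(report_back['output'])\n", "display(df)\n", "print(report_back['description'])" ] },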
{ "cell_type": "markdown", "metadata": {}, "source": [ "## 7/ That's all folks.." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print('\\n{}'.format(time.strftime(\"%A %-d %B %Y, %H:%M:%S\")))\n", "print(\"The work is done.\\n\")" ] }
], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.5" } }, "nbformat": 4, "nbformat_minor": 4 }