{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "German Traffic Sign Recognition Benchmark (GTSRB)\n", "=================================================\n", "---\n", "Introduction au Deep Learning (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020 \n", "\n", "## Episode 5 : Full Convolutions\n", "\n", "Our main steps:\n", " - Try n models with n datasets\n", " - Save a Pandas/h5 report\n", " - Write to be run in batch mode\n", "\n", "## 1/ Import" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", "from tensorflow import keras\n", "\n", "import numpy as np\n", "import h5py\n", "import os,time,json\n", "\n", "from IPython.display import display\n", "\n", "VERSION='1.2'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2/ Init and start" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print('\\nFull Convolutions Notebook')\n", "print(' Version : {}'.format(VERSION))\n", "print(' Run time : {}'.format(time.strftime(\"%A %-d %B %Y, %H:%M:%S\")))\n", "print(' TensorFlow version :',tf.__version__)\n", "print(' Keras version :',tf.keras.__version__)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3/ Dataset loading" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def read_dataset(name):\n", " '''Reads h5 dataset from ./data\n", "\n", " Arguments: dataset name, without .h5\n", " Returns: x_train,y_train,x_test,y_test data'''\n", " # ---- Read dataset\n", " filename='./data/'+name+'.h5'\n", " with h5py.File(filename) as f:\n", " x_train = f['x_train'][:]\n", " y_train = f['y_train'][:]\n", " x_test = f['x_test'][:]\n", " y_test = f['y_test'][:]\n", "\n", " return x_train,y_train,x_test,y_test" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4/ Models collection" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "# A basic model\n", "#\n", "def get_model_v1(lx,ly,lz):\n", " \n", " model = keras.models.Sequential()\n", " \n", " model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))\n", " model.add( keras.layers.MaxPooling2D((2, 2)))\n", " model.add( keras.layers.Dropout(0.2))\n", "\n", " model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))\n", " model.add( keras.layers.MaxPooling2D((2, 2)))\n", " model.add( keras.layers.Dropout(0.2))\n", "\n", " model.add( keras.layers.Flatten()) \n", " model.add( keras.layers.Dense(1500, activation='relu'))\n", " model.add( keras.layers.Dropout(0.5))\n", "\n", " model.add( keras.layers.Dense(43, activation='softmax'))\n", " return model\n", " \n", "# A more sophisticated model\n", "#\n", "def get_model_v2(lx,ly,lz):\n", " model = keras.models.Sequential()\n", "\n", " model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))\n", " model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))\n", " model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n", " model.add( keras.layers.Dropout(0.2))\n", "\n", " model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))\n", " model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))\n", " model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n", " model.add( keras.layers.Dropout(0.2))\n", "\n", " model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))\n", " model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))\n", " model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n", " model.add( keras.layers.Dropout(0.2))\n", "\n", " model.add( keras.layers.Flatten())\n", " model.add( keras.layers.Dense(512, activation='relu'))\n", " model.add( keras.layers.Dropout(0.5))\n", " model.add( keras.layers.Dense(43, activation='softmax'))\n", " return model\n", "\n", "# My sphisticated model, but small and fast\n", "#\n", "def get_model_v3(lx,ly,lz):\n", " model = keras.models.Sequential()\n", " model.add( keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(lx,ly,lz)))\n", " model.add( keras.layers.MaxPooling2D((2, 2)))\n", " model.add( keras.layers.Dropout(0.5))\n", "\n", " model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))\n", " model.add( keras.layers.MaxPooling2D((2, 2)))\n", " model.add( keras.layers.Dropout(0.5))\n", "\n", " model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))\n", " model.add( keras.layers.MaxPooling2D((2, 2)))\n", " model.add( keras.layers.Dropout(0.5))\n", "\n", " model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))\n", " model.add( keras.layers.MaxPooling2D((2, 2)))\n", " model.add( keras.layers.Dropout(0.5))\n", "\n", " model.add( keras.layers.Flatten()) \n", " model.add( keras.layers.Dense(1152, activation='relu'))\n", " model.add( keras.layers.Dropout(0.5))\n", "\n", " model.add( keras.layers.Dense(43, activation='softmax'))\n", " return model\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 5/ Multiple datasets, multiple models ;-)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def multi_run(datasets, models, batch_size=64, epochs=16):\n", "\n", " # ---- Columns of report\n", " #\n", " report={}\n", " report['Dataset']=[]\n", " report['Size'] =[]\n", " for m in models:\n", " report[m+' Accuracy'] = []\n", " report[m+' Duration'] = []\n", "\n", " # ---- Let's go\n", " #\n", " for d_name in datasets:\n", " print(\"\\nDataset : \",d_name)\n", "\n", " # ---- Read dataset\n", " x_train,y_train,x_test,y_test = read_dataset(d_name)\n", " d_size=os.path.getsize('./data/'+d_name+'.h5')/(1024*1024)\n", " report['Dataset'].append(d_name)\n", " report['Size'].append(d_size)\n", " \n", " # ---- Get the shape\n", " (n,lx,ly,lz) = x_train.shape\n", "\n", " # ---- For each model\n", " for m_name,m_function in models.items():\n", " print(\" Run model {} : \".format(m_name), end='')\n", " # ---- get model\n", " try:\n", " model=m_function(lx,ly,lz)\n", " # ---- Compile it\n", " model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n", " # ---- Callbacks tensorboard\n", " log_dir = \"./run/logs/tb_{}_{}\".format(d_name,m_name)\n", " tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)\n", " # ---- Callbacks bestmodel\n", " save_dir = \"./run/models/model_{}_{}.h5\".format(d_name,m_name)\n", " bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)\n", " # ---- Train\n", " start_time = time.time()\n", " history = model.fit( x_train, y_train,\n", " batch_size = batch_size,\n", " epochs = epochs,\n", " verbose = 0,\n", " validation_data = (x_test, y_test),\n", " callbacks = [tensorboard_callback, bestmodel_callback])\n", " # ---- Result\n", " end_time = time.time()\n", " duration = end_time-start_time\n", " accuracy = max(history.history[\"val_accuracy\"])*100\n", " #\n", " report[m_name+' Accuracy'].append(accuracy)\n", " report[m_name+' Duration'].append(duration)\n", " print(\"Accuracy={:.2f} and Duration={:.2f})\".format(accuracy,duration))\n", " except:\n", " report[m_name+' Accuracy'].append('-')\n", " report[m_name+' Duration'].append('-')\n", " print('-')\n", " return report" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 6/ Run\n", "### 6.1/ Clean" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "/bin/rm -r ./run/logs 2>/dev/null\n", "/bin/rm -r ./run/models 2>/dev/null\n", "/bin/mkdir -p -m 755 ./run/logs\n", "/bin/mkdir -p -m 755 ./run/models\n", "echo -e \"\\nReset directories : ./run/logs and ./run/models .\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 6.2/ run and save report" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%time\n", "\n", "print('\\n---- Run','-'*50)\n", "\n", "# ---- Datasets and models list\n", "\n", "# For tests\n", "# datasets = ['set-24x24-L', 'set-24x24-RGB']\n", "# models = {'v1':get_model_v1, 'v3':get_model_v3}\n", "\n", "# The real one\n", "datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']\n", "models = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}\n", "\n", "# ---- Report name\n", "\n", "report_name='./run/report-{}.json'.format(time.strftime(\"%Y-%m-%d_%Hh%Mm%Ss\"))\n", "\n", "# ---- Run\n", "\n", "out = multi_run(datasets, models, batch_size=64, epochs=2)\n", "\n", "# ---- Save report\n", "\n", "with open(report_name, 'w') as outfile:\n", " json.dump(out, outfile)\n", "\n", "print('\\nReport saved as ',report_name)\n", "print('-'*59)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 7/ That's all folks.." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print('\\n{}'.format(time.strftime(\"%A %-d %B %Y, %H:%M:%S\")))\n", "print(\"The work is done.\\n\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.5" } }, "nbformat": 4, "nbformat_minor": 4 }