{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"German Traffic Sign Recognition Benchmark (GTSRB)\n",
"=================================================\n",
"---\n",
"Introduction au Deep Learning (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020 \n",
"\n",
"\n",
"Our main steps:\n",
" - Try n models with n datasets\n",
" - Write to be run in batch mode\n",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"\n",
"import numpy as np\n",
"import h5py\n",
"import os,time,json\n",
"import random\n",
"\n",
"from IPython.display import display\n",
"\n",
"VERSION='1.6'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2/ Init and start"
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Full Convolutions Notebook\n",
" Version : 1.6\n",
" Now is : Tuesday 21 January 2020 - 00h11m24s\n",
" OAR id : ???\n",
" Tag id : 077605\n",
" Working directory : /home/pjluc/dev/fidle/GTSRB\n",
" TensorFlow version : 2.0.0\n",
" Keras version : 2.2.4-tf\n",
" for tensorboard : --logdir /home/pjluc/dev/fidle/GTSRB/run/logs_077605\n"
]
}
],
"source": [
"# ---- Where am I ?\n",
"now = time.strftime(\"%A %d %B %Y - %Hh%Mm%Ss\")\n",
"here = os.getcwd()\n",
"random.seed(time.time())\n",
"tag_id = '{:06}'.format(random.randint(0,99999))\n",
"\n",
"# ---- Who I am ?\n",
"if 'OAR_JOB_ID' in os.environ:\n",
" oar_id=os.environ['OAR_JOB_ID']\n",
"else:\n",
" oar_id='???'\n",
"\n",
"print('\\nFull Convolutions Notebook')\n",
"print(' Version : {}'.format(VERSION))\n",
"print(' Now is : {}'.format(now))\n",
"print(' OAR id : {}'.format(oar_id))\n",
"print(' Tag id : {}'.format(tag_id))\n",
"print(' Working directory : {}'.format(here))\n",
"print(' Keras version :',tf.keras.__version__)\n",
"print(' for tensorboard : --logdir {}/run/logs_{}'.format(here,tag_id))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3/ Dataset reading"
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"def read_dataset(name):\n",
" '''Reads h5 dataset from ./data\n",
"\n",
" Arguments: dataset name, without .h5\n",
" Returns: x_train,y_train,x_test,y_test data'''\n",
" # ---- Read dataset\n",
" filename='./data/'+name+'.h5'\n",
" with h5py.File(filename,'r') as f:\n",
" x_train = f['x_train'][:]\n",
" y_train = f['y_train'][:]\n",
" x_test = f['x_test'][:]\n",
" y_test = f['y_test'][:]\n",
"\n",
" return x_train,y_train,x_test,y_test"
]
},
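{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before a long batch run, a quick sanity check on one dataset can catch shape or dtype surprises early. The cell below is a minimal sketch, assuming a file such as `./data/set-24x24-RGB.h5` exists (any of the dataset names used later works)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Sanity check on one dataset (sketch ; assumes ./data/set-24x24-RGB.h5 exists)\n",
"x_train,y_train,x_test,y_test = read_dataset('set-24x24-RGB')\n",
"print('x_train :', x_train.shape, x_train.dtype)\n",
"print('y_train :', y_train.shape, y_train.dtype)\n",
"print('x_test  :', x_test.shape)\n",
"print('classes :', len(np.unique(y_train)))"
]
},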
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4/ Models collection"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# A basic model\n",
"#\n",
"def get_model_v1(lx,ly,lz):\n",
" \n",
" model = keras.models.Sequential()\n",
" \n",
" model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))\n",
" model.add( keras.layers.MaxPooling2D((2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D((2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Flatten()) \n",
" model.add( keras.layers.Dense(1500, activation='relu'))\n",
" model.add( keras.layers.Dropout(0.5))\n",
"\n",
" model.add( keras.layers.Dense(43, activation='softmax'))\n",
" return model\n",
" \n",
"# A more sophisticated model\n",
"#\n",
"def get_model_v2(lx,ly,lz):\n",
" model = keras.models.Sequential()\n",
"\n",
" model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))\n",
" model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))\n",
" model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))\n",
" model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Flatten())\n",
" model.add( keras.layers.Dense(512, activation='relu'))\n",
" model.add( keras.layers.Dropout(0.5))\n",
" model.add( keras.layers.Dense(43, activation='softmax'))\n",
" return model\n",
"\n",
"def get_model_v3(lx,ly,lz):\n",
" model = keras.models.Sequential()\n",
" model.add(tf.keras.layers.Conv2D(32, (5, 5), padding='same', activation='relu', input_shape=(lx,ly,lz)))\n",
" model.add(tf.keras.layers.BatchNormalization(axis=-1)) \n",
" model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add(tf.keras.layers.Dropout(0.2))\n",
"\n",
" model.add(tf.keras.layers.Conv2D(64, (5, 5), padding='same', activation='relu'))\n",
" model.add(tf.keras.layers.BatchNormalization(axis=-1))\n",
" model.add(tf.keras.layers.Conv2D(128, (5, 5), padding='same', activation='relu'))\n",
" model.add(tf.keras.layers.BatchNormalization(axis=-1))\n",
" model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add(tf.keras.layers.Dropout(0.2))\n",
"\n",
" model.add(tf.keras.layers.Flatten())\n",
" model.add(tf.keras.layers.Dense(512, activation='relu'))\n",
" model.add(tf.keras.layers.BatchNormalization())\n",
" model.add(tf.keras.layers.Dropout(0.4))\n",
"\n",
" model.add(tf.keras.layers.Dense(43, activation='softmax'))\n",
" return model"
]
},
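{
"cell_type": "markdown",
"metadata": {},
"source": [
"It can be worth checking that each candidate model builds, and comparing their sizes, before launching the runs. A small sketch, assuming 24x24 RGB inputs (lx=ly=24, lz=3) ; adjust to the shapes of your datasets."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Build each model once and compare parameter counts (24x24 RGB assumed)\n",
"for name,fn in {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}.items():\n",
"    m = fn(24,24,3)\n",
"    print('{:3s} : {:>12,d} params'.format(name, m.count_params()))"
]
},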
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5/ Multi run"
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"def multi_run(datasets, models, datagen=None,\n",
" train_size=1, test_size=1, batch_size=64, epochs=16, \n",
" verbose=0, extension_dir='last'):\n",
" # ---- Logs and models dir\n",
" #\n",
" os.makedirs('./run/logs_{}'.format(extension_dir), mode=0o750, exist_ok=True)\n",
" os.makedirs('./run/models_{}'.format(extension_dir), mode=0o750, exist_ok=True)\n",
" \n",
" # ---- Columns of output\n",
" output={}\n",
" output['Dataset']=[]\n",
" output['Size'] =[]\n",
" for m in models:\n",
" output[m+'_Accuracy'] = []\n",
" output[m+'_Duration'] = []\n",
"\n",
" # ---- Let's go\n",
" #\n",
" for d_name in datasets:\n",
" print(\"\\nDataset : \",d_name)\n",
"\n",
" # ---- Read dataset\n",
" x_train,y_train,x_test,y_test = read_dataset(d_name)\n",
" d_size=os.path.getsize('./data/'+d_name+'.h5')/(1024*1024)\n",
" output['Dataset'].append(d_name)\n",
" output['Size'].append(d_size)\n",
" \n",
" # ---- Get the shape\n",
" (n,lx,ly,lz) = x_train.shape\n",
" n_train = int(x_train.shape[0]*train_size)\n",
" n_test = int(x_test.shape[0]*test_size)\n",
"\n",
" # ---- For each model\n",
" for m_name,m_function in models.items():\n",
" print(\" Run model {} : \".format(m_name), end='')\n",
" # ---- get model\n",
" try:\n",
" # ---- Compile it\n",
" model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n",
" log_dir = \"./run/logs_{}/tb_{}_{}\".format(extension_dir, d_name, m_name)\n",
" tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)\n",
" # ---- Callbacks bestmodel\n",
" save_dir = \"./run/models_{}/model_{}_{}.h5\".format(extension_dir, d_name, m_name)\n",
" bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)\n",
" # ---- Train\n",
" start_time = time.time()\n",
" if datagen==None:\n",
" # ---- No data augmentation (datagen=None) --------------------------------------\n",
" history = model.fit(x_train[:n_train], y_train[:n_train],\n",
" batch_size = batch_size,\n",
" epochs = epochs,\n",
" verbose = verbose,\n",
" validation_data = (x_test[:n_test], y_test[:n_test]),\n",
" callbacks = [tensorboard_callback, bestmodel_callback])\n",
" else:\n",
" # ---- Data augmentation (datagen given) ----------------------------------------\n",
" datagen.fit(x_train)\n",
" history = model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),\n",
" steps_per_epoch = int(n_train/batch_size),\n",
" epochs = epochs,\n",
" verbose = verbose,\n",
" validation_data = (x_test[:n_test], y_test[:n_test]),\n",
" callbacks = [tensorboard_callback, bestmodel_callback])\n",
" \n",
" # ---- Result\n",
" end_time = time.time()\n",
" duration = end_time-start_time\n",
" accuracy = max(history.history[\"val_accuracy\"])*100\n",
" #\n",
" output[m_name+'_Accuracy'].append(accuracy)\n",
" output[m_name+'_Duration'].append(duration)\n",
" print(\"Accuracy={:.2f} and Duration={:.2f})\".format(accuracy,duration))\n",
" except:\n",
" output[m_name+'_Accuracy'].append('0')\n",
" output[m_name+'_Duration'].append('999')\n",
" print('-')\n",
" return output"
]
},
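{
"cell_type": "markdown",
"metadata": {},
"source": [
"The dict returned by `multi_run` is column-oriented, so it maps directly onto a `pandas.DataFrame` for display. The helper below is just a convenience sketch and assumes pandas is installed ; `display` was imported above. Call it on `output` after the run."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Pretty-print a multi_run output (sketch ; assumes pandas is available)\n",
"import pandas as pd\n",
"\n",
"def show_output(output):\n",
"    df = pd.DataFrame(output)\n",
"    display(df)"
]
},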
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6/ Run !"
"execution_count": null,
"metadata": {},
"outputs": [],
"# --------- Datasets, models, and more.. -----------------------------------\n",
"#\n",
"# ---- For tests\n",
"# datasets = ['set-24x24-L', 'set-24x24-RGB']\n",
"# models = {'v1':get_model_v1, 'v4':get_model_v2}\n",
"# batch_size = 64\n",
"# epochs = 2\n",
"# train_size = 0.1\n",
"# test_size = 0.1\n",
"# with_datagen = False\n",
"# verbose = 0\n",
"#\n",
"# ---- All possibilities -> Run A\n",
"# datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']\n",
"# models = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}\n",
"# batch_size = 64\n",
"# train_size = 1\n",
"# test_size = 1\n",
"# verbose = 0\n",
"#\n",
"# ---- Data augmentation -> Run B\n",
"datasets = ['set-48x48-RGB']\n",
"models = {'v2':get_model_v2}\n",
"batch_size = 64\n",
"epochs = 20\n",
"train_size = 1\n",
"test_size = 1\n",
"with_datagen = True\n",
"verbose = 0\n",
"#\n",
"# ---------------------------------------------------------------------------\n",
"# ---- Data augmentation\n",
"#\n",
"if with_datagen :\n",
" datagen = keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,\n",
" featurewise_std_normalization=False,\n",
" width_shift_range=0.1,\n",
" height_shift_range=0.1,\n",
" zoom_range=0.2,\n",
" shear_range=0.1,\n",
" rotation_range=10.)\n",
"else:\n",
" datagen=None\n",
" \n",
"#\n",
"output = multi_run(datasets, models,\n",
" datagen=datagen,\n",
" train_size=train_size, test_size=test_size,\n",
" batch_size=batch_size, epochs=epochs,\n",
" verbose=verbose,\n",
" extension_dir=tag_id)\n",
"#\n",
"report={}\n",
"report['output']=output\n",
"report['description']='train_size={} test_size={} batch_size={} epochs={} data_aug={}'.format(train_size,test_size,batch_size,epochs,with_datagen)\n",
"\n",
"report_name='./run/report_{}.json'.format(tag_id)\n",
"with open(report_name, 'w') as file:\n",
" json.dump(report, file)\n",
"end_time = time.time()\n",
"duration = end_time-start_time\n",
"print('Duration : {} s'.format(duration))\n",
]
},
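{
"cell_type": "markdown",
"metadata": {},
"source": [
"Because each run saves its report as JSON, results can be reloaded later without re-training, for example to aggregate several batch runs. A minimal sketch, assuming the report written just above still exists :"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Reload the saved report (sketch ; assumes the run above completed)\n",
"with open('./run/report_{}.json'.format(tag_id)) as f:\n",
"    saved = json.load(f)\n",
"print(saved['description'])\n",
"print(list(saved['output'].keys()))"
]
},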
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7/ That's all folks !"
]
},
{
"execution_count": null,
"metadata": {},
"outputs": [],
"print('\\n{}'.format(time.strftime(\"%A %-d %B %Y, %H:%M:%S\")))\n",
"print(\"The work is done.\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}