Newer
Older
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img width=\"800px\" src=\"../fidle/img/00-Fidle-header-01.svg\"></img>\n",
"# <!-- TITLE --> [GTS5] - CNN with GTSRB dataset - Full convolutions \n",
"<!-- DESC --> Episode 5 : A lot of models, a lot of datasets and a lot of results.\n",
"<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n",
"\n",
"## Objectives :\n",
" - Try multiple solutions\n",
" - Design a generic and batch-usable code\n",
" \n",
"The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes. \n",
"The final aim is to recognise them ! \n",
"Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset\n",
"\n",
"Our main steps:\n",
" - Try n models with n datasets\n",
" - Write to be run in batch mode\n",
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
{
"data": {
"text/html": [
"<style>\n",
"\n",
"div.warn { \n",
" background-color: #fcf2f2;\n",
" border-color: #dFb5b4;\n",
" border-left: 5px solid #dfb5b4;\n",
" padding: 0.5em;\n",
" font-weight: bold;\n",
" font-size: 1.1em;;\n",
" }\n",
"\n",
"\n",
"\n",
"div.nota { \n",
" background-color: #DAFFDE;\n",
" border-left: 5px solid #92CC99;\n",
" padding: 0.5em;\n",
" }\n",
"\n",
"div.todo:before { content:url();\n",
" float:left;\n",
" margin-right:20px;\n",
" margin-top:-20px;\n",
" margin-bottom:20px;\n",
"}\n",
"div.todo{\n",
" font-weight: bold;\n",
" font-size: 1.1em;\n",
" margin-top:40px;\n",
"}\n",
"div.todo ul{\n",
" margin: 0.2em;\n",
"}\n",
"div.todo li{\n",
" margin-left:60px;\n",
" margin-top:0;\n",
" margin-bottom:0;\n",
"}\n",
"\n",
"div .comment{\n",
" font-size:0.8em;\n",
" color:#696969;\n",
"}\n",
"\n",
"\n",
"\n",
"</style>\n",
"\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/markdown": [
"**FIDLE 2020 - Practical Work Module**"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Version : 0.6.1 DEV\n",
"Notebook id : GTS5\n",
"Run time : Thursday 17 December 2020, 22:07:09\n",
"TensorFlow version : 2.1.0\n",
"Keras version : 2.2.4-tf\n",
"Datasets dir : /gpfswork/rech/mlh/uja62cb/datasets\n",
"Running mode : full\n",
"Update keras cache : False\n",
"Save figs : True\n",
"Path figs : ./run/figs\n"
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"import numpy as np\n",
"import h5py\n",
"import sys,os,time,json\n",
"import random\n",
"from IPython.display import display\n",
"sys.path.append('..')\n",
"import fidle.pwk as pwk\n",
"datasets_dir = pwk.init('GTS5')"
{
"cell_type": "markdown",
"metadata": {},
"source": [
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Full Convolutions Notebook :\n",
" Now is : Thursday 17 December 2020 - 22h07m09s\n",
" SLURM id : 1874675\n",
" Tag id : 002079\n",
" Working directory : /gpfsdswork/projects/rech/mlh/uja62cb/fidle/GTSRB\n",
" Output directory : ./run\n",
" for tensorboard : --logdir /gpfsdswork/projects/rech/mlh/uja62cb/fidle/GTSRB/run/logs_002079\n"
"random.seed(time.time())\n",
"\n",
"# ---- Where I am ?\n",
"now = time.strftime(\"%A %d %B %Y - %Hh%Mm%Ss\")\n",
"here = os.getcwd()\n",
"tag_id = '{:06}'.format(random.randint(0,99999))\n",
"\n",
"# ---- Who I am ?\n",
"oar_id = os.getenv(\"OAR_JOB_ID\", \"??\")\n",
"slurm_id = os.getenv(\"SLURM_JOBID\", \"??\")\n",
"print('Full Convolutions Notebook :')\n",
"print(' Now is : {}'.format(now))\n",
"print(' OAR id : {}'.format(oar_id))\n",
"print(' SLURM id : {}'.format(slurm_id))\n",
"print(' Tag id : {}'.format(tag_id))\n",
"print(' Working directory : {}'.format(here))\n",
"print(' Output directory : ./run')\n",
"print(' for tensorboard : --logdir {}/run/logs_{}'.format(here,tag_id))"
"metadata": {},
"outputs": [],
"source": [
"# ---- Uncomment for batch tests\n",
"#\n",
"# print(\"\\n\\n*** Test mode - Exit before making big treatments... ***\\n\\n\")\n",
"# sys.exit()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"def read_dataset(dataset_dir, name):\n",
" '''Reads h5 dataset from dataset_dir\n",
" Args:\n",
" dataset_dir : datasets dir\n",
" name : dataset name, without .h5\n",
" Returns: x_train,y_train,x_test,y_test data'''\n",
" # ---- Read dataset\n",
" filename = f'{dataset_dir}/GTSRB/enhanced/{name}.h5'\n",
" size = os.path.getsize(filename)/(1024*1024)\n",
"\n",
" with h5py.File(filename,'r') as f:\n",
" x_train = f['x_train'][:]\n",
" y_train = f['y_train'][:]\n",
" x_test = f['x_test'][:]\n",
" y_test = f['y_test'][:]\n",
"\n",
" return x_train,y_train,x_test,y_test,size"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
]
},
{
"cell_type": "code",
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
"metadata": {},
"outputs": [],
"source": [
"\n",
"# A basic model\n",
"#\n",
"def get_model_v1(lx,ly,lz):\n",
" \n",
" model = keras.models.Sequential()\n",
" \n",
" model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))\n",
" model.add( keras.layers.MaxPooling2D((2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D((2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Flatten()) \n",
" model.add( keras.layers.Dense(1500, activation='relu'))\n",
" model.add( keras.layers.Dropout(0.5))\n",
"\n",
" model.add( keras.layers.Dense(43, activation='softmax'))\n",
" return model\n",
" \n",
"# A more sophisticated model\n",
"#\n",
"def get_model_v2(lx,ly,lz):\n",
" model = keras.models.Sequential()\n",
"\n",
" model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))\n",
" model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))\n",
" model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))\n",
" model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Flatten())\n",
" model.add( keras.layers.Dense(512, activation='relu'))\n",
" model.add( keras.layers.Dropout(0.5))\n",
" model.add( keras.layers.Dense(43, activation='softmax'))\n",
" return model\n",
"\n",
"def get_model_v3(lx,ly,lz):\n",
" model = keras.models.Sequential()\n",
" model.add(tf.keras.layers.Conv2D(32, (5, 5), padding='same', activation='relu', input_shape=(lx,ly,lz)))\n",
" model.add(tf.keras.layers.BatchNormalization(axis=-1)) \n",
" model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add(tf.keras.layers.Dropout(0.2))\n",
"\n",
" model.add(tf.keras.layers.Conv2D(64, (5, 5), padding='same', activation='relu'))\n",
" model.add(tf.keras.layers.BatchNormalization(axis=-1))\n",
" model.add(tf.keras.layers.Conv2D(128, (5, 5), padding='same', activation='relu'))\n",
" model.add(tf.keras.layers.BatchNormalization(axis=-1))\n",
" model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add(tf.keras.layers.Dropout(0.2))\n",
"\n",
" model.add(tf.keras.layers.Flatten())\n",
" model.add(tf.keras.layers.Dense(512, activation='relu'))\n",
" model.add(tf.keras.layers.BatchNormalization())\n",
" model.add(tf.keras.layers.Dropout(0.4))\n",
"\n",
" model.add(tf.keras.layers.Dense(43, activation='softmax'))\n",
" return model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 5 - Multiple datasets, multiple models ;-)"
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"def multi_run(datasets_dir, datasets, models, datagen=None,\n",
" train_size=1, test_size=1, batch_size=64, epochs=16, \n",
" verbose=0, extension_dir='last'):\n",
" \"\"\"\n",
" Launches a dataset-model combination\n",
" args:\n",
" datasets_dir : Directory of the datasets\n",
" datasets : List of dataset (whitout .h5)\n",
" models : List of model like { \"model name\":get_model(), ...}\n",
" datagen : Data generator or None (None)\n",
" train_size : % of train dataset to use. 1 mean all. (1)\n",
" test_size : % of test dataset to use. 1 mean all. (1)\n",
" batch_size : Batch size (64)\n",
" epochs : Number of epochs (16)\n",
" verbose : Verbose level (0)\n",
" extension_dir : postfix for logs and models dir (_last)\n",
" return:\n",
" report : Report as a dict for Pandas.\n",
" \"\"\"\n",
" # ---- Logs and models dir\n",
" #\n",
" os.makedirs(f'./run/logs_{extension_dir}', mode=0o750, exist_ok=True)\n",
" os.makedirs(f'./run/models_{extension_dir}', mode=0o750, exist_ok=True)\n",
" \n",
" # ---- Columns of output\n",
" output={}\n",
" output['Dataset'] = []\n",
" output['Size'] = []\n",
" for m in models:\n",
" output[m+'_Accuracy'] = []\n",
" output[m+'_Duration'] = []\n",
"\n",
" # ---- Let's go\n",
" #\n",
" for d_name in datasets:\n",
" print(\"\\nDataset : \",d_name)\n",
"\n",
" # ---- Read dataset\n",
" x_train,y_train,x_test,y_test, d_size = read_dataset(datasets_dir, d_name)\n",
" output['Dataset'].append(d_name)\n",
" output['Size'].append(d_size)\n",
" \n",
" # ---- Get the shape\n",
" (n,lx,ly,lz) = x_train.shape\n",
" n_train = int( x_train.shape[0] * train_size )\n",
" n_test = int( x_test.shape[0] * test_size )\n",
"\n",
" # ---- For each model\n",
" for m_name,m_function in models.items():\n",
" print(\" Run model {} : \".format(m_name), end='')\n",
" # ---- get model\n",
" try:\n",
" # ---- Compile it\n",
" model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n",
" log_dir = f\"./run/logs_{extension_dir}/tb_{d_name}_{m_name}\"\n",
" tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)\n",
" # ---- Callbacks bestmodel\n",
" save_dir = f\"./run/models_{extension_dir}/model_{d_name}_{m_name}.h5\"\n",
" bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)\n",
" # ---- Train\n",
" start_time = time.time()\n",
" if datagen==None:\n",
" # ---- No data augmentation (datagen=None) --------------------------------------\n",
" history = model.fit(x_train[:n_train], y_train[:n_train],\n",
" batch_size = batch_size,\n",
" epochs = epochs,\n",
" verbose = verbose,\n",
" validation_data = (x_test[:n_test], y_test[:n_test]),\n",
" callbacks = [tensorboard_callback, bestmodel_callback])\n",
" else:\n",
" # ---- Data augmentation (datagen given) ----------------------------------------\n",
" datagen.fit(x_train)\n",
" history = model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),\n",
" steps_per_epoch = int(n_train/batch_size),\n",
" epochs = epochs,\n",
" verbose = verbose,\n",
" validation_data = (x_test[:n_test], y_test[:n_test]),\n",
" callbacks = [tensorboard_callback, bestmodel_callback])\n",
" \n",
" # ---- Result\n",
" end_time = time.time()\n",
" duration = end_time-start_time\n",
" accuracy = max(history.history[\"val_accuracy\"])*100\n",
" #\n",
" output[m_name+'_Accuracy'].append(accuracy)\n",
" output[m_name+'_Duration'].append(duration)\n",
" print(f\"Accuracy={accuracy:.2f} and Duration={duration:.2f}\")\n",
" output[m_name+'_Accuracy'].append('0')\n",
" output[m_name+'_Duration'].append('999')\n",
" print('-')\n",
" return output"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"---- Run --------------------------------------------------\n",
"\n",
"Dataset : set-24x24-L\n",
" Run model v1 : WARNING:tensorflow:Method (on_train_batch_end) is slow compared to the batch update (0.218721). Check your callbacks.\n",
"Accuracy=88.99 and Duration=8.01\n",
" Run model v2 : Accuracy=87.77 and Duration=4.63\n",
" Run model v3 : Accuracy=88.80 and Duration=5.25\n",
"\n",
"Dataset : set-24x24-RGB\n",
" Run model v1 : Accuracy=89.98 and Duration=5.12\n",
" Run model v2 : Accuracy=89.35 and Duration=4.56\n",
" Run model v3 : Accuracy=86.70 and Duration=5.22\n",
" Run model v1 : Accuracy=88.64 and Duration=18.32\n",
" Run model v2 : Accuracy=89.71 and Duration=10.17\n",
" Run model v3 : Accuracy=92.16 and Duration=11.10\n",
"Report saved as ./run/report_002079.json\n",
"Duration : 77.23 s\n",
"-----------------------------------------------------------\n"
]
}
],
"# --------- Datasets, models, and more.. -----------------------------------\n",
"#\n",
"# ---- For tests\n",
"datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-RGB']\n",
"models = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}\n",
"batch_size = 64\n",
"epochs = 5\n",
"train_size = 0.2\n",
"test_size = 0.2\n",
"with_datagen = False\n",
"verbose = 0\n",
"#\n",
"# datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']\n",
"# models = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}\n",
"# train_size = 1\n",
"# test_size = 1\n",
"# datasets = ['set-48x48-RGB']\n",
"# models = {'v2':get_model_v2}\n",
"# batch_size = 64\n",
"# epochs = 20\n",
"# train_size = 1\n",
"# test_size = 1\n",
"# with_datagen = True\n",
"# verbose = 0\n",
"# ---------------------------------------------------------------------------\n",
"# ---- Data augmentation\n",
"#\n",
"if with_datagen :\n",
" datagen = keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,\n",
" featurewise_std_normalization=False,\n",
" width_shift_range=0.1,\n",
" height_shift_range=0.1,\n",
" zoom_range=0.2,\n",
" shear_range=0.1,\n",
" rotation_range=10.)\n",
"else:\n",
" datagen=None\n",
" \n",
"#\n",
"output = multi_run(datasets_dir,\n",
" datasets, \n",
" models,\n",
" datagen = datagen,\n",
" train_size = train_size,\n",
" test_size = test_size,\n",
" batch_size = batch_size,\n",
" epochs = epochs,\n",
" verbose = verbose,\n",
" extension_dir = tag_id)\n",
"#\n",
"report={}\n",
"report['output']=output\n",
"report['description'] = f'train_size={train_size} test_size={test_size} batch_size={batch_size} epochs={epochs} data_aug={with_datagen}'\n",
"\n",
"report_name=f'./run/report_{tag_id}.json'\n",
"with open(report_name, 'w') as file:\n",
" json.dump(report, file)\n",
"end_time = time.time()\n",
"duration = end_time-start_time\n",
"print(f'Duration : {duration:.2f} s')\n",
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
]
},
{
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"End time is : Thursday 17 December 2020, 22:08:26\n",
"Duration is : 00:01:17 312ms\n",
"This notebook ends here\n"
]
},
{
"metadata": {},
"<img width=\"80px\" src=\"../fidle/img/00-Fidle-logo-01.svg\"></img>"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
}
},
"nbformat": 4,
"nbformat_minor": 4
}