{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img width=\"800px\" src=\"../fidle/img/00-Fidle-header-01.svg\"></img>\n",
"# <!-- TITLE --> [GTS5] - CNN with GTSRB dataset - Full convolutions \n",
"<!-- DESC --> Episode 5 : A lot of models, a lot of datasets and a lot of results.\n",
"<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n",
"\n",
"## Objectives :\n",
" - Try multiple solutions\n",
" - Design a generic and batch-usable code\n",
" \n",
"The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes. \n",
"The final aim is to recognise them ! \n",
"Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset\n",
"\n",
"Our main steps:\n",
" - Try n models with n datasets\n",
" - Write to be run in batch mode\n",
"## Step 1 - Import\n",
"### 1.1 - Python"
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"\n",
"import numpy as np\n",
"import h5py\n",
"import os,time,json\n",
"import random\n",
"\n",
"from IPython.display import display\n",
"\n",
"VERSION='1.6'"
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1.2 - Where are we ? "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"We will use : dataset_dir=/bettik/PROJECTS/pr-fidle/datasets/GTSRB\n"
]
}
],
"source": [
"# At GRICAD\n",
"dataset_dir = '/bettik/PROJECTS/pr-fidle/datasets/GTSRB/'\n",
"\n",
"# At IDRIS\n",
"# dataset_dir = f'{os.getenv(\"WORK\",\"\")}/datasets/GTSRB'\n",
"\n",
"# At Home\n",
"# dataset_dir = f'{os.getenv(\"HOME\",\"\")}/datasets/GTSRB'\n",
"\n",
"print(f'We will use : dataset_dir={dataset_dir}')\n"
]
},
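{
"cell_type": "markdown",
"metadata": {},
"source": [
"If the notebook has to run unchanged in several places, a small sketch like the one below (purely illustrative ; the candidate paths are simply the three shown above) can pick the first location that actually exists :"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Sketch : pick the first existing location (candidates = the paths above)\n",
"candidates = ['/bettik/PROJECTS/pr-fidle/datasets/GTSRB',\n",
"              f'{os.getenv(\"WORK\",\"\")}/datasets/GTSRB',\n",
"              f'{os.getenv(\"HOME\",\"\")}/datasets/GTSRB']\n",
"# dataset_dir = next((d for d in candidates if os.path.isdir(d)), dataset_dir)"
]
},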
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2 - Init and start"
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Full Convolutions Notebook\n",
" Version : 1.6\n",
" Now is : Friday 28 February 2020 - 15h06m25s\n",
" OAR id : 5878410\n",
" SLURM id : ??\n",
" Tag id : 083052\n",
" Working directory : /home/paroutyj/fidle/GTSRB\n",
" Dataset_dir : /bettik/PROJECTS/pr-fidle/datasets/GTSRB\n",
" TensorFlow version : 2.0.0\n",
" Keras version : 2.2.4-tf\n",
" for tensorboard : --logdir /home/paroutyj/fidle/GTSRB/run/logs_083052\n"
]
}
],
"# ---- Where I am ?\n",
"now = time.strftime(\"%A %d %B %Y - %Hh%Mm%Ss\")\n",
"here = os.getcwd()\n",
"random.seed(time.time())\n",
"tag_id = '{:06}'.format(random.randint(0,99999))\n",
"\n",
"# ---- Who I am ?\n",
"oar_id = os.getenv(\"OAR_JOB_ID\", \"??\")\n",
"slurm_id = os.getenv(\"SLURM_JOBID\", \"??\")\n",
"print('\\nFull Convolutions Notebook')\n",
"print(' Version : {}'.format(VERSION))\n",
"print(' Now is : {}'.format(now))\n",
"print(' OAR id : {}'.format(oar_id))\n",
"print(' SLURM id : {}'.format(slurm_id))\n",
"print(' Tag id : {}'.format(tag_id))\n",
"print(' Working directory : {}'.format(here))\n",
"print(' Dataset_dir : {}'.format(dataset_dir))\n",
"print(' Keras version :',tf.keras.__version__)\n",
"print(' for tensorboard : --logdir {}/run/logs_{}'.format(here,tag_id))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 3 - Dataset loading"
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"def read_dataset(dataset_dir, name):\n",
" '''Reads h5 dataset from dataset_dir\n",
" Args:\n",
" dataset_dir : datasets dir\n",
" name : dataset name, without .h5\n",
" Returns: x_train,y_train,x_test,y_test data'''\n",
" # ---- Read dataset\n",
" filename=f'{dataset_dir}/{name}.h5'\n",
" with h5py.File(filename,'r') as f:\n",
" x_train = f['x_train'][:]\n",
" y_train = f['y_train'][:]\n",
" x_test = f['x_test'][:]\n",
" y_test = f['y_test'][:]\n",
"\n",
" # ---- done\n",
" return x_train,y_train,x_test,y_test "
]
},
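{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check (illustration only ; it assumes `set-24x24-L.h5` is present in `dataset_dir`), we can load one dataset and look at the shapes :"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Sanity check (illustration ; assumes set-24x24-L.h5 exists in dataset_dir)\n",
"x_train,y_train,x_test,y_test = read_dataset(dataset_dir, 'set-24x24-L')\n",
"print('x_train :', x_train.shape, '  y_train :', y_train.shape)\n",
"print('x_test  :', x_test.shape,  '  y_test  :', y_test.shape)"
]
},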
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 4 - Models collection"
]
},
{
"cell_type": "code",
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
"metadata": {},
"outputs": [],
"source": [
"\n",
"# A basic model\n",
"#\n",
"def get_model_v1(lx,ly,lz):\n",
" \n",
" model = keras.models.Sequential()\n",
" \n",
" model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))\n",
" model.add( keras.layers.MaxPooling2D((2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D((2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Flatten()) \n",
" model.add( keras.layers.Dense(1500, activation='relu'))\n",
" model.add( keras.layers.Dropout(0.5))\n",
"\n",
" model.add( keras.layers.Dense(43, activation='softmax'))\n",
" return model\n",
" \n",
"# A more sophisticated model\n",
"#\n",
"def get_model_v2(lx,ly,lz):\n",
" model = keras.models.Sequential()\n",
"\n",
" model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))\n",
" model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))\n",
" model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))\n",
" model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Flatten())\n",
" model.add( keras.layers.Dense(512, activation='relu'))\n",
" model.add( keras.layers.Dropout(0.5))\n",
" model.add( keras.layers.Dense(43, activation='softmax'))\n",
" return model\n",
"\n",
"def get_model_v3(lx,ly,lz):\n",
" model = keras.models.Sequential()\n",
" model.add(tf.keras.layers.Conv2D(32, (5, 5), padding='same', activation='relu', input_shape=(lx,ly,lz)))\n",
" model.add(tf.keras.layers.BatchNormalization(axis=-1)) \n",
" model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add(tf.keras.layers.Dropout(0.2))\n",
"\n",
" model.add(tf.keras.layers.Conv2D(64, (5, 5), padding='same', activation='relu'))\n",
" model.add(tf.keras.layers.BatchNormalization(axis=-1))\n",
" model.add(tf.keras.layers.Conv2D(128, (5, 5), padding='same', activation='relu'))\n",
" model.add(tf.keras.layers.BatchNormalization(axis=-1))\n",
" model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add(tf.keras.layers.Dropout(0.2))\n",
"\n",
" model.add(tf.keras.layers.Flatten())\n",
" model.add(tf.keras.layers.Dense(512, activation='relu'))\n",
" model.add(tf.keras.layers.BatchNormalization())\n",
" model.add(tf.keras.layers.Dropout(0.4))\n",
"\n",
" model.add(tf.keras.layers.Dense(43, activation='softmax'))\n",
" return model"
]
},
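{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before launching long runs, a model can be instantiated for a given input shape and inspected with `summary()` (a minimal sketch ; 24x24x1 corresponds to the `set-24x24-L` datasets) :"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Sketch : build one model and show its layers (24x24x1 = set-24x24-L shape)\n",
"model = get_model_v1(24,24,1)\n",
"model.summary()"
]
},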
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 5 - Multiple datasets, multiple models ;-)"
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"def multi_run(dataset_dir, datasets, models, datagen=None,\n",
" train_size=1, test_size=1, batch_size=64, epochs=16, \n",
" verbose=0, extension_dir='last'):\n",
" \"\"\"\n",
" Launches a dataset-model combination\n",
" args:\n",
" dataset_dir : Directory of the datasets\n",
" datasets : List of dataset (whitout .h5)\n",
" models : List of model like { \"model name\":get_model(), ...}\n",
" datagen : Data generator or None (None)\n",
" train_size : % of train dataset to use. 1 mean all. (1)\n",
" test_size : % of test dataset to use. 1 mean all. (1)\n",
" batch_size : Batch size (64)\n",
" epochs : Number of epochs (16)\n",
" verbose : Verbose level (0)\n",
" extension_dir : postfix for logs and models dir (_last)\n",
" return:\n",
" report : Report as a dict for Pandas.\n",
" \"\"\"\n",
" # ---- Logs and models dir\n",
" #\n",
" os.makedirs(f'./run/logs_{extension_dir}', mode=0o750, exist_ok=True)\n",
" os.makedirs(f'./run/models_{extension_dir}', mode=0o750, exist_ok=True)\n",
" \n",
" # ---- Columns of output\n",
" output={}\n",
" output['Dataset'] = []\n",
" output['Size'] = []\n",
" for m in models:\n",
" output[m+'_Accuracy'] = []\n",
" output[m+'_Duration'] = []\n",
"\n",
" # ---- Let's go\n",
" #\n",
" for d_name in datasets:\n",
" print(\"\\nDataset : \",d_name)\n",
"\n",
" # ---- Read dataset\n",
" x_train,y_train,x_test,y_test = read_dataset(dataset_dir, d_name)\n",
" d_size=os.path.getsize(f'{dataset_dir}/{d_name}.h5')/(1024*1024)\n",
" output['Dataset'].append(d_name)\n",
" output['Size'].append(d_size)\n",
" \n",
" # ---- Get the shape\n",
" (n,lx,ly,lz) = x_train.shape\n",
" n_train = int( x_train.shape[0] * train_size )\n",
" n_test = int( x_test.shape[0] * test_size )\n",
"\n",
" # ---- For each model\n",
" for m_name,m_function in models.items():\n",
" print(\" Run model {} : \".format(m_name), end='')\n",
" # ---- get model\n",
" try:\n",
" # ---- Compile it\n",
" model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n",
" log_dir = f\"./run/logs_{extension_dir}/tb_{d_name}_{m_name}\"\n",
" tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)\n",
" # ---- Callbacks bestmodel\n",
" save_dir = f\"./run/models_{extension_dir}/model_{d_name}_{m_name}.h5\"\n",
" bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)\n",
" # ---- Train\n",
" start_time = time.time()\n",
" if datagen==None:\n",
" # ---- No data augmentation (datagen=None) --------------------------------------\n",
" history = model.fit(x_train[:n_train], y_train[:n_train],\n",
" batch_size = batch_size,\n",
" epochs = epochs,\n",
" verbose = verbose,\n",
" validation_data = (x_test[:n_test], y_test[:n_test]),\n",
" callbacks = [tensorboard_callback, bestmodel_callback])\n",
" else:\n",
" # ---- Data augmentation (datagen given) ----------------------------------------\n",
" datagen.fit(x_train)\n",
" history = model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),\n",
" steps_per_epoch = int(n_train/batch_size),\n",
" epochs = epochs,\n",
" verbose = verbose,\n",
" validation_data = (x_test[:n_test], y_test[:n_test]),\n",
" callbacks = [tensorboard_callback, bestmodel_callback])\n",
" \n",
" # ---- Result\n",
" end_time = time.time()\n",
" duration = end_time-start_time\n",
" accuracy = max(history.history[\"val_accuracy\"])*100\n",
" #\n",
" output[m_name+'_Accuracy'].append(accuracy)\n",
" output[m_name+'_Duration'].append(duration)\n",
" print(f\"Accuracy={accuracy:.2f} and Duration={duration:.2f}\")\n",
" output[m_name+'_Accuracy'].append('0')\n",
" output[m_name+'_Duration'].append('999')\n",
" print('-')\n",
" return output"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"---- Run --------------------------------------------------\n",
"\n",
"Dataset : set-24x24-L\n",
" Run model v1 : Accuracy=39.98 and Duration=2.23\n",
" Run model v4 : Accuracy=6.18 and Duration=2.17\n",
"\n",
"Dataset : set-24x24-RGB\n",
" Run model v1 : Accuracy=53.52 and Duration=2.20\n",
" Run model v4 : Accuracy=11.80 and Duration=2.01\n",
"\n",
"Report saved as ./run/report_083052.json\n",
"Duration : 10.37 s\n",
"-----------------------------------------------------------\n"
]
}
],
"# --------- Datasets, models, and more.. -----------------------------------\n",
"#\n",
"# ---- For tests\n",
"# datasets = ['set-24x24-L', 'set-24x24-RGB']\n",
"# models = {'v1':get_model_v1, 'v4':get_model_v2}\n",
"# batch_size = 64\n",
"# epochs = 2\n",
"# train_size = 0.1\n",
"# test_size = 0.1\n",
"# with_datagen = False\n",
"# verbose = 0\n",
"#\n",
"# datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']\n",
"# models = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}\n",
"# batch_size = 64\n",
"# train_size = 1\n",
"# test_size = 1\n",
"# verbose = 0\n",
"#\n",
"datasets = ['set-48x48-RGB']\n",
"models = {'v2':get_model_v2}\n",
"batch_size = 64\n",
"epochs = 20\n",
"train_size = 1\n",
"test_size = 1\n",
"with_datagen = True\n",
"verbose = 0\n",
"#\n",
"# ---------------------------------------------------------------------------\n",
"# ---- Data augmentation\n",
"#\n",
"if with_datagen :\n",
" datagen = keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,\n",
" featurewise_std_normalization=False,\n",
" width_shift_range=0.1,\n",
" height_shift_range=0.1,\n",
" zoom_range=0.2,\n",
" shear_range=0.1,\n",
" rotation_range=10.)\n",
"else:\n",
" datagen=None\n",
" \n",
"#\n",
"output = multi_run(dataset_dir,\n",
" datasets, models,\n",
" datagen=datagen,\n",
" train_size=train_size, test_size=test_size,\n",
" batch_size=batch_size, epochs=epochs,\n",
" verbose=verbose,\n",
" extension_dir=tag_id)\n",
"#\n",
"report={}\n",
"report['output']=output\n",
"report['description']='train_size={} test_size={} batch_size={} epochs={} data_aug={}'.format(train_size,test_size,batch_size,epochs,with_datagen)\n",
"\n",
"report_name=f'./run/report_{tag_id}.json'\n",
"with open(report_name, 'w') as file:\n",
" json.dump(report, file)\n",
"end_time = time.time()\n",
"duration = end_time-start_time\n",
"print(f'Duration : {duration:.2f} s')\n",
]
},
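{
"cell_type": "markdown",
"metadata": {},
"source": [
"The report is a plain dict, so it can be turned into a Pandas DataFrame for a quick look (a minimal sketch ; it assumes the run above has completed and that Pandas is installed) :"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# ---- Sketch : quick look at the report (assumes the run above has completed)\n",
"df = pd.DataFrame(report['output'])\n",
"display(df)"
]
},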
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 6 - That's all folks !"
]
},
{
"execution_count": null,
"metadata": {},
"outputs": [],
"print('\\n{}'.format(time.strftime(\"%A %-d %B %Y, %H:%M:%S\")))\n",
"print(\"The work is done.\\n\")"
]
},
{
"metadata": {},
"<img width=\"80px\" src=\"../fidle/img/00-Fidle-logo-01.svg\"></img>"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
}
},
"nbformat": 4,
"nbformat_minor": 4
}