{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img width=\"800px\" src=\"../fidle/img/00-Fidle-header-01.svg\"></img>\n",
"# <!-- TITLE --> [GTS5] - CNN with GTSRB dataset - Full convolutions \n",
"<!-- DESC --> Episode 5: A lot of models, a lot of datasets and a lot of results.\n",
"<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n",
"\n",
"## Objectives :\n",
" - Try multiple solutions\n",
" - Design a generic and batch-usable code\n",
" \n",
"The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes. \n",
"The final aim is to recognise them ! \n",
"Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset\n",
"\n",
"Our main steps:\n",
" - Try n models with n datasets\n",
" - Write to be run in batch mode\n",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"\n",
"import numpy as np\n",
"import h5py\n",
"import os,time,json\n",
"import random\n",
"\n",
"from IPython.display import display\n",
"\n",
"VERSION='1.6'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
]
},
{
"cell_type": "code",
"metadata": {},
"# ---- Where I am ?\n",
"now = time.strftime(\"%A %d %B %Y - %Hh%Mm%Ss\")\n",
"here = os.getcwd()\n",
"random.seed(time.time())\n",
"tag_id = '{:06}'.format(random.randint(0,99999))\n",
"\n",
"# ---- Who I am ?\n",
"if 'OAR_JOB_ID' in os.environ:\n",
" oar_id=os.environ['OAR_JOB_ID']\n",
"else:\n",
" oar_id='???'\n",
"\n",
"print('\\nFull Convolutions Notebook')\n",
"print(' Version : {}'.format(VERSION))\n",
"print(' Now is : {}'.format(now))\n",
"print(' OAR id : {}'.format(oar_id))\n",
"print(' Tag id : {}'.format(tag_id))\n",
"print(' Working directory : {}'.format(here))\n",
"print(' Keras version :',tf.keras.__version__)\n",
"print(' for tensorboard : --logdir {}/run/logs_{}'.format(here,tag_id))"
]
},
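{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same pattern works for other batch schedulers; only the environment variable changes. A minimal sketch, assuming a Slurm cluster that exposes `SLURM_JOB_ID` (an assumption; only `OAR_JOB_ID` is used elsewhere in this notebook)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Generic job id lookup (sketch)\n",
"# SLURM_JOB_ID is an assumption for Slurm clusters; OAR_JOB_ID is used above\n",
"def get_job_id():\n",
"    for var in ('OAR_JOB_ID', 'SLURM_JOB_ID'):\n",
"        if var in os.environ:\n",
"            return os.environ[var]\n",
"    return '???'\n",
"\n",
"print('Job id :', get_job_id())"
]
},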
{
"cell_type": "markdown",
"metadata": {},
"source": [
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"def read_dataset(name):\n",
" '''Reads h5 dataset from ./data\n",
"\n",
" Arguments: dataset name, without .h5\n",
" Returns: x_train,y_train,x_test,y_test data'''\n",
" # ---- Read dataset\n",
" filename='./data/'+name+'.h5'\n",
" with h5py.File(filename,'r') as f:\n",
" x_train = f['x_train'][:]\n",
" y_train = f['y_train'][:]\n",
" x_test = f['x_test'][:]\n",
" y_test = f['y_test'][:]\n",
"\n",
" return x_train,y_train,x_test,y_test"
]
},
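{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check can save a long batch run: load one dataset and look at the shapes. This is only a sketch; it assumes a previous episode produced `set-24x24-L.h5` in `./data` (adjust the name to a file you actually have)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Sanity check (sketch) : assumes ./data/set-24x24-L.h5 exists\n",
"x_train,y_train,x_test,y_test = read_dataset('set-24x24-L')\n",
"print('x_train :', x_train.shape, ' y_train :', y_train.shape)\n",
"print('x_test  :', x_test.shape,  ' y_test  :', y_test.shape)\n",
"print('classes :', len(set(y_train)))"
]
},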
{
"cell_type": "markdown",
"metadata": {},
"source": [
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# A basic model\n",
"#\n",
"def get_model_v1(lx,ly,lz):\n",
" \n",
" model = keras.models.Sequential()\n",
" \n",
" model.add( keras.layers.Conv2D(96, (3,3), activation='relu', input_shape=(lx,ly,lz)))\n",
" model.add( keras.layers.MaxPooling2D((2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D((2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Flatten()) \n",
" model.add( keras.layers.Dense(1500, activation='relu'))\n",
" model.add( keras.layers.Dropout(0.5))\n",
"\n",
" model.add( keras.layers.Dense(43, activation='softmax'))\n",
" return model\n",
" \n",
"# A more sophisticated model\n",
"#\n",
"def get_model_v2(lx,ly,lz):\n",
" model = keras.models.Sequential()\n",
"\n",
" model.add( keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=(lx,ly,lz), activation='relu'))\n",
" model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))\n",
" model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))\n",
" model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))\n",
" model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add( keras.layers.Dropout(0.2))\n",
"\n",
" model.add( keras.layers.Flatten())\n",
" model.add( keras.layers.Dense(512, activation='relu'))\n",
" model.add( keras.layers.Dropout(0.5))\n",
" model.add( keras.layers.Dense(43, activation='softmax'))\n",
" return model\n",
"\n",
"def get_model_v3(lx,ly,lz):\n",
" model = keras.models.Sequential()\n",
" model.add(tf.keras.layers.Conv2D(32, (5, 5), padding='same', activation='relu', input_shape=(lx,ly,lz)))\n",
" model.add(tf.keras.layers.BatchNormalization(axis=-1)) \n",
" model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add(tf.keras.layers.Dropout(0.2))\n",
"\n",
" model.add(tf.keras.layers.Conv2D(64, (5, 5), padding='same', activation='relu'))\n",
" model.add(tf.keras.layers.BatchNormalization(axis=-1))\n",
" model.add(tf.keras.layers.Conv2D(128, (5, 5), padding='same', activation='relu'))\n",
" model.add(tf.keras.layers.BatchNormalization(axis=-1))\n",
" model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
" model.add(tf.keras.layers.Dropout(0.2))\n",
"\n",
" model.add(tf.keras.layers.Flatten())\n",
" model.add(tf.keras.layers.Dense(512, activation='relu'))\n",
" model.add(tf.keras.layers.BatchNormalization())\n",
" model.add(tf.keras.layers.Dropout(0.4))\n",
"\n",
" model.add(tf.keras.layers.Dense(43, activation='softmax'))\n",
" return model"
]
},
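{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before launching hours of training, it can help to instantiate one model and check its size. A minimal sketch, assuming 24x24 grayscale input (lx=ly=24, lz=1); the other builders can be inspected the same way."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Quick inspection (sketch) : assumes 24x24x1 images, adapt to your dataset\n",
"m = get_model_v1(24, 24, 1)\n",
"m.summary()\n",
"print('Trainable parameters : {:,}'.format(m.count_params()))"
]
},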
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 5 - Multiple datasets, multiple models ;-)"
]
},
{
"cell_type": "code",
"metadata": {},
"outputs": [],
"source": [
"def multi_run(datasets, models, datagen=None,\n",
" train_size=1, test_size=1, batch_size=64, epochs=16, \n",
" verbose=0, extension_dir='last'):\n",
" # ---- Logs and models dir\n",
" #\n",
" os.makedirs('./run/logs_{}'.format(extension_dir), mode=0o750, exist_ok=True)\n",
" os.makedirs('./run/models_{}'.format(extension_dir), mode=0o750, exist_ok=True)\n",
" \n",
" # ---- Columns of output\n",
" output={}\n",
" output['Dataset']=[]\n",
" output['Size'] =[]\n",
" for m in models:\n",
" output[m+'_Accuracy'] = []\n",
" output[m+'_Duration'] = []\n",
"\n",
" # ---- Let's go\n",
" #\n",
" for d_name in datasets:\n",
" print(\"\\nDataset : \",d_name)\n",
"\n",
" # ---- Read dataset\n",
" x_train,y_train,x_test,y_test = read_dataset(d_name)\n",
" d_size=os.path.getsize('./data/'+d_name+'.h5')/(1024*1024)\n",
" output['Dataset'].append(d_name)\n",
" output['Size'].append(d_size)\n",
" \n",
" # ---- Get the shape\n",
" (n,lx,ly,lz) = x_train.shape\n",
" n_train = int(x_train.shape[0]*train_size)\n",
" n_test = int(x_test.shape[0]*test_size)\n",
"\n",
" # ---- For each model\n",
" for m_name,m_function in models.items():\n",
" print(\" Run model {} : \".format(m_name), end='')\n",
" # ---- get model\n",
" try:\n",
" # ---- Compile it\n",
" model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n",
" log_dir = \"./run/logs_{}/tb_{}_{}\".format(extension_dir, d_name, m_name)\n",
" tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)\n",
" # ---- Callbacks bestmodel\n",
" save_dir = \"./run/models_{}/model_{}_{}.h5\".format(extension_dir, d_name, m_name)\n",
" bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, monitor='accuracy', save_best_only=True)\n",
" # ---- Train\n",
" start_time = time.time()\n",
" if datagen==None:\n",
" # ---- No data augmentation (datagen=None) --------------------------------------\n",
" history = model.fit(x_train[:n_train], y_train[:n_train],\n",
" batch_size = batch_size,\n",
" epochs = epochs,\n",
" verbose = verbose,\n",
" validation_data = (x_test[:n_test], y_test[:n_test]),\n",
" callbacks = [tensorboard_callback, bestmodel_callback])\n",
" else:\n",
" # ---- Data augmentation (datagen given) ----------------------------------------\n",
" datagen.fit(x_train)\n",
" history = model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),\n",
" steps_per_epoch = int(n_train/batch_size),\n",
" epochs = epochs,\n",
" verbose = verbose,\n",
" validation_data = (x_test[:n_test], y_test[:n_test]),\n",
" callbacks = [tensorboard_callback, bestmodel_callback])\n",
" \n",
" # ---- Result\n",
" end_time = time.time()\n",
" duration = end_time-start_time\n",
" accuracy = max(history.history[\"val_accuracy\"])*100\n",
" #\n",
" output[m_name+'_Accuracy'].append(accuracy)\n",
" output[m_name+'_Duration'].append(duration)\n",
" print(\"Accuracy={:.2f} and Duration={:.2f})\".format(accuracy,duration))\n",
" except:\n",
" output[m_name+'_Accuracy'].append('0')\n",
" output[m_name+'_Duration'].append('999')\n",
" print('-')\n",
" return output"
]
},
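{
"cell_type": "markdown",
"metadata": {},
"source": [
"The dict returned by `multi_run` is column-oriented (one list per column), so it maps directly onto a table. A small display sketch, assuming pandas is available (it is not imported above):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Display helper (sketch) : assumes pandas is installed\n",
"import pandas as pd\n",
"\n",
"def show_output(output):\n",
"    '''Show a multi_run() output dict as a table'''\n",
"    df = pd.DataFrame(output)\n",
"    display(df)    # display() comes from IPython, imported above\n",
"    return df"
]
},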
{
"cell_type": "markdown",
"metadata": {},
"source": [
"execution_count": null,
"metadata": {},
"outputs": [],
"# --------- Datasets, models, and more.. -----------------------------------\n",
"#\n",
"# ---- For tests\n",
"# datasets = ['set-24x24-L', 'set-24x24-RGB']\n",
"# models = {'v1':get_model_v1, 'v4':get_model_v2}\n",
"# batch_size = 64\n",
"# epochs = 2\n",
"# train_size = 0.1\n",
"# test_size = 0.1\n",
"# with_datagen = False\n",
"# verbose = 0\n",
"#\n",
"# ---- All possibilities -> Run A\n",
"# datasets = ['set-24x24-L', 'set-24x24-RGB', 'set-48x48-L', 'set-48x48-RGB', 'set-24x24-L-LHE', 'set-24x24-RGB-HE', 'set-48x48-L-LHE', 'set-48x48-RGB-HE']\n",
"# models = {'v1':get_model_v1, 'v2':get_model_v2, 'v3':get_model_v3}\n",
"# batch_size = 64\n",
"# train_size = 1\n",
"# test_size = 1\n",
"# verbose = 0\n",
"#\n",
"# ---- Data augmentation -> Run B\n",
"datasets = ['set-48x48-RGB']\n",
"models = {'v2':get_model_v2}\n",
"batch_size = 64\n",
"epochs = 20\n",
"train_size = 1\n",
"test_size = 1\n",
"with_datagen = True\n",
"verbose = 0\n",
"#\n",
"# ---------------------------------------------------------------------------\n",
"# ---- Data augmentation\n",
"#\n",
"if with_datagen :\n",
" datagen = keras.preprocessing.image.ImageDataGenerator(featurewise_center=False,\n",
" featurewise_std_normalization=False,\n",
" width_shift_range=0.1,\n",
" height_shift_range=0.1,\n",
" zoom_range=0.2,\n",
" shear_range=0.1,\n",
" rotation_range=10.)\n",
"else:\n",
" datagen=None\n",
" \n",
"#\n",
"output = multi_run(datasets, models,\n",
" datagen=datagen,\n",
" train_size=train_size, test_size=test_size,\n",
" batch_size=batch_size, epochs=epochs,\n",
" verbose=verbose,\n",
" extension_dir=tag_id)\n",
"#\n",
"report={}\n",
"report['output']=output\n",
"report['description']='train_size={} test_size={} batch_size={} epochs={} data_aug={}'.format(train_size,test_size,batch_size,epochs,with_datagen)\n",
"\n",
"report_name='./run/report_{}.json'.format(tag_id)\n",
"with open(report_name, 'w') as file:\n",
" json.dump(report, file)\n",
"end_time = time.time()\n",
"duration = end_time-start_time\n",
"print(f'Duration : {duration:.2f} s')\n",
]
},
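{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each run leaves a `./run/report_<tag_id>.json` file, so results can be reloaded later, for instance to compare several batch runs. A minimal sketch, assuming the report written by the cell above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Reload a saved report (sketch) : assumes the report written above\n",
"with open('./run/report_{}.json'.format(tag_id)) as f:\n",
"    saved = json.load(f)\n",
"print('Description :', saved['description'])\n",
"print('Datasets    :', saved['output']['Dataset'])"
]
},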
{
"cell_type": "markdown",
"metadata": {},
"source": [
]
},
{
"execution_count": null,
"metadata": {},
"outputs": [],
"print('\\n{}'.format(time.strftime(\"%A %-d %B %Y, %H:%M:%S\")))\n",
"print(\"The work is done.\\n\")"
]
},
{
"metadata": {},
"<img width=\"80px\" src=\"../fidle/img/00-Fidle-logo-01.svg\"></img>"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
}
},
"nbformat": 4,
"nbformat_minor": 4
}