From 3b82f51d7377357d59367d34220e5bf591fbf05e Mon Sep 17 00:00:00 2001
From: Jean-Luc Parouty <Jean-Luc.Parouty@grenoble-inp.fr>
Date: Thu, 23 Jan 2020 18:22:59 +0100
Subject: [PATCH] Add BHPD example, another classic one :-)

---
 BHPD/01-DNN-Regression.ipynb      | 310 ++++++++++++++++++++++++++++++
 BHPD/fidle/__init__.py            |   2 +
 BHPD/fidle/pwk.py                 | 244 +++++++++++++++++++++++
 BHPD/fidle/talk.mplstyle          |  33 ++++
 GTSRB/02-First-convolutions.ipynb |  14 +-
 MNIST/01-DNN-MNIST.ipynb          |   4 +-
 MNIST/fidle/pwk.py                |   3 +-
 7 files changed, 599 insertions(+), 11 deletions(-)
 create mode 100644 BHPD/01-DNN-Regression.ipynb
 create mode 100644 BHPD/fidle/__init__.py
 create mode 100644 BHPD/fidle/pwk.py
 create mode 100644 BHPD/fidle/talk.mplstyle

diff --git a/BHPD/01-DNN-Regression.ipynb b/BHPD/01-DNN-Regression.ipynb
new file mode 100644
index 0000000..947b95a
--- /dev/null
+++ b/BHPD/01-DNN-Regression.ipynb
@@ -0,0 +1,310 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Deep Neural Network (DNN) - BHPD dataset\n",
+    "========================================\n",
+    "---\n",
+    "Introduction au Deep Learning  (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020  \n",
+    "\n",
+    "## A very simple example of **regression** :\n",
+    "\n",
+    "The objective is to predict **housing prices** from a set of house features. \n",
+    "\n",
+    "The **[Boston Housing Dataset](https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html)** consists of prices of houses in various places in Boston.  \n",
+    "Alongside the price, the dataset also provides information such as crime, areas of non-retail business in the town,  \n",
+    "age of people who own the house and many other attributes...\n",
+    "\n",
+    "What we're going to do:\n",
+    "\n",
+    " - Retrieve data\n",
+    " - Preparing the data\n",
+    " - Build a model\n",
+    " - Train the model\n",
+    " - Evaluate the result\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1/ Init python stuff"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "from tensorflow import keras\n",
+    "\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "\n",
+    "from IPython.display import display, Markdown\n",
+    "import fidle.pwk as ooo\n",
+    "from importlib import reload\n",
+    "\n",
+    "ooo.init()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2/ Retrieve data\n",
+    "\n",
+    "**From Keras :**\n",
+    "Boston housing is a famous historic dataset, so we can get it directly from [Keras datasets](https://www.tensorflow.org/api_docs/python/tf/keras/datasets)  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(x_train, y_train), (x_test, y_test) = keras.datasets.boston_housing.load_data(test_split=0.2, seed=113)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**From a csv file :**  \n",
+    "More fun !"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv('./data/BostonHousing.csv', header=0)\n",
+    "\n",
+    "display(data.head(5).style.format(\"{0:.2f}\"))\n",
+    "print('Données manquantes : ',data.isna().sum().sum(), '  Shape is : ', data.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3/ Preparing the data\n",
+    "### 3.1/ Split data\n",
+    "We will use 70% of the data for training and 30% for validation.  \n",
+    "x will be input data and y the expected output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ---- Split => train, test\n",
+    "#\n",
+    "data_train = data.sample(frac=0.7, axis=0)\n",
+    "data_test  = data.drop(data_train.index)\n",
+    "\n",
+    "# ---- Split => x,y (medv is price)\n",
+    "#\n",
+    "x_train = data_train.drop('medv',  axis=1)\n",
+    "y_train = data_train['medv']\n",
+    "x_test  = data_test.drop('medv',   axis=1)\n",
+    "y_test  = data_test['medv']\n",
+    "\n",
+    "print('Original data shape was : ',data.shape)\n",
+    "print('x_train : ',x_train.shape, 'y_train : ',y_train.shape)\n",
+    "print('x_test  : ',x_test.shape,  'y_test  : ',y_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.2/ Data normalization\n",
+    "**Note :** \n",
+    " - All input data must be normalized, train and test.  \n",
+    " - To do this we will subtract the mean and divide by the standard deviation.  \n",
+    " - But test data should not be used in any way, even for normalization.  \n",
+    " - The mean and the standard deviation will therefore only be calculated with the train data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "display(x_train.describe().style.format(\"{0:.2f}\").set_caption(\"Before normalization :\"))\n",
+    "\n",
+    "mean = x_train.mean()\n",
+    "std  = x_train.std()\n",
+    "x_train = (x_train - mean) / std\n",
+    "x_test  = (x_test  - mean) / std\n",
+    "\n",
+    "display(x_train.describe().style.format(\"{0:.2f}\").set_caption(\"After normalization :\"))\n",
+    "\n",
+    "x_train, y_train = np.array(x_train), np.array(y_train)\n",
+    "x_test,  y_test  = np.array(x_test),  np.array(y_test)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4/ Build a model\n",
+    "More information about : \n",
+    " - [Optimizer](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers)\n",
+    " - [Activation](https://www.tensorflow.org/api_docs/python/tf/keras/activations)\n",
+    " - [Loss](https://www.tensorflow.org/api_docs/python/tf/keras/losses)\n",
+    " - [Metrics](https://www.tensorflow.org/api_docs/python/tf/keras/metrics)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "  def get_model_v1(shape):\n",
+    "    \n",
+    "    model = keras.models.Sequential()\n",
+    "    model.add(keras.layers.Dense(64, activation='relu', input_shape=shape))\n",
+    "    model.add(keras.layers.Dense(64, activation='relu'))\n",
+    "    model.add(keras.layers.Dense(1))\n",
+    "    \n",
+    "    model.compile(optimizer = 'rmsprop',\n",
+    "                  loss      = 'mse',\n",
+    "                  metrics   = ['mae', 'mse'] )\n",
+    "    return model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5/ Train the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model=get_model_v1( (13,) )\n",
+    "\n",
+    "model.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Let's go :**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "history = model.fit(x_train,\n",
+    "                    y_train,\n",
+    "                    epochs          = 100,\n",
+    "                    batch_size      = 10,\n",
+    "                    verbose         = 1,\n",
+    "                    validation_data = (x_test, y_test))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6/ Evaluate\n",
+    "### 6.1/ Model evaluation\n",
+    "MAE =  Mean Absolute Error (between the labels and predictions)  \n",
+    "A mae equal to 3 represents an average error in prediction of $3k."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "score = model.evaluate(x_test, y_test, verbose=0)\n",
+    "\n",
+    "print('x_test / loss      : {:5.4f}'.format(score[0]))\n",
+    "print('x_test / mae       : {:5.4f}'.format(score[1]))\n",
+    "print('x_test / mse       : {:5.4f}'.format(score[2]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6.2/ Training history\n",
+    "What was the best result during our training ?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"min( val_mae ) : {:.4f}\".format( min(history.history[\"val_mae\"]) ) )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "reload(ooo)\n",
+    "ooo.plot_history(history, plot={'MSE' :['mse', 'val_mse'],\n",
+    "                                'MAE' :['mae', 'val_mae'],\n",
+    "                                'LOSS':['loss','val_loss']})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/BHPD/fidle/__init__.py b/BHPD/fidle/__init__.py
new file mode 100644
index 0000000..098c126
--- /dev/null
+++ b/BHPD/fidle/__init__.py
@@ -0,0 +1,2 @@
+
+VERSION='0.1a'
\ No newline at end of file
diff --git a/BHPD/fidle/pwk.py b/BHPD/fidle/pwk.py
new file mode 100644
index 0000000..f3aa42f
--- /dev/null
+++ b/BHPD/fidle/pwk.py
@@ -0,0 +1,244 @@
+
+# ==================================================================
+#  ____                 _   _           _  __        __         _
+# |  _ \ _ __ __ _  ___| |_(_) ___ __ _| | \ \      / /__  _ __| | __
+# | |_) | '__/ _` |/ __| __| |/ __/ _` | |  \ \ /\ / / _ \| '__| |/ /
+# |  __/| | | (_| | (__| |_| | (_| (_| | |   \ V  V / (_) | |  |   <
+# |_|   |_|  \__,_|\___|\__|_|\___\__,_|_|    \_/\_/ \___/|_|  |_|\_\
+#                                                        module pwk                                   
+# ==================================================================
+# A simple module to host some common functions for practical work
+# pjluc 2020
+
+import os
+import glob
+from datetime import datetime
+import itertools
+import datetime, time
+
+import math
+import numpy as np
+
+import tensorflow as tf
+from tensorflow import keras
+
+import matplotlib
+import matplotlib.pyplot as plt
+import seaborn as sn
+
+VERSION='0.1.8'
+
+
+# -------------------------------------------------------------
+# init
+# -------------------------------------------------------------
+#
+def init(mplstyle='fidle/talk.mplstyle'):
+    global VERSION
+    # ---- matplotlib
+    matplotlib.style.use(mplstyle)
+    # ---- Hello world
+#     now = datetime.datetime.now()
+    print('IDLE 2020 - Practical Work Module')
+    print('  Version            :', VERSION)
+    print('  Run time           : {}'.format(time.strftime("%A %-d %B %Y, %H:%M:%S")))
+    print('  Matplotlib style   :', mplstyle)
+    print('  TensorFlow version :',tf.__version__)
+    print('  Keras version      :',tf.keras.__version__)
+          
+# -------------------------------------------------------------
+# Folder cooking
+# -------------------------------------------------------------
+#
+def tag_now():
+    return datetime.datetime.now().strftime("%Y-%m-%d_%Hh%Mm%Ss")
+
+def mkdir(path):
+    os.makedirs(path, mode=0o750, exist_ok=True)
+      
+def get_directory_size(path):
+    """
+    Return the directory size, but only 1 level
+    args:
+        path : directory path
+    return:
+        size in MB (bytes / (1024*1024))
+    """
+    size=0
+    for f in os.listdir(path):
+        if os.path.isfile(path+'/'+f):
+            size+=os.path.getsize(path+'/'+f)
+    return size/(1024*1024)
+
+# -------------------------------------------------------------
+# shuffle_dataset
+# -------------------------------------------------------------
+#
+def shuffle_np_dataset(x, y):
+    assert (len(x) == len(y)), "x and y must have same size"
+    p = np.random.permutation(len(x))
+    return x[p], y[p]
+
+
+def update_progress(what,i,imax):
+    bar_length = min(40,imax)
+    if (i%int(imax/bar_length))!=0 and i<imax:
+        return
+    progress  = float(i/imax)
+    block     = int(round(bar_length * progress))
+    endofline = '\r' if progress<1 else '\n'
+    text = "{:16s} [{}] {:>5.1f}% of {}".format( what, "#"*block+"-"*(bar_length-block), progress*100, imax)
+    print(text, end=endofline)
+
+
+# -------------------------------------------------------------
+# show_images
+# -------------------------------------------------------------
+#
+def plot_images(x,y, indices, columns=12, x_size=1, y_size=1, colorbar=False, y_pred=None, cm='binary'):
+    """
+    Show some images in a grid, with legends
+    args:
+        x: images - Shapes must be (-1,lx,ly), (-1,lx,ly,1) or (-1,lx,ly,3)
+        y: real classes
+        indices: indices of images to show
+        columns: number of columns (12)
+        x_size,y_size: figure size
+        colorbar: show colorbar (False)
+        y_pred: predicted classes (None)
+        cm: Matplotlib color map
+    returns: 
+        nothing
+    """
+    rows    = math.ceil(len(indices)/columns)
+    fig=plt.figure(figsize=(columns*x_size, rows*(y_size+0.35)))
+    n=1
+    errors=0 
+    if np.any(y_pred)==None:
+        y_pred=y
+    for i in indices:
+        axs=fig.add_subplot(rows, columns, n)
+        n+=1
+        # ---- Shape is (lx,ly)
+        if len(x[i].shape)==2:
+            xx=x[i]
+        # ---- Shape is (lx,ly,n)
+        if len(x[i].shape)==3:
+            (lx,ly,lz)=x[i].shape
+            if lz==1: 
+                xx=x[i].reshape(lx,ly)
+            else:
+                xx=x[i]
+        img=axs.imshow(xx,   cmap = cm, interpolation='lanczos')
+        axs.spines['right'].set_visible(True)
+        axs.spines['left'].set_visible(True)
+        axs.spines['top'].set_visible(True)
+        axs.spines['bottom'].set_visible(True)
+        axs.set_yticks([])
+        axs.set_xticks([])
+        if y[i]!=y_pred[i]:
+            axs.set_xlabel('{} ({})'.format(y_pred[i],y[i]))
+            axs.xaxis.label.set_color('red')
+            errors+=1
+        else:
+            axs.set_xlabel(y[i])
+        if colorbar:
+            fig.colorbar(img,orientation="vertical", shrink=0.65)
+    plt.show()
+
+def plot_image(x,cm='binary', figsize=(4,4)):
+    (lx,ly,lz)=x.shape
+    plt.figure(figsize=figsize)
+    if lz==1:
+        plt.imshow(x.reshape(lx,ly),   cmap = cm, interpolation='lanczos')
+    else:
+        plt.imshow(x.reshape(lx,ly,lz),cmap = cm, interpolation='lanczos')
+    plt.show()
+
+
+# -------------------------------------------------------------
+# show_history
+# -------------------------------------------------------------
+#
+def plot_history_obsolete(history, figsize=(8,6)):
+    """
+    Show history
+    args:
+        history: history
+        figsize: figure size (8,6)
+    """
+    # Accuracy 
+    plt.figure(figsize=figsize)
+    plt.plot(history.history['accuracy'])
+    plt.plot(history.history['val_accuracy'])
+    plt.title('Model accuracy')
+    plt.ylabel('Accuracy')
+    plt.xlabel('Epoch')
+    plt.legend(['Train', 'Test'], loc='upper left')
+    plt.show()
+
+    # Loss values
+    plt.figure(figsize=figsize)
+    plt.plot(history.history['loss'])
+    plt.plot(history.history['val_loss'])
+    plt.title('Model loss')
+    plt.ylabel('Loss')
+    plt.xlabel('Epoch')
+    plt.legend(['Train', 'Test'], loc='upper left')
+    plt.show()    
+
+def plot_history(history, figsize=(8,6), 
+                  plot={"Accuracy":['accuracy','val_accuracy'], 'Loss':['loss', 'val_loss']}):
+    """
+    Show history
+    args:
+        history: history
+        figsize: fig size
+        plot: dict of {title: [history keys to plot]}
+    """
+    for title,curves in plot.items():
+        plt.figure(figsize=figsize)
+        plt.title(title)
+        plt.ylabel(title)
+        plt.xlabel('Epoch')
+        for c in curves:
+            plt.plot(history.history[c])
+        plt.legend(curves, loc='upper left')
+        plt.show()
+
+    
+    
+# -------------------------------------------------------------
+# plot_confusion_matrix
+# -------------------------------------------------------------
+#
+def plot_confusion_matrix(cm,
+                          title='Confusion matrix',
+                          figsize=(12,8),
+                          cmap="gist_heat_r",
+                          vmin=0,
+                          vmax=1,
+                          xticks=5,yticks=5):
+    """
+    given a sklearn confusion matrix (cm), make a nice plot
+
+    Args:
+        cm:           confusion matrix from sklearn.metrics.confusion_matrix
+        title:        the text to display at the top of the matrix
+        figsize:      Figure size (12,8)
+        cmap:         color map (gist_heat_r)
+        vmin,vmax:    Min/max 0 and 1
+        
+    """
+ 
+    accuracy = np.trace(cm) / float(np.sum(cm))
+    misclass = 1 - accuracy
+
+    plt.figure(figsize=figsize)
+    sn.heatmap(cm, linewidths=1, linecolor="#ffffff",square=True, 
+               cmap=cmap, xticklabels=xticks, yticklabels=yticks,
+               vmin=vmin,vmax=vmax)
+    plt.ylabel('True label')
+    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
+
+    plt.show()
diff --git a/BHPD/fidle/talk.mplstyle b/BHPD/fidle/talk.mplstyle
new file mode 100644
index 0000000..edf3fed
--- /dev/null
+++ b/BHPD/fidle/talk.mplstyle
@@ -0,0 +1,33 @@
+
+# See : https://matplotlib.org/users/customizing.html
+
+axes.titlesize : 24
+axes.labelsize : 20
+axes.edgecolor      : dimgrey
+axes.labelcolor     : dimgrey
+axes.linewidth      : 2
+axes.grid           : False
+
+axes.prop_cycle    : cycler('color', ['steelblue', 'tomato', '2ca02c', 'd62728', '9467bd', '8c564b', 'e377c2', '7f7f7f', 'bcbd22', '17becf'])
+
+lines.linewidth     : 3
+lines.markersize    : 10
+
+xtick.color         : black
+xtick.labelsize     : 18
+ytick.color         : black
+ytick.labelsize     : 18
+
+axes.spines.left   : True
+axes.spines.bottom : True
+axes.spines.top    : False
+axes.spines.right  : False
+
+savefig.dpi         : 300      # figure dots per inch or 'figure'
+savefig.facecolor   : white    # figure facecolor when saving
+savefig.edgecolor   : white    # figure edgecolor when saving
+savefig.format      : svg
+savefig.bbox        : tight
+savefig.pad_inches  : 0.1
+savefig.transparent : True
+savefig.jpeg_quality: 95
diff --git a/GTSRB/02-First-convolutions.ipynb b/GTSRB/02-First-convolutions.ipynb
index 36c8325..3aa129a 100644
--- a/GTSRB/02-First-convolutions.ipynb
+++ b/GTSRB/02-First-convolutions.ipynb
@@ -228,9 +228,9 @@
     "\n",
     "model.summary()\n",
     "\n",
-    "model.compile(optimizer='adam',\n",
-    "              loss='sparse_categorical_crossentropy',\n",
-    "              metrics=['accuracy'])"
+    "model.compile(optimizer = 'adam',\n",
+    "              loss      = 'sparse_categorical_crossentropy',\n",
+    "              metrics   = ['accuracy'])"
    ]
   },
   {
@@ -256,10 +256,10 @@
     "\n",
     "# ---- Train\n",
     "history = model.fit(  x_train, y_train,\n",
-    "                      batch_size=batch_size,\n",
-    "                      epochs=epochs,\n",
-    "                      verbose=1,\n",
-    "                      validation_data=(x_test, y_test))"
+    "                      batch_size      = batch_size,\n",
+    "                      epochs          = epochs,\n",
+    "                      verbose         = 1,\n",
+    "                      validation_data = (x_test, y_test))"
    ]
   },
   {
diff --git a/MNIST/01-DNN-MNIST.ipynb b/MNIST/01-DNN-MNIST.ipynb
index 0c7c3d3..cf30ece 100644
--- a/MNIST/01-DNN-MNIST.ipynb
+++ b/MNIST/01-DNN-MNIST.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Deep Neural Network (DNN) - MNIST Example\n",
+    "Deep Neural Network (DNN) - MNIST dataset\n",
     "=========================================\n",
     "---\n",
     "Introduction au Deep Learning  (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020  \n",
     "\n",
-    "## A very simple example :\n",
+    "## A very simple example of **classification** :\n",
     "...but a must-have example, a classic !\n",
     "\n",
     " - Retrieve data\n",
diff --git a/MNIST/fidle/pwk.py b/MNIST/fidle/pwk.py
index c21924a..fbefd30 100644
--- a/MNIST/fidle/pwk.py
+++ b/MNIST/fidle/pwk.py
@@ -186,8 +186,7 @@ def plot_history(history, figsize=(8,6)):
     plt.xlabel('Epoch')
     plt.legend(['Train', 'Test'], loc='upper left')
     plt.show()    
-
-
+    
 # -------------------------------------------------------------
 # plot_confusion_matrix
 # -------------------------------------------------------------
-- 
GitLab