# ==================================================================
# ____ _ _ _ __ __ _
# | _ \ _ __ __ _ ___| |_(_) ___ __ _| | \ \ / /__ _ __| | __
# | |_) | '__/ _` |/ __| __| |/ __/ _` | | \ \ /\ / / _ \| '__| |/ /
# | __/| | | (_| | (__| |_| | (_| (_| | | \ V V / (_) | | | <
# |_| |_| \__,_|\___|\__|_|\___\__,_|_| \_/\_/ \___/|_| |_|\_\
# module pwk
# ==================================================================
# A simple module to host some common functions for practical work
# pjluc 2020
import os
import glob
import itertools
import datetime, time
import math
import numpy as np
from collections.abc import Iterable
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import confusion_matrix
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sn
from IPython.display import display, Markdown
VERSION='0.2.4'
# -------------------------------------------------------------
# init_all
# -------------------------------------------------------------
#
def init(mplstyle='fidle/talk.mplstyle'):
    global VERSION
    # ---- matplotlib
    matplotlib.style.use(mplstyle)
    # ---- Hello world
    # now = datetime.datetime.now()
    print('FIDLE 2020 - Practical Work Module')
    print('  Version            :', VERSION)
    print('  Run time           : {}'.format(time.strftime("%A %-d %B %Y, %H:%M:%S")))
    print('  Matplotlib style   :', mplstyle)
    print('  TensorFlow version :', tf.__version__)
    print('  Keras version      :', tf.keras.__version__)
# -------------------------------------------------------------
# Folder cooking
# -------------------------------------------------------------
#
def tag_now():
    return datetime.datetime.now().strftime("%Y-%m-%d_%Hh%Mm%Ss")

def mkdir(path):
    os.makedirs(path, mode=0o750, exist_ok=True)

def get_directory_size(path):
    """
    Return the directory size, but only 1 level deep
    args:
        path : directory path
    return:
        size in MB
    """
    size=0
    for f in os.listdir(path):
        if os.path.isfile(path+'/'+f):
            size+=os.path.getsize(path+'/'+f)
    return size/(1024*1024)
# -------------------------------------------------------------
# shuffle_dataset
# -------------------------------------------------------------
#
def shuffle_np_dataset(x, y):
    """
    Shuffle a dataset (x,y)
    args:
        x,y : dataset
    return:
        x,y shuffled
    """
    assert (len(x) == len(y)), "x and y must have same size"
    p = np.random.permutation(len(x))
    return x[p], y[p]

def update_progress(what,i,imax):
    """
    Display a text progress bar, as :
    My progress bar : ############# 34%
    args:
        what : Progress bar name
        i    : Current progress
        imax : Max value for i
    return:
        nothing
    """
    bar_length = min(40,imax)
    if (i%int(imax/bar_length))!=0 and i<imax:
        return
    progress  = float(i/imax)
    block     = int(round(bar_length * progress))
    endofline = '\r' if progress<1 else '\n'
    text = "{:16s} [{}] {:>5.1f}% of {}".format( what, "#"*block+"-"*(bar_length-block), progress*100, imax)
    print(text, end=endofline)
def rmax(l):
    """
    Recursive max() for a given iterable of iterables
    Should be np.array of np.array or list of list, etc.
    args:
        l : Iterable of iterables
    return:
        max value
    """
    maxi = float('-inf')
    for item in l:
        if isinstance(item, Iterable):
            t = rmax(item)
        else:
            t = item
        if t > maxi:
            maxi = t
    return maxi

def rmin(l):
    """
    Recursive min() for a given iterable of iterables
    Should be np.array of np.array or list of list, etc.
    args:
        l : Iterable of iterables
    return:
        min value
    """
    mini = float('inf')
    for item in l:
        if isinstance(item, Iterable):
            t = rmin(item)
        else:
            t = item
        if t < mini:
            mini = t
    return mini
# -------------------------------------------------------------
# show_images
# -------------------------------------------------------------
#
def plot_images(x,y, indices, columns=12, x_size=1, y_size=1, colorbar=False, y_pred=None, cm='binary'):
    """
    Show some images in a grid, with legends
    args:
        x: images - shapes must be (-1,lx,ly,1) or (-1,lx,ly,3)
        y: real classes
        indices: indices of images to show
        columns: number of columns (12)
        x_size,y_size: figure size
        colorbar: show colorbar (False)
        y_pred: predicted classes (None)
        cm: Matplotlib color map
    returns:
        nothing
    """
    rows   = math.ceil(len(indices)/columns)
    fig    = plt.figure(figsize=(columns*x_size, rows*(y_size+0.35)))
    n      = 1
    errors = 0
    if y_pred is None:
        y_pred = y
    for i in indices:
        axs=fig.add_subplot(rows, columns, n)
        n+=1
        # ---- Shape is (lx,ly)
        if len(x[i].shape)==2:
            xx=x[i]
        # ---- Shape is (lx,ly,n)
        if len(x[i].shape)==3:
            (lx,ly,lz)=x[i].shape
            if lz==1:
                xx=x[i].reshape(lx,ly)
            else:
                xx=x[i]
        img=axs.imshow(xx, cmap = cm, interpolation='lanczos')
        axs.spines['right'].set_visible(True)
        axs.spines['left'].set_visible(True)
        axs.spines['top'].set_visible(True)
        axs.spines['bottom'].set_visible(True)
        axs.set_yticks([])
        axs.set_xticks([])
        if y[i]!=y_pred[i]:
            axs.set_xlabel('{} ({})'.format(y_pred[i],y[i]))
            axs.xaxis.label.set_color('red')
            errors+=1
        else:
            axs.set_xlabel(y[i])
        if colorbar:
            fig.colorbar(img,orientation="vertical", shrink=0.65)
    plt.show()
def plot_image(x,cm='binary', figsize=(4,4)):
    """
    Draw a single image.
    Image shape can be (lx,ly), (lx,ly,1) or (lx,ly,n)
    args:
        x       : image as np array
        cm      : color map ('binary')
        figsize : fig size (4,4)
    """
    # ---- Shape is (lx,ly)
    if len(x.shape)==2:
        xx=x
    # ---- Shape is (lx,ly,n)
    if len(x.shape)==3:
        (lx,ly,lz)=x.shape
        if lz==1:
            xx=x.reshape(lx,ly)
        else:
            xx=x
    # ---- Draw it
    plt.figure(figsize=figsize)
    plt.imshow(xx, cmap = cm, interpolation='lanczos')
    plt.show()
# -------------------------------------------------------------
# show_history
# -------------------------------------------------------------
#
def plot_history(history, figsize=(8,6),
                 plot={"Accuracy":['accuracy','val_accuracy'], 'Loss':['loss', 'val_loss']}):
    """
    Show history
    args:
        history: history
        figsize: fig size
        plot: list of data to plot : {<title>:[<metrics>,...], ...}
    """
    for title,curves in plot.items():
        plt.figure(figsize=figsize)
        plt.title(title)
        plt.ylabel(title)
        plt.xlabel('Epoch')
        for c in curves:
            plt.plot(history.history[c])
        plt.legend(curves, loc='upper left')
        plt.show()
# -------------------------------------------------------------
# plot_confusion_matrix
# -------------------------------------------------------------
# Bug in Matplotlib 3.1.1
#
def plot_confusion_matrix(cm,
                          title='Confusion matrix',
                          figsize=(12,8),
                          cmap="gist_heat_r",
                          vmin=0,
                          vmax=1,
                          xticks=5,yticks=5):
    """
    Given a sklearn confusion matrix (cm), make a nice plot
    Note: bug in matplotlib 3.1.1
    Args:
        cm:        confusion matrix from sklearn.metrics.confusion_matrix
        title:     the text to display at the top of the matrix
        figsize:   figure size (12,8)
        cmap:      color map (gist_heat_r)
        vmin,vmax: min/max values (0 and 1)
    """
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy
    plt.figure(figsize=figsize)
    sn.heatmap(cm, linewidths=1, linecolor="#ffffff",square=True,
               cmap=cmap, xticklabels=xticks, yticklabels=yticks,
               vmin=vmin,vmax=vmax,annot=True)
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()
def display_confusion_matrix(y_true,y_pred,labels=None,color='green',
                             font_size='12pt', title="#### Confusion matrix is :"):
    """
    Show a confusion matrix for predictions.
    see : sklearn.metrics.confusion_matrix
    Args:
        y_true:    real classes
        y_pred:    predicted classes
        labels:    list of classes to show in the cm
        color:     color for the palette (green)
        font_size: values font size
        title:     the text to display at the top of the matrix
    """
    assert (labels is not None), "Labels must be set"
    if title is not None: display(Markdown(title))
    cm = confusion_matrix( y_true,y_pred, normalize="true", labels=labels)
    df=pd.DataFrame(cm)
    cmap = sn.light_palette(color, as_cmap=True)
    df.style.set_properties(**{'font-size': '20pt'})
    display(df.style.format('{:.2f}') \
            .background_gradient(cmap=cmap)
            .set_properties(**{'font-size': font_size}))
def plot_donut(values, labels, colors=["lightsteelblue","coral"], figsize=(6,6), title=None):
    """
    Draw a donut
    args:
        values  : list of values
        labels  : list of labels
        colors  : list of colors (["lightsteelblue","coral"])
        figsize : size of figure ( (6,6) )
    return:
        nothing
    """
    # ---- Title or not
    if title is not None: display(Markdown(title))
    # ---- Donut
    plt.figure(figsize=figsize)
    # ---- Draw a pie chart..
    plt.pie(values, labels=labels,
            colors = colors, autopct='%1.1f%%', startangle=70, pctdistance=0.85,
            textprops={'fontsize': 18},
            wedgeprops={"edgecolor":"w",'linewidth': 5, 'linestyle': 'solid', 'antialiased': True})
    # ---- ..with a white circle
    circle = plt.Circle((0,0),0.70,fc='white')
    ax = plt.gca()
    ax.add_artist(circle)
    # Equal aspect ratio ensures that pie is drawn as a circle
    plt.axis('equal')
    plt.tight_layout()
    plt.show()
# See : https://matplotlib.org/users/customizing.html
axes.titlesize : 24
axes.labelsize : 20
axes.edgecolor : dimgrey
axes.labelcolor : dimgrey
axes.linewidth : 2
axes.grid : False
axes.prop_cycle : cycler('color', ['steelblue', 'tomato', '2ca02c', 'd62728', '9467bd', '8c564b', 'e377c2', '7f7f7f', 'bcbd22', '17becf'])
lines.linewidth : 3
lines.markersize : 10
xtick.color : black
xtick.labelsize : 18
ytick.color : black
ytick.labelsize : 18
axes.spines.left : True
axes.spines.bottom : True
axes.spines.top : False
axes.spines.right : False
savefig.dpi : 300 # figure dots per inch or 'figure'
savefig.facecolor : white # figure facecolor when saving
savefig.edgecolor : white # figure edgecolor when saving
savefig.format : svg
savefig.bbox : tight
savefig.pad_inches : 0.1
savefig.transparent : True
savefig.jpeg_quality: 95
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [NP1] - A short introduction to Numpy
<!-- DESC --> NumPy is an essential tool for scientific Python.
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Understand the main principles of Numpy and its potential
Note : This notebook is strongly inspired by the UGA Python Introduction Course
See : **https://gricad-gitlab.univ-grenoble-alpes.fr/python-uga/py-training-2017**
%% Cell type:markdown id: tags:
## Step 1 - Numpy the beginning
Code using `numpy` usually starts with the import statement
%% Cell type:code id: tags:
``` python
import numpy as np
```
%% Cell type:markdown id: tags:
NumPy provides the type `np.ndarray`. Such arrays are multidimensional sequences of homogeneous elements. They can be created, for example, with the following commands:
%% Cell type:code id: tags:
``` python
# from a list
l = [10.0, 12.5, 15.0, 17.5, 20.0]
np.array(l)
```
%% Cell type:code id: tags:
``` python
# fast but the values can be anything
np.empty(4)
```
%% Cell type:code id: tags:
``` python
# slower than np.empty but the values are all 0.
np.zeros([2, 6])
```
%% Cell type:code id: tags:
``` python
# multidimensional array
a = np.ones([2, 3, 4])
print(a.shape, a.size, a.dtype)
a
```
%% Cell type:code id: tags:
``` python
# like range, but produces a 1D numpy array
np.arange(4)
```
%% Cell type:code id: tags:
``` python
# np.arange can produce arrays of floats
np.arange(4.)
```
%% Cell type:code id: tags:
``` python
# another convenient function to generate 1D arrays
np.linspace(10, 20, 5)
```
%% Cell type:markdown id: tags:
A NumPy array can be easily converted to a Python list.
%% Cell type:code id: tags:
``` python
a = np.linspace(10, 20 ,5)
list(a)
```
%% Cell type:code id: tags:
``` python
# Or even better
a.tolist()
```
%% Cell type:markdown id: tags:
## Step 2 - Access elements
Elements in a `numpy` array can be accessed using indexing and slicing in any dimension. NumPy also offers the same functionality as Fortran or Matlab.
### 2.1 - Indexes and slices
For example, we can create an array `A` and perform any kind of selection operations on it.
%% Cell type:code id: tags:
``` python
A = np.random.random([4, 5])
A
```
%% Cell type:code id: tags:
``` python
# Get the element from the second row, first column
A[1, 0]
```
%% Cell type:code id: tags:
``` python
# Get the first two rows
A[:2]
```
%% Cell type:code id: tags:
``` python
# Get the last column
A[:, -1]
```
%% Cell type:code id: tags:
``` python
# Get the first two rows and the columns with an even index
A[:2, ::2]
```
%% Cell type:markdown id: tags:
### 2.2 - Using a mask to select elements satisfying a condition:
%% Cell type:code id: tags:
``` python
cond = A > 0.5
print(cond)
print(A[cond])
```
%% Cell type:markdown id: tags:
The mask is in fact a particular case of the advanced indexing capabilities provided by NumPy. For example, it is even possible to use lists for indexing:
%% Cell type:code id: tags:
``` python
# Selecting only particular columns
print(A)
A[:, [0, 1, 4]]
```
%% Cell type:markdown id: tags:
## Step 3 - Perform array manipulations
### 3.1 - Apply arithmetic operations to whole arrays (element-wise):
%% Cell type:code id: tags:
``` python
(A+5)**2
```
%% Cell type:markdown id: tags:
### 3.2 - Apply functions element-wise:
%% Cell type:code id: tags:
``` python
np.exp(A) # With numpy arrays, use the functions from numpy !
```
%% Cell type:markdown id: tags:
### 3.3 - Setting parts of arrays
%% Cell type:code id: tags:
``` python
A[:, 0] = 0.
print(A)
```
%% Cell type:code id: tags:
``` python
# BONUS: Safe element-wise inverse with masks
cond = (A != 0)
A[cond] = 1./A[cond]
print(A)
```
%% Cell type:markdown id: tags:
## Step 4 - Attributes and methods of `np.ndarray` (see the [doc](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html#numpy.ndarray))
%% Cell type:code id: tags:
``` python
for i,v in enumerate([s for s in dir(A) if not s.startswith('__')]):
    print(f'{v:16}', end='')
    if (i+1) % 6 == 0: print('')
```
%% Cell type:code id: tags:
``` python
# Ex1: Get the mean along different dimensions
print(A)
print('Mean value   ', A.mean())
print('Column means ', A.mean(axis=0))
print('Row means    ', A.mean(axis=1))
```
%% Cell type:code id: tags:
``` python
# Ex2: Convert a 2D array into 1D, keeping all elements
print(A)
print(A.shape)
A_flat = A.flatten()
print(A_flat, A_flat.shape)
```
%% Cell type:markdown id: tags:
### 4.1 - Remark: dot product
%% Cell type:code id: tags:
``` python
b = np.linspace(0, 10, 11)
c = b @ b
# before Python 3.5:
# c = b.dot(b)
print(b)
print(c)
```
%% Cell type:markdown id: tags:
### 4.2 - For Matlab users
| ` ` | Matlab | Numpy |
| ------------- | ------ | ----- |
| element wise | `.*` | `*` |
| dot product | `*` | `@` |
%% Cell type:markdown id: tags:
`numpy` arrays can also be sorted, even when they are composed of complex data, as long as the types of the columns are explicitly stated with `dtype`.
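For instance, a minimal sketch of sorting a structured array by one of its fields (the field names `name` and `grade` below are purely illustrative):
%% Cell type:code id: tags:
``` python
# Sort a structured array on a named column
dt = np.dtype([('name', 'U10'), ('grade', 'f8')])
students = np.array([('Bob', 12.5), ('Alice', 17.0), ('Eve', 9.0)], dtype=dt)
np.sort(students, order='grade')
```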
%% Cell type:markdown id: tags:
### 4.3 - NumPy and SciPy sub-packages:
We already saw `numpy.random` to generate `numpy` arrays filled with random values. This submodule also provides functions related to distributions (Poisson, Gaussian, etc.) and permutations.
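A small illustrative sketch:
%% Cell type:code id: tags:
``` python
# Draw samples from a few distributions, and shuffle a range of integers
poisson_samples  = np.random.poisson(lam=3.0, size=5)
gaussian_samples = np.random.normal(loc=0.0, scale=1.0, size=5)
permuted         = np.random.permutation(np.arange(10))
print(poisson_samples)
print(gaussian_samples)
print(permuted)
```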
%% Cell type:markdown id: tags:
To perform linear algebra with dense matrices, we can use the submodule `numpy.linalg`. For instance, to compute the determinant of a random matrix, we use the function `det`
%% Cell type:code id: tags:
``` python
A = np.random.random([5,5])
print(A)
np.linalg.det(A)
```
%% Cell type:code id: tags:
``` python
squared_subA = A[1:3, 1:3]
print(squared_subA)
np.linalg.inv(squared_subA)
```
%% Cell type:markdown id: tags:
### 4.4 - Introduction to Pandas: Python Data Analysis Library
Pandas is an open source library providing high-performance, easy-to-use data structures and data analysis tools for Python.
[Pandas tutorial](https://pandas.pydata.org/pandas-docs/stable/10min.html)
[Grenoble Python Working Session](https://github.com/iutzeler/Pres_Pandas/)
[Pandas for SQL Users](http://sergilehkyi.com/translating-sql-to-pandas/)
[Pandas Introduction Training HPC Python@UGA](https://gricad-gitlab.univ-grenoble-alpes.fr/python-uga/training-hpc/-/blob/master/ipynb/11_pandas.ipynb)
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [ACTF1] - Activation functions
<!-- DESC --> Some activation functions, with their derivatives.
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- View the main activation functions
Activation functions in Keras:
https://www.tensorflow.org/api_docs/python/tf/keras/activations
## What we're going to do :
- Just visualize the main activation functions
%% Cell type:code id: tags:
``` python
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import math
from math import erfc, sqrt, exp
from math import pi as PI
from math import e as E
import sys
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('ACTF1')
```
%% Cell type:code id: tags:
``` python
SELU_A = -sqrt(2/PI)/(erfc(1/sqrt(2))*exp(1/2)-1)
SELU_L = (1-erfc(1/sqrt(2))*sqrt(E))*sqrt(2*PI) / (2*erfc(sqrt(2))*E*E+PI*erfc(1/sqrt(2))**2*E-2*(2+PI)*erfc(1/sqrt(2))*sqrt(E)+PI+2)**0.5

def heaviside(z):
    return np.where(z<0,0,1)

def sign(z):
    return np.where(z<0,-1,1)
    # return np.sign(z)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def tanh(z):
    return np.tanh(z)

def relu(z):
    return np.maximum(0, z)

def leaky_relu(z,a=0.05):
    return np.maximum(a*z, z)

def elu(z,a=1):
    # y=z.copy()
    y = a*(np.exp(z)-1)
    y[z>0] = z[z>0]
    return y

def selu(z):
    return SELU_L*elu(z,a=SELU_A)

def derivative(f, z, eps=0.000001):
    # Numerical derivative by central differences
    return (f(z + eps) - f(z - eps))/(2 * eps)
```
%% Cell type:code id: tags:
``` python
pw=5
ph=5
z = np.linspace(-5, 5, 200)
# ------ Heaviside
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(0, 0, "rx", markersize=10)
ax.plot(z, heaviside(z), linestyle='-', label="Heaviside")
ax.plot(z, derivative(heaviside, z), linewidth=3, alpha=0.6, label="dHeaviside/dx")
# ax.plot(z, sign(z), label="Heaviside")
ax.set_title("Heaviside")
fidle.scrawler.save_fig('Heaviside')
plt.show()
# ----- Logit/Sigmoid
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(z, sigmoid(z), label="Sigmoid")
ax.plot(z, derivative(sigmoid, z), linewidth=3, alpha=0.6, label="dSigmoid/dx")
ax.set_title("Logit")
fidle.scrawler.save_fig('Logit')
plt.show()
# ----- Tanh
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(z, tanh(z), label="Tanh")
ax.plot(z, derivative(tanh, z), linewidth=3, alpha=0.6, label="dTanh/dx")
ax.set_title("Tanh")
fidle.scrawler.save_fig('Tanh')
plt.show()
# ----- Relu
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(z, relu(z), label="ReLU")
ax.plot(z, derivative(relu, z), linewidth=3, alpha=0.6, label="dReLU/dx")
ax.set_title("ReLU")
fidle.scrawler.save_fig('ReLU')
plt.show()
# ----- Leaky Relu
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(z, leaky_relu(z), label="Leaky ReLU")
ax.plot(z, derivative( leaky_relu, z), linewidth=3, alpha=0.6, label="dLeakyReLU/dx")
ax.set_title("Leaky ReLU (α=0.05)")
fidle.scrawler.save_fig('LeakyReLU')
plt.show()
# ----- Elu
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(z, elu(z), label="ELU")
ax.plot(z, derivative( elu, z), linewidth=3, alpha=0.6, label="dExpReLU/dx")
ax.set_title("ELU (α=1)")
fidle.scrawler.save_fig('ELU')
plt.show()
# ----- Selu
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(z, selu(z), label="SeLU")
ax.plot(z, derivative( selu, z), linewidth=3, alpha=0.6, label="dSeLU/dx")
ax.set_title("ELU (SELU)")
fidle.scrawler.save_fig('SeLU')
plt.show()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [PANDAS1] - A few examples with Pandas
<!-- DESC --> Pandas is another essential tool for scientific Python.
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Understand how to slice a dataset
%% Cell type:markdown id: tags:
## Step 1 - A little cooking with datasets
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
```
%% Cell type:code id: tags:
``` python
# Get some data
a = np.arange(50).reshape(10,5)
print('Starting data: \n',a)
```
%% Cell type:code id: tags:
``` python
# Create a DataFrame
df_all = pd.DataFrame(a, columns=['A','B','C','D','E'])
print('\nDataFrame :')
display(df_all)
```
%% Cell type:code id: tags:
``` python
# Shuffle data
df_all = df_all.sample(frac=1, axis=0)
print('\nDataFrame randomly shuffled :')
display(df_all)
```
%% Cell type:code id: tags:
``` python
# Get a train part
df_train = df_all.sample(frac=0.8, axis=0)
print('\nTrain set (80%) :')
display(df_train)
```
%% Cell type:code id: tags:
``` python
# Get test set as all - train
df_test = df_all.drop(df_train.index)
print('\nTest set (all - train) :')
display(df_test)
```
%% Cell type:code id: tags:
``` python
x_train = df_train.drop('E', axis=1)
y_train = df_train['E']
x_test = df_test.drop('E', axis=1)
y_test = df_test['E']
display(x_train)
display(y_train)
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id:51be1de8 tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [PYTORCH1] - Practical Lab : PyTorch
<!-- DESC --> PyTorch is one of the main frameworks used in Deep Learning
<!-- AUTHOR : Kamel Guerda (CNRS/IDRIS) -->
## Objectives :
- Understand PyTorch
%% Cell type:markdown id:1959d3d5-388e-4c43-8318-342f08e6b024 tags:
## **Introduction**
%% Cell type:markdown id:a6da1305-551a-4549-abed-641415823a33 tags:
**PyTorch** is an open-source machine learning library developed by Facebook's AI Research lab. It offers an imperative and dynamic computational model, making it particularly easy and intuitive for researchers. Its primary feature is the tensor, a multi-dimensional array similar to NumPy's ndarray, but with GPU acceleration.
%% Cell type:markdown id:54c79dfb-a061-4b72-afe3-c97c28071e5c tags:
### **Installation and usage**
%% Cell type:markdown id:20852981-c289-4c4e-8099-2c5efef58e3b tags:
Whether you're working on the supercomputer Jean Zay or your own machine, getting your environment ready is the first step. Here's how to proceed:
%% Cell type:markdown id:a88f32bd-37f6-4e99-97e0-62283a146a1f tags:
#### **On Jean Zay**
%% Cell type:markdown id:8421a9f0-130d-40ef-8a7a-066bf9147066 tags:
For those accessing the Jean Zay supercomputer (you should already be at step 3):
1. **Access JupyterHub**: Go to [https://jupyterhub.idris.fr](https://jupyterhub.idris.fr). The login credentials are the same as those used to access the Jean Zay machine. Ensure your IP address is whitelisted (add a new IP via the account management form if needed).
2. **Create a JupyterLab Instance**: Choose to create the instance either on a frontend node (e.g., for internet access) or on a compute node by reserving resources via Slurm. Select the appropriate options such as workspace, allocated resources, billing, etc.
3. **Choose the Kernel**: IDRIS provides kernels based on modules installed on Jean Zay. This includes various versions of Python, Tensorflow, and PyTorch. Create a new notebook with the desired kernel through the launcher or change the kernel on an existing notebook by clicking the kernel name at the top right of the screen.
4. For advanced features like Tensorboard, MLFlow, custom kernel creation, etc., refer to the [JupyterHub technical documentation](https://jupyterhub.idris.fr/services/documentation/).
%% Cell type:markdown id:a168594c-cf18-4ed8-babf-242b56b3e0b7 tags:
> **Task:** Verify your kernel (top right corner)
> - In JupyterLab, at the top right of your notebook, you should see the name of your current kernel.
> - Ensure it matches "PyTorch 2.0" or a similar name indicating the PyTorch version.
> - If it doesn't, click on the kernel name and select the appropriate kernel from the list.
%% Cell type:markdown id:0aaadeee-5115-48d0-aa57-20a0a63d5054 tags:
#### **Elsewhere**
%% Cell type:markdown id:5d34951e-1b7b-4776-9449-eff57a9385f4 tags:
For users on other platforms:
1. Install PyTorch by following the official [installation guide](https://pytorch.org/get-started/locally/).
2. If you have a GPU, ensure you've installed the necessary CUDA toolkit and cuDNN libraries.
3. Launch your preferred Python environment, whether it's Jupyter notebook, an IDE like PyCharm, or just the terminal.
Once your setup is complete, you're ready to dive in. Let's explore the fascinating world of deep learning!
%% Cell type:markdown id:7552d5ac-eb8c-48e0-9e61-3b056d560f7b tags:
### **Version**
%% Cell type:code id:272e492f-35c5-4293-b504-8e8632da1b73 tags:
``` python
# Importing PyTorch
import torch
# TODO: Print the version of PyTorch being used
```
%% Cell type:markdown id:9fdbe225-4e06-4ad0-abca-4325457dc0e1 tags:
<details>
<summary>Hint (click to reveal)</summary>
To print the version of PyTorch you're using, you can access the <code>__version__</code> attribute of the <code>torch</code> module.
```python
print(torch.__version__)
```
</details>
%% Cell type:markdown id:72752068-02fe-4e44-8c27-40e8f66680c9 tags:
**Why PyTorch 2.0 is a Game-Changer**
PyTorch 2.0 represents a major step in the evolution of this popular deep learning library. As part of the transition to the 2-series, let's highlight some reasons why this version is pivotal:
1. **Performance**: With PyTorch 2.0, performance has been supercharged at the compiler level, offering faster execution and support for Dynamic Shapes and Distributed systems.
2. **torch.compile**: This introduces a more Pythonic approach, moving some parts of PyTorch from C++ back to Python. Notably, across a test set of 163 open-source models, the use of `torch.compile` resulted in a 43% speed increase during training on an NVIDIA A100 GPU.
3. **Innovative Technologies**: Technologies like TorchDynamo and TorchInductor, both written in Python, make PyTorch more flexible and developer-friendly.
4. **Staying Pythonic**: PyTorch 2.0 emphasizes Python-centric development, reducing barriers for developers and vendors.
As we progress in this lab, we'll dive deeper into some of these features, giving you hands-on experience with the power and flexibility of PyTorch 2.0.
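As a quick taste, here is a minimal sketch of wrapping a plain function with `torch.compile` (the function `f` below is purely illustrative; this requires PyTorch >= 2.0):
%% Cell type:code id: tags:
``` python
import torch

# A small function to compile (illustrative only)
def f(x):
    return torch.sin(x) ** 2 + torch.cos(x) ** 2

compiled_f = torch.compile(f)                 # build an optimized version of f
x = torch.randn(1000)
print(torch.allclose(f(x), compiled_f(x)))    # same result, potentially faster execution
```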
%% Cell type:markdown id:bc215c02-1f16-48be-88f9-5080fd2be9ed tags:
## **PyTorch Fundamentals**
%% Cell type:markdown id:bcd7f0fc-a714-495e-9307-e48964abd85b tags:
### **Tensors**
%% Cell type:markdown id:6e185bf6-3d3c-4a43-b425-e6aa3da5d5dd tags:
A **tensor** is a generalization of vectors and matrices and is easily understood as a multi-dimensional array. In the context of PyTorch:
- A 0-dimensional tensor is a scalar (a single number).
- A 1-dimensional tensor is a vector.
- A 2-dimensional tensor is a matrix.
- ... and so on for higher dimensions.
Tensors are fundamental to PyTorch not just as data containers but also for their compatibility with GPU acceleration, making operations on them extremely fast. This acceleration is vital for training large neural networks.
Let's start our journey with tensors by examining how PyTorch handles scalars.
%% Cell type:markdown id:fa90e399-3955-4417-a4a3-c0c812ebb1d9 tags:
#### **Scalars in PyTorch**
A scalar, being a 0-dimensional tensor, is simply a single number. While it might seem trivial, understanding scalars in PyTorch lays the foundation for grasping more complex tensor structures. Familiarize yourself with the `torch.tensor()` function from the [official documentation](https://pytorch.org/docs/stable/generated/torch.tensor.html) before proceeding.
> **Task**: Create a scalar tensor in PyTorch and examine its properties.
%% Cell type:code id:b6db1841-0fab-4df0-b699-058d5a477ca6 tags:
``` python
# TODO: Create a scalar tensor with the value 7.5
scalar_tensor = # Your code here
# Print the scalar tensor
print("Scalar Tensor:", scalar_tensor)
# TODO: Print its dimension, shape, and type
```
%% Cell type:markdown id:c9bc265c-9a7f-4588-8586-562b390d63d9 tags:
<details>
<summary>Hint (click to reveal)</summary>
To create a scalar tensor, use the <code>torch.tensor()</code> function. To retrieve its dimension, shape, and type, you can use the <code>.dim()</code>, <code>.shape</code>, and <code>.dtype</code> attributes respectively.
Here's how you can achieve that:
```python
scalar_tensor = torch.tensor(7.5)
print("Scalar Tensor:", scalar_tensor)
print("Dimension:", scalar_tensor.dim())
print("Shape:", scalar_tensor.shape)
print("Type:", scalar_tensor.dtype)
```
</details>
%% Cell type:markdown id:fc240c26-5866-4080-bbb9-d5cde1500300 tags:
#### **Vectors in PyTorch**
A vector in PyTorch is a 1-dimensional tensor. It's essentially a list of numbers that can represent anything from a sequence of data points to the weights of a neural network layer.
In this section, we'll see how to create and manipulate vectors using PyTorch. We'll also look at some basic operations you can perform on them.
> **Task**: Create a 1-dimensional tensor (vector) with values `[1.5, 2.3, 3.1, 4.8, 5.2]` and print its dimension, shape, and type.
Start by referring to the `torch.tensor()` function in the [official documentation](https://pytorch.org/docs/stable/generated/torch.tensor.html) to understand how to create tensors of varying dimensions.
%% Cell type:code id:e9503b49-38d1-45d9-910f-761da82cfbd0 tags:
``` python
# TODO: Create a 1-dimensional tensor (vector) with values [1.5, 2.3, 3.1, 4.8, 5.2]
vector_tensor = # Your code here
# Print the vector tensor
print("Vector Tensor:", vector_tensor)
# TODO: Print its dimension, shape, and type
```
%% Cell type:markdown id:13252d1f-004f-42e0-aec9-56322b43ab72 tags:
<details>
<summary>Hint (click to reveal)</summary>
Creating a 1-dimensional tensor is similar to creating a scalar. Instead of a single number, you pass a list of numbers to the <code>torch.tensor()</code> function. The <code>.dim()</code>, <code>.shape</code>, and <code>.dtype</code> attributes will help you retrieve its properties.
```python
vector_tensor = torch.tensor([1.5, 2.3, 3.1, 4.8, 5.2])
print("Vector Tensor:", vector_tensor)
print("Dimension:", vector_tensor.dim())
print("Shape:", vector_tensor.shape)
print("Type:", vector_tensor.dtype)
```
</details>
%% Cell type:markdown id:7bfc47a8-e99d-4683-ac36-287f35a76fd0 tags:
#### **Vector Operations**
Vectors are not just static entities; we often perform various operations on them, especially in the context of neural networks. This includes addition, subtraction, scalar multiplication, dot products, etc.
> **Task**: Using the previously defined `vector_tensor`, perform the following operations:
1. Add 5 to all the elements of the vector.
2. Multiply all the elements of the vector by 2.
3. Compute the dot product of the vector with itself.
%% Cell type:code id:86182e1c-5491-4743-a7c8-10b9effd8194 tags:
``` python
# TODO: Add 5 to all elements
vector_added = # Your code here
# TODO: Multiply all elements by 2
vector_multiplied = # Your code here
# TODO: Compute the dot product with itself
dot_product = # Your code here
# Print the results
print("Vector after addition:", vector_added)
print("Vector after multiplication:", vector_multiplied)
print("Dot Product:", dot_product)
```
%% Cell type:markdown id:75773a02-3ab4-4325-99fb-7a742e997f21 tags:
<details>
<summary>Hint (click to reveal)</summary>
PyTorch tensors support regular arithmetic operations. For the dot product, you can use the <code>torch.dot()</code> function.
```python
vector_added = vector_tensor + 5
vector_multiplied = vector_tensor * 2
dot_product = torch.dot(vector_tensor, vector_tensor)
print("Vector after addition:", vector_added)
print("Vector after multiplication:", vector_multiplied)
print("Dot Product:", dot_product)
```
</details>
%% Cell type:markdown id:2b4766ba-ef9a-4f24-ba43-7358097a7b61 tags:
#### **Matrices in PyTorch**
A matrix in PyTorch is represented as a 2D tensor. Just as vectors are generalizations of scalars, matrices are generalizations of vectors, providing an additional dimension. Matrices are crucial for a range of operations in deep learning, including representing datasets, transformations, and more.
%% Cell type:markdown id:2ec7544d-ef87-4773-88d8-cee731d1c43c tags:
##### **Creating Matrices**
Before diving into manual matrix creation, it's beneficial to know some utility functions PyTorch provides:
- `torch.rand()`: Generates a matrix with random values between 0 and 1.
- `torch.eye()`: Creates an identity matrix.
- `torch.zeros()`: Generates a matrix filled with zeros.
- `torch.ones()`: Generates a matrix filled with ones.
You can explore more about these functions in the [official documentation](https://pytorch.org/docs/stable/tensors.html).
> **Task**: Using the above functions, create the following matrices:
> 1. A 3x3 matrix with random values.
> 2. A 5x5 identity matrix.
> 3. A 2x4 matrix filled with zeros.
> 4. A 4x2 matrix filled with ones.
%% Cell type:code id:5014b564-6bf5-4f00-a513-578ca72d94a8 tags:
``` python
# Your code for creating the matrices goes here
```
%% Cell type:markdown id:86b2708c-45c6-4b2c-b526-41491fcafa08 tags:
<details>
<summary>Hint (click to reveal)</summary>
To create these matrices, make use of the following functions:
1. `torch.rand(size)`: Use this function and specify the size as `(3, 3)` to create a 3x3 matrix with random values.
2. `torch.eye(n, m)`: Use this to generate an identity matrix. For a square matrix like 5x5, n and m would both be 5.
3. `torch.zeros(m, n)`: For a 2x4 matrix filled with zeros, specify m=2 and n=4.
4. `torch.ones(m, n)`: Similar to the `zeros` function but fills the matrix with ones.
```python
# 1. 3x3 matrix with random values
random_matrix = torch.rand(3, 3)
print(random_matrix)
# 2. 5x5 identity matrix
identity_matrix = torch.eye(5, 5)
print(identity_matrix)
# 3. 2x4 matrix filled with zeros
zero_matrix = torch.zeros(2, 4)
print(zero_matrix)
# 4. 4x2 matrix filled with ones
one_matrix = torch.ones(4, 2)
print(one_matrix)
```
</details>
%% Cell type:markdown id:60ff5e51-699e-46a1-8cc7-1d5fc9a4d078 tags:
#### **Matrix Operations in PyTorch**
Just like vectors, matrices can undergo a variety of operations. Some of the basic ones include matrix addition, subtraction, and multiplication. More advanced operations include matrix inversion, transposition, and determinant calculation.
%% Cell type:markdown id:c6bdb9d9-b299-4d63-b92f-7c4b8c32a1b7 tags:
##### **Basic Matrix Operations**
> **Task**: Perform the following operations on matrices:
> 1. Create two 3x3 matrices with random values.
> 2. Add the two matrices.
> 3. Subtract the second matrix from the first one.
> 4. Multiply the two matrices element-wise.
Remember: for true matrix multiplication (the dot product of rows and columns) you'd use `torch.mm` or the `@` operator, whereas `*` performs element-wise multiplication, as illustrated in the short example below.
Here's the [official documentation](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.matmul) on matrix operations for your reference.
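A tiny sketch of the difference (the 2x2 values below are made up for demonstration):
%% Cell type:code id: tags:
``` python
# Element-wise product vs. matrix multiplication (illustrative values)
a = torch.tensor([[1., 2.], [3., 4.]])
b = torch.tensor([[10., 20.], [30., 40.]])
print(a * b)   # element-wise:   [[10., 40.], [90., 160.]]
print(a @ b)   # matrix product: [[70., 100.], [150., 220.]]
```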
%% Cell type:code id:6be8c647-c455-4d3b-8a21-c4b7102ffa75 tags:
``` python
# Your code for creating the matrices and performing the operations goes here
```
%% Cell type:markdown id:0020b26b-b2bb-4efa-9bf3-3f037acd050e tags:
<details>
<summary>Hint (click to reveal)</summary>
Here's how you can perform the given matrix operations:
```python
# 1. Create two 3x3 matrices with random values
matrix1 = torch.rand(3, 3)
matrix2 = torch.rand(3, 3)
print("Matrix 1:\n", matrix1)
print("\nMatrix 2:\n", matrix2)
# 2. Add the two matrices
sum_matrix = matrix1 + matrix2
print("\nSum of matrices:\n", sum_matrix)
# 3. Subtract the second matrix from the first one
difference_matrix = matrix1 - matrix2
print("\nDifference of matrices:\n", difference_matrix)
# 4. Multiply the two matrices element-wise
product_matrix = matrix1 * matrix2
print("\nElement-wise product of matrices:\n", product_matrix)
```
</details>
%% Cell type:markdown id:07f57464-76e2-4670-8332-3fcec2e162bd tags:
#### **Higher-Dimensional Tensors in PyTorch**
While scalars, vectors, and matrices cover 0D, 1D, and 2D tensors respectively, in deep learning, especially in tasks like image processing, you often encounter tensors with more than two dimensions.
For instance, a colored image is often represented as a 3D tensor: height x width x channels (e.g., RGB channels). A batch of such images would then be a 4D tensor: batch_size x height x width x channels.
Let's get our hands dirty with some higher-dimensional tensors!
%% Cell type:markdown id:3dd1fea7-d290-49fe-ac1f-5a8387e3d386 tags:
##### **Creating a Higher-Dimensional Tensor**
> **Task**: Create a tensor representing a batch of 2 images of size 4x4 with 3 channels (like RGB), filled with random values. As noted above, such a batch is a 4D tensor.
Use the `torch.rand` function, and remember to specify the dimensions correctly.
Here's the [official documentation](https://pytorch.org/docs/stable/tensors.html#creation-ops) for tensor creation.
%% Cell type:code id:e7c8ac6e-f870-4b5d-ac2c-05be1d0cc9f1 tags:
``` python
# Your code for creating the 3D tensor goes here
```
%% Cell type:markdown id:efe61750-a91f-428a-b4e2-7df0cc2a782b tags:
<details>
<summary>Hint (click to reveal)</summary>
Creating a tensor with the given specifications can be achieved using the `torch.rand` function. Here's how:
```python
# Create a tensor representing a batch of 2 images of size 4x4 with 3 channels
image_tensor = torch.rand(2, 4, 4, 3)
print(image_tensor)
```
</details>
%% Cell type:markdown id:8cfbcaa0-a0f6-4869-ba94-65d4439a60ca tags:
#### **Reshaping Tensors**
In deep learning, we often need to reshape our tensors. For instance, an image represented as a 3D tensor might need to be reshaped into a 1D tensor before passing it through a fully connected layer. PyTorch provides methods to make this easy.
The most commonly used method for reshaping tensors in PyTorch is the `view()` method. Another method that offers more flexibility (especially when you're unsure about the size of one dimension) is `reshape()`.
>[Task]: Using the official documentation, find out how to use the [`view()`](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view) and [`reshape()`](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.reshape) methods. Create a 2x3 tensor using `torch.tensor()` and then reshape it into a 3x2 tensor.
%% Cell type:code id:e6758ba7-aa35-42f0-87c1-86b88de64238 tags:
``` python
# Create a 2x3 tensor
# Reshape it into a 3x2 tensor
```
%% Cell type:markdown id:fea31255-c2fe-47b2-b03b-c2b35953e05a tags:
<details>
<summary>Hint (click to reveal)</summary>
To reshape a tensor using <code>view()</code> method:
```python
tensor = torch.tensor([[1, 2, 3], [4, 5, 6]])
reshaped_tensor = tensor.view(3, 2)
```
<br>
Alternatively, using the <code>reshape()</code> method:
```python
reshaped_tensor = tensor.reshape(3, 2)
```
</details>
%% Cell type:markdown id:c580dbca-b75a-4b97-a24a-6a19c7cdf8d1 tags:
#### **Broadcasting**
Broadcasting is a powerful feature in PyTorch that allows you to perform operations between tensors of different shapes. When possible, PyTorch will automatically reshape the tensors in a way that makes the operation valid. This can significantly reduce manual reshaping and is efficient in memory usage.
However, it's essential to understand the rules and nuances of broadcasting to use it effectively and avoid unexpected behaviors.
>[Task]: Given a tensor `A` of shape (4, 1) and another tensor `B` of shape (1, 4), use PyTorch operations to produce a result tensor of shape (4, 4). Check the [official documentation on broadcasting](https://pytorch.org/docs/stable/notes/broadcasting.html) for guidance.
%% Cell type:code id:44566fb7-87ed-41ef-a86e-db32a1cf2179 tags:
``` python
# Define tensor A of shape (4, 1) and tensor B of shape (1, 4)
# Perform an operation to get a result tensor of shape (4, 4)
```
%% Cell type:markdown id:2602f2c4-f507-4a9a-8e8d-dee5e95efc61 tags:
<details>
<summary>Hint (click to reveal)</summary>
You can simply use addition, subtraction, multiplication, or any other element-wise operation. When you do this operation, PyTorch will automatically broadcast the tensors to a compatible shape. For example:
```python
A = torch.tensor([[1], [2], [3], [4]])
B = torch.tensor([[1, 2, 3, 4]])
result = A * B
print(result)
```
</details>
%% Cell type:markdown id:ba2cc439-8ecc-4d92-b78f-39ef762678f8 tags:
### **GPU Support with CUDA**
%% Cell type:markdown id:575536c5-87a7-4781-8557-558627f14c0a tags:
PyTorch seamlessly supports operations on Graphics Processing Units (GPUs) through CUDA, an API developed by NVIDIA for their GPUs. If you have a compatible NVIDIA GPU on your machine, PyTorch can utilize it to speed up tensor operations which can be orders of magnitude faster than on a CPU.
To verify whether your PyTorch installation can use CUDA, you can call the function `torch.cuda.is_available()`. It returns `True` if CUDA is available and PyTorch can use GPUs, and `False` otherwise.
>[Task]: Print whether CUDA support is available on your system. The [CUDA documentation](https://pytorch.org/docs/stable/cuda.html) might be useful for this task.
%% Cell type:code id:38e84bb7-5026-4262-8b78-b368c55a1450 tags:
``` python
# Check and print if CUDA is available
cuda_available = None # Replace None with the appropriate code
print("CUDA available:", cuda_availablez
```
%% Cell type:markdown id:646b5660-5131-4ce0-9592-0fd14608c6df tags:
<details>
<summary>Hint (click to reveal)</summary>
To check if CUDA is available, you can utilize the torch.cuda.is_available() function.
```python
cuda_available = torch.cuda.is_available()
print("CUDA available:", cuda_available)
```
</details>
%% Cell type:markdown id:86c8d7ed-0931-4874-bb27-e796ae1a1d7a tags:
When developing deep learning models in PyTorch, it's a good habit to write device-agnostic code. This means your code can automatically use a GPU if available, or fall back to using the CPU if not. The `torch.device` object allows you to specify the device (either CPU or GPU) where you'd like your tensors to be allocated.
To dynamically determine the device, a common pattern is to check `torch.cuda.is_available()`, and set the device accordingly. This is particularly useful when you want your code to be flexible, regardless of the underlying hardware.
>[Task]: Define a `device` variable that is set to 'cuda:0' if CUDA is available and 'cpu' otherwise. Create a tensor on this device. The [documentation about torch.device](https://pytorch.org/docs/stable/tensor_attributes.html#torch-device) might be handy.
%% Cell type:code id:91e05e75-03ad-44cb-9842-89e2017ee709 tags:
``` python
# Define the device
device = None # Replace None with the appropriate code
# Create a tensor on the specified device
tensor_on_device = torch.tensor([1, 2, 3, 4, 5], device=device)
```
%% Cell type:markdown id:3b80406b-b1cc-4831-a6ba-8e6385703755 tags:
<details>
<summary>Hint (click to reveal)</summary>
To define the device variable dynamically:
```python
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
```
<br>
After setting the device, you can create tensors on it directly using the device argument.
</details>
%% Cell type:markdown id:574a2192-cc09-4d2c-8f01-97b051b7ffc8 tags:
### **Automatic Differentiation with Autograd**
%% Cell type:markdown id:7f5406f6-e295-4f70-a815-9eef18352390 tags:
PyTorch's `autograd` module provides the tools for automatically computing the gradients for tensors. This feature is a cornerstone for neural network training, as gradients are essential for optimization algorithms like gradient descent.
When we create a tensor, `requires_grad` is set to `False` by default, meaning it won't track operations. However, if we set `requires_grad=True`, PyTorch will start to track all operations on the tensor.
Let's start with a simple example:
>**Task:** Create a tensor that holds a single value, let's say 2, and set `requires_grad=True`. Then, define a simple operation like squaring the tensor. Finally, inspect the resulting tensor. The [documentation for requires_grad](https://pytorch.org/docs/stable/autograd.html#torch.Tensor.requires_grad) might be handy.
%% Cell type:code id:fe63ab93-55be-434d-822f-8fd9cd727941 tags:
``` python
# TODO: Create a tensor, perform a simple operation, and print its data and grad_fn separately.
```
%% Cell type:markdown id:fa7ee20c-c2d6-4dcf-bb37-9eda580b5dc5 tags:
<details>
<summary>Hint (click to reveal)</summary>
To create a tensor with requires_grad=True and square it:
```python
# TODO: Create a tensor, perform a simple operation, and print its data and grad_fn separately.
x = torch.tensor([2.0], requires_grad=True)
y = x ** 2
print("Data:", y.data)
print("grad_fn:", y.grad_fn)
```
</details>
%% Cell type:markdown id:c14dde16-a6be-4151-94cb-96ae98f0648a tags:
Once an operation has been executed on a tensor, a new attribute `grad_fn` is created. This attribute references the function that created the tensor. In our example, since we squared the tensor, `grad_fn` will be of type `PowBackward0`.
This `grad_fn` attribute provides a link to the computational history of the tensor, allowing PyTorch to backpropagate errors and compute gradients when training neural networks.
%% Cell type:markdown id:0965e79e-558a-45a9-8ab2-614c503e59c0 tags:
#### **Computing Gradients**
%% Cell type:markdown id:36fb6c5b-9b39-4a2f-a767-61032b1b4ffc tags:
Now, let's compute the gradients of `y` with respect to `x`. To do this, we'll call the `backward()` method on the tensor `y`.
>[Task]: Compute the gradients of `y` by calling the `backward()` method on it. Afterwards, print the gradients of `x`. The [documentation for backward()](https://pytorch.org/docs/stable/autograd.html#torch.autograd.backward) may be useful.
%% Cell type:code id:83685760-bde9-4327-88f7-cfe02bdb3309 tags:
``` python
# TODO: Compute the gradient and print it.
```
%% Cell type:markdown id:9b1d104b-efef-4fff-869d-8dde1131868e tags:
<details>
<summary>Hint (click to reveal)</summary>
To compute the gradient:
```python
y.backward()
print(x.grad)
```
</details>
%% Cell type:markdown id:d7f5aecb-8623-481f-a5cf-f8b6dd0c9a37 tags:
#### **Gradient Accumulation**
%% Cell type:markdown id:1a4df0a1-12a0-4129-a258-915fa8440193 tags:
In PyTorch, the gradients of tensors are accumulated into the `.grad` attribute each time you call `.backward()`. This means that if you call `.backward()` multiple times, the gradients will add up.
However, by default, calling `.backward()` consumes the computational graph to save memory. If you intend to call `.backward()` multiple times on the same graph, you need to specify `retain_graph=True` during all but the last call.
>[Task]: Create a tensor, perform an operation on it, and then call `backward()` twice. Use `retain_graph=True` in the first call to retain the computational graph. Observe the `.grad` attribute after each call.
%% Cell type:code id:50a04095-9d7e-48ba-90ed-06718cd379f0 tags:
``` python
# Create a tensor
w = torch.tensor([1.0], requires_grad=True)
# Operation
result = w * 2
# TODO: Call backward twice (using retain_graph=True for the first call) and print the grad after each call
# ...
```
%% Cell type:markdown id:d699e58d-d479-466a-b592-cbf68d185c3b tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
result.backward(retain_graph=True)
print(w.grad) # This should print 2
result.backward()
print(w.grad) # This should print 4, as gradients get accumulated
```
</details>
%% Cell type:markdown id:88d30f87-2469-4289-ad8a-51a25a2e8b82 tags:
#### **Zeroing Gradients**
%% Cell type:markdown id:2ea93580-9a35-4f5d-8f29-0a324d28d28a tags:
In neural network training, we typically want to update our weights with the gradients after each forward and backward pass. This means that we don't want the gradients to accumulate across multiple passes. Hence, it's common to zero out the gradients at the start of a new iteration.
>[Task]: Using the tensor from the previous cell, zero out its gradients and verify that it has been set to zero.
%% Cell type:code id:9cb03a91-d1df-4bbf-a0d2-b5580c643e12 tags:
``` python
# TODO: Zero out the gradients of w and print
```
%% Cell type:markdown id:4a89ff66-b1ef-413a-a41c-847e8c832e4b tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
w.grad.zero_()
print(w.grad)
```
</details>
%% Cell type:markdown id:85f75515-3d89-4249-b00a-03c13cca92d4 tags:
#### **Non-Scalar Backward**
%% Cell type:markdown id:86a54a2c-e8c1-4278-a3fe-ed60564ebd07 tags:
When dealing with non-scalar tensors, `backward()` requires an additional argument: the gradient of some scalar quantity (usually a loss) with respect to the tensor.
>[Task]: Create a tensor of shape (2, 2) with `requires_grad=True`. Compute a non-scalar result by multiplying the tensor with itself. Then, compute backward with a gradient argument. You can consult the [backward documentation](https://pytorch.org/docs/stable/autograd.html#torch.autograd.backward) for reference.
%% Cell type:code id:cc0e4271-c356-4a4e-9a3a-5df1403a4211 tags:
``` python
# TODO: Create a tensor, perform an operation, and compute backward with a gradient argument
```
%% Cell type:markdown id:e7ee72f3-f51c-4849-b41d-136028029185 tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
v = torch.tensor([[2.0, 3.0], [4.0, 5.0]], requires_grad=True)
result = v * v
grads = torch.tensor([[1.0, 1.0], [1.0, 1.0]])
result.backward(grads)
```
</details>
%% Cell type:markdown id:2e403021-4854-4e97-9898-82ed355293e7 tags:
#### **Stopping Gradient Tracking**
%% Cell type:markdown id:ba644253-8523-480d-8318-a87047671a21 tags:
There are scenarios where we don't want to track the gradients for certain operations. This can be achieved in two main ways:
1. **Using `torch.no_grad()`**: This context manager ensures that the enclosed operations are excluded from gradient tracking.
2. **Using `.detach()`**: Creates a tensor that shares the same storage but does not require gradients.
>[Task]: Create a tensor with `requires_grad=True`. Then, demonstrate both methods above to prevent gradient computation.
%% Cell type:code id:1feb2f9b-0c5f-4e9d-b042-e74052bc83a9 tags:
``` python
# TODO: Demonstrate operations without gradient tracking
```
%% Cell type:markdown id:a5eff82b-bfbd-4be7-afa3-dc00f5341568 tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
# Using torch.no_grad()
with torch.no_grad():
result_no_grad = v * v
print(result_no_grad.requires_grad)
# Using .detach()
detached_tensor = v.detach()
result_detach = detached_tensor * detached_tensor
print(result_detach.requires_grad)
```
</details>
%% Cell type:markdown id:efe66a5d-ac63-4623-8182-3b5aff58abbe tags:
## **Building a Simple Neural Network with PyTorch**
%% Cell type:markdown id:aa4b7630-fc1e-4f7b-b86b-3c0d233cdc49 tags:
Neural networks are the cornerstone of deep learning. They are organized as a series of interconnected nodes or "neurons" that are structured into layers: an input layer, several hidden layers, and an output layer. Data flows through this network, undergoing transformations at each node, until it emerges at the output.
With PyTorch's `torch.nn` module, constructing these neural networks becomes straightforward. Let's dive into its main components:
%% Cell type:markdown id:8e98f379-5580-477c-8b7b-c641f5edf710 tags:
### **nn.Module: The Base Class for Neural Networks**
%% Cell type:markdown id:15d72ea2-c846-44f5-85d5-bd1990c154bc tags:
Every neural network in PyTorch is derived from the `nn.Module` class. This class offers:
- Organization and management of the layers.
- Capabilities for GPU acceleration.
- Implementation of the forward pass.
When we inherit from `nn.Module`, our custom neural network class benefits from these functionalities.
For more details, you can refer to the official [documentation](https://pytorch.org/docs/stable/generated/torch.nn.Module.html).
>**Task:** Familiarize yourself with the structure of a simple neural network provided below. Later, you'll be enriching it.
%% Cell type:code id:425abefe-54b9-4944-bc6e-cc78de892c66 tags:
``` python
import torch.nn as nn
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        # Define layers here

    def forward(self, x):
        # Call the layers in the correct order here
        return x
```
%% Cell type:markdown id:892e3b55-097b-436e-bbf8-a380fd7d9e35 tags:
### **Linear Layers: Making Connections**
%% Cell type:markdown id:564c17bb-543f-42f6-8c5d-b855ccaf71e6 tags:
In PyTorch, a linear layer performs an affine transformation. It has both weights and biases which get updated during training. The transformation it performs can be described as:
$y = xA^T + b$
Where:
- $x$ is the input
- $A$ represents the weights
- $b$ is the bias
The `nn.Linear` class in PyTorch creates such a layer.
[Documentation Link for nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html)
> **Task:** Add an input layer and an output layer to the `SimpleNet` class.
>
> - The input layer should transform from `input_size` to `hidden_size`.
> - The output layer should transform from `hidden_size` to `output_size`.
> - After defining the layers in the `__init__` method, call them in the `forward` method to perform the transformations.
%% Cell type:code id:daa8829a-05e9-474e-b6e6-c7f749e22295 tags:
``` python
# Modify the below code by adding input and output linear layers in the appropriate places
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        # Define layers here

    def forward(self, x):
        # Call the layers in the correct order here
        return x
```
%% Cell type:markdown id:c5038840-2713-4492-b7ab-c70469a2e96e tags:
<details>
<summary>Hint (click to reveal)</summary>
To define the input and output linear layers, use the `nn.Linear` class in the `__init__` method:
Then, in the `forward` method, pass the input through the defined layers.
```python
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        self.input_layer  = nn.Linear(input_size, hidden_size)
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return x
```
</details>
%% Cell type:markdown id:c2bb82c9-8949-4472-84fe-def36c514150 tags:
### **Activation Functions: Introducing Non-Linearity**
%% Cell type:markdown id:d989e2d8-5530-45f3-8664-e0d1b9eb627a tags:
Activation functions are critical components in neural networks, introducing non-linearity between layers. This non-linearity allows networks to learn from the error and make adjustments, which is essential for learning complex patterns.
In PyTorch, many activation functions are available as part of the `torch.nn` module, such as ReLU, Sigmoid, and Tanh.
For our `SimpleNet` model, we'll use the ReLU (Rectified Linear Unit) activation function after the input layer. The ReLU function is defined as $f(x) = \max(0, x)$.
Learn more about [ReLU and other activation functions in the official documentation](https://pytorch.org/docs/stable/nn.html#non-linear-activations-weighted-sum-nonlinearity).
> **Task**: Update your `SimpleNet` class to include the ReLU activation function after the input layer. For this, you'll need to both define the activation function in `__init__` and apply it in the `forward` method.
%% Cell type:code id:9e426301-5a55-46a2-8305-241b8f1ca4bf tags:
``` python
# Copy the previous SimpleNet definition and modify the code to include the ReLU activation function.
```
%% Cell type:markdown id:212ef244-f7bf-49a2-b4c9-b1b90af315de tags:
<details>
<summary>Hint (click to reveal)</summary>
To include the ReLU activation in your neural network:
1. Define the ReLU activation function in the `__init__` method.
2. Apply the activation function in the `forward` method after passing through the `input_layer`.
```python
class SimpleNet(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(SimpleNet, self).__init__()
self.input_layer = nn.Linear(input_size, hidden_size)
self.relu = nn.ReLU() # Defining the ReLU activation function
self.output_layer = nn.Linear(hidden_size, output_size)
def forward(self, x):
x = self.input_layer(x)
x = self.relu(x) # Applying the ReLU activation function
x = self.output_layer(x)
return x
```
</details>
%% Cell type:markdown id:640ef2f4-6816-4c5e-955c-c14c33349512 tags:
#### **Adjusting the Network: Adding Dropout**
%% Cell type:markdown id:e5596abf-b262-461d-ad5f-6a3488a79a42 tags:
[Dropout](https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html) is a regularization technique that can improve generalization in neural networks. It works by randomly setting a fraction of input units to 0 at each update during training time.
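Here is a minimal sketch of that behaviour (illustration only; the tensor and probability are arbitrary). Note that dropout is only active in training mode, which is covered in more detail later:
```python
import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)
x = torch.ones(8)

drop.train()      # training mode: dropout is active
print(drop(x))    # about half the entries are zeroed, the survivors are scaled by 1/(1-p) = 2

drop.eval()       # evaluation mode: dropout is a no-op
print(drop(x))    # all ones
```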
> **Task**: Modify the `SimpleNet` class to include a dropout layer with a dropout probability of 0.5 between the input layer and the output layer. Don't forget to call this layer in the forward method.
>
> Remember, after modifying the class structure, you'll need to re-instantiate your model object.
%% Cell type:code id:1c68ffd4-1de6-4d77-a15f-705b24c924af tags:
``` python
# Add a dropout layer to your previous code
```
%% Cell type:markdown id:d78c2dab-95c1-441c-b661-80bfba9a2dfd tags:
<details>
<summary>Hint (click to reveal)</summary>
Here's how you can modify the SimpleNet class to include dropout:
```python
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        self.input_layer = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)   # randomly zeroes activations with probability 0.5 during training
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.relu(x)
        x = self.dropout(x)
        return self.output_layer(x)
```
Don't forget to create a new instance of your model afterwards, e.g. `model = SimpleNet(input_size, hidden_size, output_size)` (and move it to your device with `.to(device)` once a device has been defined).
</details>
%% Cell type:markdown id:ce1cb22c-8288-4c69-9dcb-56896de49794 tags:
### **Utilizing the Neural Network**
%% Cell type:markdown id:255c3bf2-419d-4d14-82d6-7959e9280670 tags:
Once our neural network is defined, it's time to put it to use. This section will cover:
1. Instantiating the network
2. Transferring the network to GPU (if available)
3. Making predictions using the network (forward pass)
4. Understanding training and evaluation modes
5. Performing a backward pass to compute gradients
%% Cell type:markdown id:9f28cee5-c7a0-48c5-8341-6da6fae516c5 tags:
#### **1. Instantiating the Network**
%% Cell type:markdown id:0760bef6-d77a-4b7b-b5c7-18b208d93b98 tags:
To use our `SimpleNet`, we first need to create an instance of it. While creating an instance, the network's weights are also initialized.
> **Task**: Instantiate the `SimpleNet` class. Use `input_size=5`, `hidden_size=3`, and `output_size=1` as parameters.
%% Cell type:code id:ae9bfc87-5b09-476c-b32b-92c09f992fe3 tags:
``` python
# Your code here: Instantiate the model
```
%% Cell type:markdown id:f951e5d2-e0b4-451d-9a9b-44256f8a224c tags:
<details>
<summary>Hint (click to reveal)</summary>
To instantiate the SimpleNet class:
```python
model = SimpleNet(input_size=5, hidden_size=3, output_size=1)
print(model)
```
</details>
%% Cell type:markdown id:35567e41-6de6-429b-be4b-a14598313aca tags:
#### **2. Transferring the Network to GPU**
%% Cell type:markdown id:b3f3b3c3-4d7a-46db-9634-1e14b277c808 tags:
PyTorch makes it very straightforward to transfer our model to a GPU if one is available. This is done using the `.to()` method.
> **Task**: Check if GPU (CUDA) is available. If it is, transfer the model to the GPU.
%% Cell type:code id:91cb61a0-d890-4697-88d9-7749ea2bf144 tags:
``` python
# Check for GPU availability and transfer the model to GPU if available.
```
%% Cell type:markdown id:8a405f2d-3d8d-4e4c-90d1-54a05ff08b90 tags:
<details>
<summary>Hint (click to reveal)</summary>
To transfer the model to the GPU if it's available:
```python
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
```
</details>
%% Cell type:markdown id:175ab7cc-cddf-4460-ab01-f0193c2908d7 tags:
#### **3. Making Predictions using the Network (Forward Pass)**
%% Cell type:markdown id:e3724444-e0a6-48b0-8872-0b53b000a3bd tags:
With our model instantiated and potentially on a GPU, we can use it to make predictions. This involves passing some input data through the model, which is commonly referred to as a forward pass.
> **Task**: Create a tensor of size [1, 5] (representing one sample with five features) with random values. Transfer this tensor to the same device as your model (GPU or CPU). Then, pass this tensor through your model to get the prediction.
%% Cell type:code id:00e818ee-72e0-4960-a87e-a27b771d58eb tags:
``` python
# Create a tensor, transfer it to the right device, and perform a forward pass.
```
%% Cell type:markdown id:8bc38fde-0c14-45a6-b237-76ec7beab7f0 tags:
<details>
<summary>Hint (click to reveal)</summary>
To make predictions using your model:
```python
# Create a tensor with random values
input_tensor = torch.randn(1, 5).to(device)
# Pass the tensor through the model
output = model(input_tensor)
print(output)
```
</details>
%% Cell type:markdown id:fad9f46f-b591-4a2f-b2bf-3b4cf54cf961 tags:
#### **4. Understanding Training and Evaluation Modes**
%% Cell type:markdown id:2f197278-8d74-4a69-8da9-caf3f952e7bc tags:
Every PyTorch model has two modes:
- `train` mode: In this mode, certain layers like dropout or batch normalization behave differently than during evaluation. For instance, dropout will randomly set a fraction of input units to 0 at each update during training.
- `eval` mode: Here, the model behaves in a deterministic manner. Dropout layers don't drop activations, and batch normalization uses the running statistics accumulated during training instead of the current mini-batch's statistics.
Setting the model to the correct mode is crucial. Let's demonstrate this.
> **Task**: Set your model to `train` mode, then perform a forward pass using the same input tensor multiple times and observe the outputs. Then, set your model to `eval` mode and repeat. Notice any differences?
%% Cell type:code id:4c2d921d-d409-4ae6-8ee4-8376fc9a209d tags:
``` python
# Perform the forward passes multiple times with the same input in both modes and observe the outputs.
```
%% Cell type:markdown id:0dbd65fa-b86b-4516-9fb1-aceae0c9d8a3 tags:
<details>
<summary>Hint (click to reveal)</summary>
Here's how you can demonstrate the difference:
```python
# Set to train mode
model.train()
# Forward pass multiple times
print("Train mode:")
for i in range(5):
print(model(input_tensor))
# Set to eval mode
model.eval()
print("Eval mode:")
# Forward pass multiple times
for i in range(5):
print(model(input_tensor))
```
If there were layers like dropout in your model, you'd notice that the outputs in training mode might differ on each pass, while in evaluation mode, they remain consistent.
</details>
%% Cell type:markdown id:e8c55be3-71f7-45e7-91d1-c556e8108fef tags:
## **The Training Procedure in PyTorch**
%% Cell type:markdown id:eac54af7-c8db-4a19-861b-2eecf68fb44e tags:
Training a neural network involves several key components: defining a loss function to measure errors, selecting an optimization method to adjust the model's weights, and iterating over the dataset multiple times. In this section, we will break down these components step by step, starting with the basics and moving towards more complex tasks.
%% Cell type:markdown id:3e9231a9-105c-4aed-bfa5-846ddc07245f tags:
### **Datasets and DataLoaders: Handling and Batching Data**
%% Cell type:markdown id:8dbc3fcf-5a29-4fd8-9e82-3eaae4c8dc90 tags:
In PyTorch, the `torch.utils.data.Dataset` class is used to represent a dataset. This abstract class requires the implementation of two primary methods: `__len__` (to return the number of items) and `__getitem__` (to return the item at a given index). However, PyTorch provides a utility class, `TensorDataset`, that wraps tensors in the dataset format, making it easier to use with the `DataLoader`.
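To make the protocol concrete, here is a minimal sketch of a hand-written `Dataset` (the class name and toy data are made up for illustration; in this notebook we will simply use `TensorDataset`):
```python
from torch.utils.data import Dataset

class SquaresDataset(Dataset):
    """Toy dataset returning (x, x**2) pairs."""
    def __init__(self, n):
        self.n = n

    def __len__(self):
        return self.n            # number of items in the dataset

    def __getitem__(self, idx):
        x = float(idx)
        return x, x * x          # the (input, target) pair at index idx

ds = SquaresDataset(5)
print(len(ds))    # 5
print(ds[2])      # (2.0, 4.0)
```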
The `torch.utils.data.DataLoader` class is a more powerful tool, responsible for:
- Batching the data
- Shuffling the data
- Loading the data in parallel using multiprocessing workers
Let's wrap some data in a Dataset and use a DataLoader to handle batching and shuffling.
> **Task**: Convert the input and target tensors into a dataset and dataloader. For this exercise, set the batch size to 32.
Below, we define synthetic data that follows a simple, learnable relationship.
This way, we're essentially modeling the relationship $y=mx+c+noise$ where:
- $y$ is the target or output.
- $m$ is the slope of the line.
- $c$ is the y-intercept.
- $x$ is the input.
- $noise$ is a small random value added to each point to make the data more realistic.
%% Cell type:code id:f8335e62-e0c0-4381-9c20-1ca8ed78516c tags:
``` python
num_samples = 1000
# Define the relationship
m = 2.0
c = 1.0
noise_factor = 0.05
# Generate input tensor
input_tensor = torch.linspace(-10, 10, num_samples).view(-1, 1)
# Generate target tensor based on the relationship
target_tensor = m * input_tensor + c + noise_factor * torch.randn(num_samples, 1)
import matplotlib.pyplot as plt
plt.figure(figsize=(10,6))
plt.scatter(input_tensor.numpy(), target_tensor.numpy(), color='blue', marker='o')
plt.title("Synthetic Data Visualization")
plt.xlabel("Input")
plt.ylabel("Target")
plt.grid(True)
plt.show()
```
%% Cell type:code id:9535ad7e-6534-491b-b38d-b61cdd60b39d tags:
``` python
# Convert our data into a dataset
# ...
# Create a data loader for mini-batch training
# ...
```
%% Cell type:markdown id:da99866e-ebd0-403d-8159-8a36d601bf09 tags:
<details>
<summary>Hint (click to reveal)</summary>
Use the TensorDataset class from torch.utils.data to wrap your tensors in a dataset format. After defining your dataset, you can use the DataLoader class to create an iterator that will return batches of data.
```python
from torch.utils.data import DataLoader, TensorDataset
# Convert our data into a dataset
dataset = TensorDataset(input_tensor, target_tensor)
# Create a data loader for mini-batch training
batch_size = 32
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
```
</details>
%% Cell type:markdown id:ea5aee0c-6c8a-485f-b099-9844a28bafa3 tags:
> **Task**: Explore the `dataset` and `data_loader`:
> 1. Print the total number of samples in the dataset and DataLoader.
> 2. Iterate one time over both and print the shape of items you retrieve.
%% Cell type:code id:244a8198-60c5-4154-93ab-3d96fbf3488a tags:
``` python
# Total number of samples
# ...
# Dataset elements
# ...
# DataLoader elements
# ...
```
%% Cell type:markdown id:882438f7-3cc7-4a20-a223-41ede7856ef4 tags:
<details>
<summary>Hint (click to reveal)</summary>
When you iterate over the dataset, each item is a tuple `(input, target)`, so you retrieve two tensors, each of shape `[1]`.
On the other hand, when you iterate over the data_loader, each item is a mini-batch of data. Thus, the first dimension of each batch should correspond to the batch size you've set (i.e., 32 in our case), except possibly for the last batch if the dataset size isn't a perfect multiple of the batch size.
```python
# Total number of samples
print(f"Total samples in dataset: {len(dataset)}")
print(f"Total batches in DataLoader: {len(data_loader)}")
# Dataset elements
(index, (data, target)) = next(enumerate(dataset))
print(f"Sample {index}: Data shape {data.shape}, Target shape {target.shape}")
# DataLoader elements
(index, (batch_data, batch_target)) = next(enumerate(data_loader))
print(f"Batch {index}: Data shape {batch_data.shape}, Target shape {batch_target.shape}")
```
</details>
%% Cell type:markdown id:8dc08bb3-e5b2-4a7d-be10-6adc496a812d tags:
### **Splitting the Dataset: Training, Validation, and Testing Sets**
%% Cell type:markdown id:659a4899-cb14-4a47-b990-ea1a77592102 tags:
When training neural networks, it's common to split the dataset into at least two sets:
1. **Training Set**: This set is used to train the model, i.e., adjust the weights using gradient descent.
2. **Validation Set** (optional, but often used): This set is used to evaluate the model during training, allowing for hyperparameter tuning without overfitting.
3. **Test Set**: This set is used to evaluate the model's performance after training, providing an unbiased assessment of its performance on new, unseen data.
In PyTorch, we can use the `random_split` function from `torch.utils.data` to easily split datasets.
First, let's define the lengths for each split:
%% Cell type:code id:32202871-2911-44e6-8ad6-6d848cb3ede0 tags:
``` python
total_samples = len(dataset)
train_size = int(0.8 * total_samples)
val_size = total_samples - train_size
```
%% Cell type:markdown id:a1f7a839-8ee0-460f-bef0-87ca30f7409e tags:
> **Task**: Using the random_split function, split the dataset into a training set and a validation set using the sizes provided above.
[Here's the documentation for random_split](https://pytorch.org/docs/stable/data.html#torch.utils.data.random_split).
> **Task**: Create the train_loader and val_loader
%% Cell type:code id:50a80fc9-ef6e-4118-ad6a-3dea9d16e94f tags:
``` python
# Splitting the dataset
```
%% Cell type:markdown id:b01bb0d7-17c0-4edd-a2b6-17e4ca74b2aa tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
# Splitting the dataset
from torch.utils.data import random_split
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
```
</details>
%% Cell type:markdown id:e2729431-701c-4451-931c-2ae0ed58dbb5 tags:
> **Task**: Now, using the provided training and validation datasets, print out the number of samples in each set. Also, fetch one sample from each set and print its shape.
%% Cell type:code id:770c42f6-7a52-4856-a4fe-23a60666389a tags:
``` python
# Your code here
```
%% Cell type:markdown id:583948e8-898a-4336-92c6-aaddef6adbcf tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
# Print number of samples in each set
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")
# Fetching one sample from each set and printing its shape
train_sample, train_target = train_dataset[0]
print(f"Training sample shape: {train_sample.shape}, Target shape: {train_target.shape}")
val_sample, val_target = val_dataset[0]
print(f"Validation sample shape: {val_sample.shape}, Target shape: {val_target.shape}")
```
</details>
%% Cell type:markdown id:0fdec6d6-9b32-457d-b8e6-d94d8e020e4f tags:
### **Loss Functions: Measuring Model Errors**
%% Cell type:markdown id:899ce66c-e878-4f6a-b37c-34cdeae438a1 tags:
Every training process needs a metric to determine how well the model's predictions align with the actual data. This metric is called the loss function (or cost function).
Different problems require different loss functions, and PyTorch provides a variety of [loss functions](https://pytorch.org/docs/stable/nn.html#loss-functions) suited for different tasks. For instance:
- **Mean Squared Error (MSE)**: Commonly used for regression tasks.
- **Cross-Entropy Loss**: Suited for classification tasks.
For a simple regression task, a common choice is the Mean Squared Error (MSE) loss.
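As a quick numeric sketch (the values are arbitrary), `nn.MSELoss` averages the squared differences between predictions and targets:
```python
import torch
import torch.nn as nn

criterion = nn.MSELoss()
pred   = torch.tensor([2.5,  0.0, 2.0])
target = torch.tensor([3.0, -0.5, 2.0])

loss = criterion(pred, target)
print(loss)   # (0.5**2 + 0.5**2 + 0.0**2) / 3 = 0.1667 (approximately)
```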
> **Task**: Familiarize yourself with the [MSE loss documentation](https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html). You will soon use it in the training loop.
> **Task**: Instantiate the Mean Squared Error (MSE) loss provided by PyTorch for our current neural network.
%% Cell type:code id:692e83d7-7382-4ab2-9caf-daa3a77bfd4d tags:
``` python
# Define the loss function.
```
%% Cell type:markdown id:7fe8dcb5-8a43-4561-88a0-a4a2a2d1bf53 tags:
<details>
<summary>Hint (click to reveal)</summary>
To define the MSE loss in PyTorch, you can use:
```python
criterion = nn.MSELoss()
```
</details>
%% Cell type:markdown id:e957d999-0a56-4320-808a-05d1af6b81c7 tags:
### **Optimizers: Adjusting Weights**
%% Cell type:markdown id:d3d4a09d-8838-4fd3-9e16-bfdc5018abde tags:
Optimizers adjust the weights of the network based on the gradients computed during backpropagation. Different optimizers update weights in different ways. For example, the popular **Stochastic Gradient Descent (SGD)** optimizer simply updates weights in the direction of the negative gradients, while **Adam** and **RMSprop** are more advanced optimizers that add mechanisms such as momentum and per-parameter adaptive learning rates.
PyTorch offers a wide range of [optimizers](https://pytorch.org/docs/stable/optim.html).
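To see what "updating weights in the direction of negative gradients" means, here is a minimal sketch of a single SGD step on one parameter (the toy loss and values are made up for illustration):
```python
import torch

w = torch.tensor(1.0, requires_grad=True)
opt = torch.optim.SGD([w], lr=0.1)

loss = (2.0 * w - 3.0) ** 2   # toy loss, minimal at w = 1.5
loss.backward()               # dloss/dw = 4 * (2w - 3) = -4 at w = 1.0
opt.step()                    # w <- w - lr * grad = 1.0 - 0.1 * (-4) = 1.4
print(w.item())               # 1.4
opt.zero_grad()               # reset gradients before the next step
```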
> **Task**: Review the [SGD optimizer documentation](https://pytorch.org/docs/stable/optim.html#torch.optim.SGD). It will be pivotal in the training loop you'll construct.
> **Task**: For this exercise, let's use the SGD optimizer. Instantiate it, setting our neural network parameters as the ones to be optimized and choosing a learning rate of 0.01.
%% Cell type:code id:39c8dfa8-7ea0-44e4-9429-118a6333bfe1 tags:
``` python
# Define the optimizer.
```
%% Cell type:markdown id:05e37f67-519a-4c49-97b3-2fafb7176de1 tags:
<details>
<summary>Hint (click to reveal)</summary>
To define the SGD optimizer in PyTorch, you can use:
```python
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
```
Note: the task above suggests a learning rate of 0.01, but because the inputs are unnormalized (x ranges from -10 to 10), such a large step size can make the loss diverge; a much smaller value such as 0.0001 usually converges more reliably.
</details>
%% Cell type:markdown id:13b2fb3e-5391-4e66-ba83-55e66935d2aa tags:
### **Setting Up the Basic Training Loop Function**
%% Cell type:markdown id:7a364925-b4d9-4ffd-b3f8-be30a5bb1613 tags:
Having a training loop within a function allows us to reuse the same code structure for different models, datasets, or other training parameters without redundancy. This modular approach also promotes code clarity and maintainability.
Let's define the training loop function which takes the model, data (inputs and targets), loss function, optimizer, and the number of epochs as parameters. The function should return the history of the loss after each epoch.
A typical training loop consists of:
1. Sending the input through the model (forward pass).
2. Calculating the loss.
3. Propagating the loss backward through the model to compute gradients (backward pass).
4. Updating the weights using the optimizer.
5. Repeating the steps for several epochs.
Training with the entire dataset as one batch can be memory-intensive and sometimes not as effective. Hence, in practice, we usually divide our dataset into smaller chunks or mini-batches and update our weights after each mini-batch.
> **Task**: Create a function named `train_model` that encapsulates the training loop for the `SimpleNet` model. The function should follow the signature the next code cell:
%% Cell type:code id:734864fe-46b6-4435-b58d-19b085ebd3f9 tags:
``` python
def train_model(model, dataloader, loss_function, optimizer, epochs):
# Your code here
pass
```
%% Cell type:markdown id:a6fee8dc-59da-4d48-918e-d6e093e997e5 tags:
<details>
<summary>Hint (click to reveal)</summary>
Here's how the train_model function might look:
```python
def train_model(model, dataloader, loss_function, optimizer, epochs):
# Store the loss values at each epoch
loss_history = []
for epoch in range(epochs):
for inputs, targets in dataloader:
# Ensure that data is on the right device
inputs, targets = inputs.to(device), targets.to(device)
# Reset the gradients to zero
optimizer.zero_grad()
# Execute a forward pass
outputs = model(inputs)
# Calculate the loss
loss = loss_function(outputs, targets)
# Conduct a backward pass
loss.backward()
# Update the weights
optimizer.step()
# Append the loss to the history
loss_history.append(loss.item())
print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss_history[-1]:.4f}")
return loss_history
```
</details>
%% Cell type:markdown id:c4e4b485-ffa6-487d-8dbc-b0b0590a796a tags:
### **Training the Neural Network**
%% Cell type:markdown id:15ba6b07-728f-4444-a3a9-af8cfeb884e1 tags:
With all the components defined in the previous sections, it's now time to integrate everything and set the training process in motion.
> **Task**: Combine all the previously defined elements to initiate the training procedure for your neural network model.
> 1. Don't forget to move your model and your data to the same device (GPU or CPU).
> 2. Train the model using the `train_loader` (keep the `val_loader` for evaluating the model afterwards).
%% Cell type:code id:90d043f7-213d-42a7-a14b-e6b716003b70 tags:
``` python
# Your code here to initiate the training process
```
%% Cell type:markdown id:398aaeec-5d6d-4ef6-bd24-27d51b32c148 tags:
<details>
<summary>Hint (click to reveal)</summary>
To train the model, you need to integrate all the previously defined components:
```python
# Moving the model to the device
model = SimpleNet(input_size=1, hidden_size=10, output_size=1).to(device)
# Re-create the optimizer so that it optimizes the parameters of this new model instance
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
# Training the model using the train_loader
loss_history = train_model(model, train_loader, criterion, optimizer, epochs=50)
```
Make sure you have defined `criterion` (the loss function) and the `train_model` function in the previous sections.
</details>
%% Cell type:code id:c7cf3df1-9fe2-4eee-a5bf-386f77b257f1 tags:
``` python
import matplotlib.pyplot as plt
# Plotting the loss curve
plt.figure(figsize=(10,6))
plt.plot(loss_history, label='Training Loss')
plt.title("Loss Curve")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.grid(True)
plt.show()
```
%% Cell type:markdown id:2b7f9d87-c172-427c-a2f4-1090b1120148 tags:
## **Conclusion: Moving Beyond the Basics**
%% Cell type:markdown id:6074877c-c149-4af9-8503-153455edd42a tags:
You've now built and trained a simple neural network using PyTorch, and you might be wondering: why aren't my results as good as I expected?
While you've certainly made strides, the journey of mastering deep learning and neural networks is filled with nuance, challenges, and constant learning. Here are some reasons why your results might not be optimal and what you'll discover in your next steps:
1. **Hyperparameter Tuning**: So far, we've set values like the learning rate and batch size somewhat arbitrarily. These values are critical and often require careful tuning specific to each problem.
2. **Learning Rate Scheduling**: A fixed learning rate might not always be the best strategy. Reducing the learning rate during training, known as learning rate annealing or scheduling, often leads to better convergence.
3. **Model Architecture**: The neural network we built is basic. There's an entire world of architectures out there, designed for specific types of data and tasks. The right architecture can make a significant difference.
4. **Regularization**: To prevent overfitting, techniques like dropout, weight decay, and early stopping can be applied. We haven't touched upon these, but they're crucial for ensuring your model generalizes well to unseen data.
5. **Data Quality and Quantity**: While we used synthetic data for simplicity, real-world data is messy. Cleaning and preprocessing data, augmenting it, and ensuring it's representative can have a significant impact on performance.
6. **Optimization Techniques**: There are advanced optimization algorithms and techniques that can speed up training and lead to better convergence. Techniques like momentum, adaptive learning rates (e.g., Adam, RMSprop) can play a crucial role.
7. **Evaluation Metrics**: We've looked at loss values, but in real-world scenarios, understanding and selecting the right evaluation metrics for the task (accuracy, F1-score, AUC-ROC, etc.) is vital.
8. **Training Dynamics**: Understanding how models train, visualizing the activations, weights, and gradients, and knowing when and why a model is struggling can offer insights into how to improve performance.
Remember, while the mechanics of building and training a neural network are essential, the art of deep learning lies in understanding the nuances and iterating based on insights and knowledge. The next steps in your learning, focusing on methodology, will provide the tools and knowledge to navigate these complexities and achieve better results.
Keep learning, experimenting, and iterating! The world of deep learning is vast, and there's always something new to discover.
%% Cell type:markdown id:ca6048e4-f3cf-40eb-bd50-c95f281f0554 tags:
## **Extra for the Fast Movers: Diving Deeper**
%% Cell type:markdown id:46a25dfd-1cc9-444d-98d6-966e7cc9da07 tags:
To further enhance your understanding and capability with PyTorch, this section introduces additional topics that cater to more advanced use-cases. These tools and techniques can be essential when dealing with larger and more complex projects, providing valuable insights into optimization and performance.
%% Cell type:markdown id:30edeed8-321b-4b1f-ace6-0decd8a167e5 tags:
### **Profiling with PyTorch Profiler in TensorBoard**
%% Cell type:markdown id:256bd4a2-aa6f-4a50-9c5d-854ca25293de tags:
PyTorch, starting from version 1.9.0, incorporates the PyTorch Profiler as a TensorBoard plugin. This integration allows users to profile their PyTorch code and visualize the results directly within TensorBoard.
Below, we will instrument our PyTorch code for TensorBoard profiling.
Use this [documentation](http://www.idris.fr/jean-zay/pre-post/profiler_pt.html) to achieve the next tasks.
> **Task:** Before instrumenting your PyTorch code, you'll need to import the necessary modules for profiling.
> **Task:** Modify the training loop to invoke the profiler.
%% Cell type:code id:86b471a6-7de6-40f0-af58-c41e8e8acbae tags:
``` python
# Your imports here
# Your code here
def train_model_with_profiling(model, train_loader, criterion, optimizer, epochs, profiler_dir='./profiler'):
# Your code here
pass
```
%% Cell type:markdown id:f389816a-fa2a-4668-9f0b-07d2a5abf5e1 tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
from torch.profiler import profile, tensorboard_trace_handler, ProfilerActivity, schedule
def train_model_with_profiling(model, dataloader, loss_function, optimizer, epochs, profiler_dir='./profiler'):
# Store the loss values at each epoch
loss_history = []
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
schedule=schedule(wait=1, warmup=1, active=12, repeat=1),
on_trace_ready=tensorboard_trace_handler(profiler_dir)) as prof:
for epoch in range(epochs):
for inputs, targets in dataloader:
# Ensure that data is on the right device
inputs, targets = inputs.to(device), targets.to(device)
# Reset the gradients to zero
optimizer.zero_grad()
# Execute a forward pass
outputs = model(inputs)
# Calculate the loss
loss = loss_function(outputs, targets)
# Conduct a backward pass
loss.backward()
# Update the weights
optimizer.step()
# Append the loss to the history
loss_history.append(loss.item())
# Notify profiler of step boundary
prof.step()
print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss_history[-1]:.4f}")
return loss_history
```
Make sure you have defined the loss_function, optimizer, and epochs in the previous sections.
</details>
%% Cell type:code id:cb82f0a9-522f-4746-87f9-ba7b7952d863 tags:
``` python
# Training the model using the train_loader
loss_history = train_model_with_profiling(model, train_loader, criterion, optimizer, 10, profiler_dir='./profiler')
```
%% Cell type:markdown id:313e4f40-521a-4beb-a278-c1ca9502b499 tags:
> **Task:** Visualize the profiling results. You will need to open a TensorBoard interface using the blue button in the top-left corner.
>
> **Make sure to specify the logdir with `--logdir=/path/to/profiler_folder`.**
%% Cell type:markdown id:06f86768-3b78-4874-b083-64bc365080fb tags:
### **Learning Rate Scheduling**
%% Cell type:markdown id:44721444-ba4a-44d0-9b65-16890dd4f097 tags:
One of the key hyperparameters to tune during neural network training is the learning rate. While it's possible to set a static learning rate for the entire training process, in practice, dynamically adjusting the learning rate often leads to better convergence and overall performance. This dynamic adjustment is often referred to as learning rate scheduling or annealing.
**Concept of Learning Rate Scheduling**
The learning rate determines the step size at each iteration while moving towards a minimum of the loss function. If it's too large, the optimization might overshoot the minimum. Conversely, if it's too small, the training might get stuck, or convergence could be very slow.
A learning rate scheduler changes the learning rate during training based on the provided scheduling policy. By adjusting the learning rate during training, you can achieve faster convergence and better final results.
**Using Learning Rate Schedulers in PyTorch**
PyTorch provides a variety of learning rate schedulers through the `torch.optim.lr_scheduler` module. Some of the popular ones are:
- `StepLR`: Decays the learning rate of each parameter group by `gamma` every `step_size` epochs.
- `ExponentialLR`: Decays the learning rate of each parameter group by `gamma` every epoch.
- `ReduceLROnPlateau`: Reduces the learning rate when a metric has stopped improving.
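Before modifying your own loop, here is a minimal sketch of how a scheduler wraps an optimizer and decays the learning rate each time `step()` is called (the scheduler type and hyperparameters below are arbitrary):
```python
import torch
from torch.optim.lr_scheduler import ExponentialLR

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.1)
scheduler = ExponentialLR(optimizer, gamma=0.5)

for epoch in range(3):
    optimizer.step()                        # stand-in for the real training steps of the epoch
    scheduler.step()                        # decay the learning rate once per epoch
    print(epoch, scheduler.get_last_lr())   # [0.05], then [0.025], then [0.0125]
```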
> **Task:** Take a look at the [documentation](https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate) or click on the hint in the following cell, then integrate an LR scheduler into the code you wrote before.
%% Cell type:markdown id:0c79a170-35d0-438f-b01b-a3f236f8b724 tags:
<details>
<summary>Hint (click to reveal)</summary>
Below, you have a typical training loop with a learning rate scheduler.
```python
from torch.optim.lr_scheduler import StepLR

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

for epoch in range(epochs):
    for input, target in data:
        optimizer.zero_grad()
        output = model(input)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
    # Step the learning rate scheduler once per epoch
    scheduler.step()
```
</details>
%% Cell type:markdown id:33f99f6e-3120-495a-a25b-8b9f3d14deb2 tags:
### **Automatic Mixed Precision**
%% Cell type:markdown id:217a7249-6655-4587-92b8-72dea7de8c9d tags:
Training deep neural networks can be both time-consuming and resource-intensive. One way to address this problem is by leveraging mixed precision training. In essence, mixed precision training uses both 16-bit and 32-bit floating-point types to represent numbers in the model, which can speed up training without sacrificing the accuracy of the final model.
**Overview of AMP (Automatic Mixed Precision)**
AMP (Automatic Mixed Precision) is a set of utilities provided by PyTorch to enable mixed precision training more effortlessly. The main advantages of AMP are:
- Faster Training: By using reduced precision, the model requires less memory bandwidth, resulting in faster data transfers and faster matrix multiplication.
- Reduced GPU Memory Usage: This enables training of larger models or utilization of larger batch sizes.
PyTorch has integrated the AMP utilities starting from version 1.6.
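As a small sketch of what `autocast` does under the hood (this assumes a CUDA GPU is available; the tensor sizes are arbitrary), operations such as matrix multiplication are executed in half precision inside the context:
```python
import torch

if torch.cuda.is_available():
    a = torch.randn(8, 8, device="cuda")
    b = torch.randn(8, 8, device="cuda")
    with torch.cuda.amp.autocast():
        c = a @ b                # matmul runs in float16 under autocast
    print(c.dtype)               # torch.float16
    print((a @ b).dtype)         # torch.float32 outside the autocast context
```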
> **Task**: Setup AMP in the training function by checking the [documentation](http://www.idris.fr/eng/ia/mixed-precision-eng.html). You will need to do the necessary imports, initialize the GradScaler, modify the training loop by including "with autocast():" around the forward and loss computation.
%% Cell type:code id:ad131b4b-02ba-472d-af78-a048868e3efc tags:
``` python
# Your code here
```
%% Cell type:markdown id:de38cb30-7b24-48cb-b804-ed296e38e3fb tags:
<details>
<summary>Hint (click to reveal)</summary>
Below, you have a typical training loop with autocast.
```python
from torch.cuda.amp import autocast, GradScaler
scaler = GradScaler()
for epoch in range(epochs):
for input, target in data:
optimizer.zero_grad()
with autocast():
output = model(input)
loss = loss_fn(output, target)
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
```
</details>
%% Cell type:markdown id:a3f7818a-fea1-4a12-b52a-cd83e0ae2ffe tags:
### **Pytorch Compiler**
%% Cell type:markdown id:dbb5f69b-009e-40b3-94f0-5a420afbd003 tags:
**For this section, you will need to use Pytorch with a version superior to 2.0.**
PyTorch, a widely adopted deep learning framework, has consistently evolved to offer users better performance and ease of use. One such advancement is the introduction of the PyTorch Compiler. This cutting-edge feature accelerates PyTorch code execution by JIT-compiling it into optimized kernels. What's even more impressive is its ability to enhance performance with minimal modifications to the original codebase.
Historically, PyTorch has introduced compiler solutions like TorchScript and FX Tracing. However, the introduction of torch.compile with PyTorch 2.0 has taken performance optimization to a new level. It provides a seamless experience, enabling you to transform typical PyTorch functions and even torch.nn.Module instances into their faster, compiled counterparts.
For those eager to dive deep into its workings and benefits, detailed documentation and tutorials have been made available:
- [torch.compile Tutorial](https://pytorch.org/tutorials/intermediate/torch_compile_tutorial.html)
- [PyTorch 2.0 Release Notes](https://pytorch.org/get-started/pytorch-2.0/)
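For example, a minimal sketch of compiling the model trained earlier and checking that the compiled version agrees with the eager one (this assumes PyTorch >= 2.0 and that `model` and `device` are defined as in the previous sections):
```python
model.eval()                               # make the forward pass deterministic (disables dropout)
compiled_model = torch.compile(model)      # same weights, optimized execution

x = torch.randn(8, 1, device=device)       # dummy batch matching input_size=1
with torch.no_grad():
    print(torch.allclose(model(x), compiled_model(x), atol=1e-5))   # outputs should match closely
```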
> **Task:** Your task is to make your existing PyTorch model take advantage of the performance benefits offered by torch.compile. This will not only make your model run faster but also give you hands-on experience with one of the latest features in PyTorch.
%% Cell type:markdown id:8d5236bc-08e4-4142-8c9c-fd7007474ff2 tags:
<details>
<summary>Hint (click to reveal)</summary>
1. **Ensure Dependencies**:
- Ensure that you have the required dependencies, especially PyTorch version 2.0 or higher.
2. **Check for GPU Compatibility**:
- For optimal performance, it's recommended to use a modern NVIDIA GPU (H100, A100, or V100).
3. **Compile Functions**:
- You can optimize arbitrary Python functions as shown in the example:
```python
def your_function(x, y):
# ... Your PyTorch code here ...
opt_function = torch.compile(your_function)
```
- Alternatively, use the decorator approach:
```python
@torch.compile
def opt_function(x, y):
# ... Your PyTorch code here ...
```
4. **Compile Modules**:
- If you have a PyTorch module (a class derived from `torch.nn.Module`), you can compile it similarly:
```python
class YourModule(torch.nn.Module):
# ... Your module definition here ...
model = YourModule()
opt_model = torch.compile(model)
```
</details>
%% Cell type:markdown id:bd4066a6-3f24-4b63-b2be-da0350ec6145 tags:
Remember, while torch.compile optimizes performance, the underlying logic remains the same. Ensure to test and validate your compiled model's outputs against the original to confirm consistent behavior.
%% Cell type:markdown id:4340d5df tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [TSB1] - Tensorboard with/from Jupyter
<!-- DESC --> 4 ways to use Tensorboard from the Jupyter environment
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Using [**Tensorboard**](https://www.tensorflow.org/tensorboard/get_started)
## What we're going to do :
- Using Tensorboard
%% Cell type:markdown id: tags:
## In the Fidle environment :
To access logs with tensorboard :
- Under **Docker**, from a terminal launched via the jupyterlab launcher, use the following command:<br>
```tensorboard --logdir <path-to-logs> --host 0.0.0.0```
- If you're **not using Docker**, from a terminal :<br>
```tensorboard --logdir <path-to-logs>```
**Note:** Only one tensorboard instance can be used at a time.
%% Cell type:markdown id: tags:
## Otherwise, in the real world, from Jupyter (***)
It's the easiest and the best way!
Launch Tensorboard directly from Jupyter.
Works very well on Jean-Zay (at IDRIS) :-)
%% Cell type:markdown id: tags:
## Otherwise, in the real world, Tensorboard as a magic command (**)
Tensorboard can be run from Jupyter with a magic command.
See [documentation](https://www.tensorflow.org/tensorboard/tensorboard_in_notebooks)
Load the extension : ```%load_ext tensorboard```
Start tensorboard : ```%tensorboard --logdir logs```
%% Cell type:raw id: tags:
%load_ext tensorboard
%tensorboard --logdir logs
%% Cell type:markdown id: tags:
## Otherwise, in the real world, Option 2 - Shell command (*)
Basic way, from a shell
More about it : `# tensorboard --help`
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3LSTM1] - Basic Keras LSTM Layer
<!-- DESC --> A small example of an LSTM layer in Keras
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import numpy as np
```
%% Cell type:code id: tags:
``` python
input = keras.random.normal( [32, 20, 8] )
lstm = keras.layers.LSTM(16)
output = lstm(input)
print('input shape is : ',input.shape)
print('output shape is : ',output.shape)
```
%% Cell type:code id: tags:
``` python
input = keras.random.normal( [32, 20, 8] )
lstm = keras.layers.LSTM(18, return_sequences=True, return_state=True)
output, memory_state, carry_state = lstm(input)
print('input shape : ',input.shape)
print('output shape : ',output.shape)
print('memory_state : ', memory_state.shape)
print('carry_state : ', carry_state.shape)
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
German Traffic Sign Recognition Benchmark (GTSRB)
=================================================
---
Introduction au Deep Learning (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020
## Episode 5.1 : Full Convolutions / run
Our main steps:
- Run Full-convolution.ipynb as a batch :
- Notebook mode
- Script mode
- Tensorboard follow up
## 1/ Run a notebook as a batch
To run a notebook :
```jupyter nbconvert --to notebook --execute <notebook>```
%% Cell type:raw id: tags:
%%bash
# ---- This will execute and save a notebook
#
jupyter nbconvert --ExecutePreprocessor.timeout=-1 --to notebook --output='./run/full_convolutions' --execute '05-Full-convolutions.ipynb'
%% Cell type:markdown id: tags:
## 2/ Export as a script (better choice)
To export a notebook as a script :
```jupyter nbconvert --to script <notebook>```
To run the script :
```ipython <script>```
%% Cell type:code id: tags:
``` python
%%bash
# ---- This will convert a notebook to a notebook.py script
#
jupyter nbconvert --to script --output='./run/full_convolutions_B' '05-Full-convolutions.ipynb'
```
%% Output
[NbConvertApp] Converting notebook 05-Full-convolutions.ipynb to script
[NbConvertApp] Writing 11305 bytes to ./run/full_convolutions_B.py
%% Cell type:code id: tags:
``` python
!ls -l ./run/*.py
```
%% Output
-rw-r--r-- 1 pjluc pjluc 11305 Jan 21 00:13 ./run/full_convolutions_B.py
%% Cell type:markdown id: tags:
## 3/ Batch submission
Create batch script :
%% Cell type:code id: tags:
``` python
%%writefile "./run/batch_full_convolutions_B.sh"
#!/bin/bash
#OAR -n Full convolutions
#OAR -t gpu
#OAR -l /nodes=1/gpudevice=1,walltime=01:00:00
#OAR --stdout _batch/full_convolutions_%jobid%.out
#OAR --stderr _batch/full_convolutions_%jobid%.err
#OAR --project deeplearningshs
#---- For cpu
# use :
# OAR -l /nodes=1/core=32,walltime=01:00:00
# and add a 2>/dev/null to ipython xxx
# ----------------------------------
# _ _ _
# | |__ __ _| |_ ___| |__
# | '_ \ / _` | __/ __| '_ \
# | |_) | (_| | || (__| | | |
# |_.__/ \__,_|\__\___|_| |_|
# Full convolutions
# ----------------------------------
#
CONDA_ENV=deeplearning2
RUN_DIR=~/fidle/GTSRB
RUN_SCRIPT=./run/full_convolutions_B.py
# ---- Cuda Conda initialization
#
echo '------------------------------------------------------------'
echo "Start : $0"
echo '------------------------------------------------------------'
#
source /applis/environments/cuda_env.sh dahu 10.0
source /applis/environments/conda.sh
#
conda activate "$CONDA_ENV"
# ---- Run it...
#
cd $RUN_DIR
ipython $RUN_SCRIPT
```
%% Output
Writing ./run/batch_full_convolutions_B.sh
%% Cell type:code id: tags:
``` python
%%bash
chmod 755 ./run/*.sh
chmod 755 ./run/*.py
ls -l ./run/*full_convolutions*
```
%% Output
-rwxr-xr-x 1 pjluc pjluc 1045 Jan 21 00:15 ./run/batch_full_convolutions_B.sh
-rwxr-xr-x 1 pjluc pjluc 611 Jan 19 15:53 ./run/batch_full_convolutions.sh
-rwxr-xr-x 1 pjluc pjluc 11305 Jan 21 00:13 ./run/full_convolutions_B.py
%% Cell type:raw id: tags:
%%bash
./run/batch_full_convolutions.sh
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [PGRAD1] - Gradient illustration with PyTorch
<!-- DESC --> An example of gradient computation with PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- An example of gradient computation with PyTorch
## What we're going to do :
- An example of gradient computation with PyTorch
%% Cell type:code id: tags:
``` python
import torch
```
%% Cell type:markdown id: tags:
## Pure Python
%% Cell type:code id: tags:
``` python
# ---- My basic function f
def f(x):
    y = x*x + 4*x - 5
    return y

def df(x):
    y = 2*x + 4
    return y

# ---- Examples :
print('f(1) is : ', f(1))
print('f(2) is : ', f(2))
print('df(3) is : ', df(3))
```
%% Output
f(1) is : 0
f(2) is : 7
df(3) is : 10
%% Cell type:markdown id: tags:
## Using Torch
%% Cell type:markdown id: tags:
Get a nice tensor, with `requires_grad=True` :-)
%% Cell type:code id: tags:
``` python
x = torch.tensor(3.0, requires_grad = True)
print("x:", x)
```
%% Output
x: tensor(3., requires_grad=True)
%% Cell type:markdown id: tags:
Define our function..
%% Cell type:code id: tags:
``` python
y = x*x + 4*x - 5
```
%% Cell type:markdown id: tags:
Compute gradient with the backward function
%% Cell type:code id: tags:
``` python
y.backward()
```
%% Cell type:code id: tags:
``` python
dx = x.grad
print('dx=', dx)
```
%% Output
dx= tensor(10.)
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
Running Tensorboard from Jupyter lab
====================================
---
Introduction au Deep Learning (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020
Version : 1.0
%% Cell type:markdown id: tags:
## 1/ Method 1 : Shell command
%% Cell type:code id: tags:
``` python
%%bash
tensorboard_start --logdir ./run/logs
```
%% Cell type:code id: tags:
``` python
%%bash
tensorboard_status
```
%% Cell type:code id: tags:
``` python
%%bash
tensorboard_stop
```
%% Cell type:markdown id: tags:
## Method 2 : Magic command
**Start**
%% Cell type:code id: tags:
``` python
%load_ext tensorboard
```
%% Cell type:code id: tags:
``` python
%tensorboard --port 21277 --host 0.0.0.0 --logdir ./run/logs
```
%% Cell type:markdown id: tags:
**Stop**
No way... use bash method
## Method 3 : Tensorboard module
**Start**
%% Cell type:code id: tags:
``` python
import tensorboard.notebook as tsb
```
%% Cell type:code id: tags:
``` python
tsb.start('--port 21277 --host 0.0.0.0 --logdir ./run/logs')
```
%% Cell type:markdown id: tags:
**Check**
%% Cell type:code id: tags:
``` python
a=tsb.list()
```
%% Cell type:markdown id: tags:
**Stop**
No way... use bash method
%% Cell type:code id: tags:
``` python
!kill 214798
```
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [FID1] - Example of a Fidle notebook
<!-- DESC --> A simple example of a Fidle notebook
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
%% Cell type:markdown id: tags:
> **Note :** Remember to fill in the TITLE, DESC and AUTHOR tags of the cell above (see the markdown source)
%% Cell type:markdown id: tags:
# Step 1 - Init Python
> Remember to **import** the **Fidle module**
> Remember to perform the **initialization of the Fidle environment**
> `FID1` is the notebook identifier (run_id)
> `run_dir` is a folder for the notebook outputs (typically ./run/<run_id>)
> `datasets_dir` is the folder containing the Fidle datasets
%% Cell type:code id: tags:
``` python
import torch
import fidle

# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('FID1')
```
%% Cell type:markdown id: tags:
## Parameters
> Here we have (for example) 3 parameters : scale, x and batch_size
%% Cell type:code id: tags:
``` python
scale = 0.1
x = 12
batch_size = 64
```
%% Cell type:markdown id: tags:
> The call below defines the parameters that can be overridden during a batch execution via the fid command `run_ci...`
%% Cell type:code id: tags:
``` python
fidle.override('scale', 'x', 'batch_size')
```
%% Cell type:markdown id: tags:
## Working part...
(Everything our notebook does...)
%% Cell type:code id: tags:
``` python
print('scale=', scale)
```
%% Cell type:markdown id: tags:
## End part
%% Cell type:markdown id: tags:
> To end the notebook, we can:
> - call `fidle.end()` to display some useful information
> - insert a nice logo in markdown
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
......
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [OPT1] - Training setup optimization
<!-- DESC --> The goal of this notebook is to go through a typical deep learning model training
<!-- AUTHOR : Kamel Guerda (CNRS/IDRIS), Léo Hunout (CNRS/IDRIS) -->
## Objectives :
**Practice lab : Optimize your training process**
%% Cell type:markdown id: tags:
## Introduction
This lab takes place as a practical exercise of the [fidle](https://fidle.cnrs.fr/) online course N°16.
The goal of this notebook is to go through a typical deep learning model training. We will see what can be changed to optimize this training setup but also good practices to make more efficient experiments.
This notebook makes use of:
- The CIFAR10 dataset
- A Resnet model
- Pytorch
- A GPU (the notebook can be run on Jean-Zay if you have an account, on Google Colab with a 16 GB GPU, or at home with a dedicated GPU by scaling down the batch_size)
In particular we will work on:
- the dataloader strategy used to load data
- the model initial weights, in particular using a pretrained model
- the learning rate and learning rate scheduler
- the optimizer
- visualizing and comparing results using python, tensorboard
- various good practices/reminders
> First, you can do a complete execution of the notebook.
> **Then comeback from the start and follow the instructions to edit various components for better performance. You can also change them during the first execution if you have some intuitions about what should be changed and how.**
> **In order to compare performance, only change the xxx_optim variables, which are the ones you will use in your optimized training**
%% Cell type:code id: tags:
```
!nvidia-smi
```
%% Cell type:markdown id: tags:
## Few imports
%% Cell type:code id: tags:
```
import os
import time
import random
import numpy as np
import torch
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import _LRScheduler
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.models.resnet import ResNet18_Weights
import matplotlib.pyplot as plt
from datetime import datetime
from torch.utils.tensorboard import SummaryWriter
```
%% Cell type:markdown id: tags:
## Fix random seeds
In order to have reproducible experiments, it is good practice to fix the seeds of the random number generators.
Warning : there might be more seeds to set than you expect! Math, visualization, transformation libraries, ...
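For example (a sketch; which of these you actually need depends on your libraries and hardware), the GPU generators and cuDNN can also be made deterministic:
```python
import torch

torch.cuda.manual_seed_all(123)              # seed the generators of all GPUs
# Stricter (and usually slower) determinism for cuDNN kernels:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
```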
%% Cell type:code id: tags:
```
random.seed(123)
np.random.seed(123)
torch.manual_seed(123)
```
%% Cell type:markdown id: tags:
## Some functions
Below we define a few functions that will be used further in the notebook.
**Do not change them unless you know what and why you are doing it.**
%% Cell type:code id: tags:
```
def iter_dataloader(dataloader, epochs, args):
for epoch in range(epochs):
for i, (images, labels) in enumerate(dataloader):
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
def evaluate(dataloader, model, criterion, args):
'''
A simple loop for evaluation
'''
total_loss = 0
correct = 0
total = 0
with torch.no_grad():
for i, (images, labels) in enumerate(dataloader):
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
outputs = model(images)
loss = criterion(outputs,labels)
_, predicted = torch.max(outputs.data, 1)
total_loss += loss
total += labels.size(0)
correct += (predicted == labels).sum().item()
loss = (total_loss/total).item()
accuracy = (correct/total)*100
return loss, accuracy
def train_default(train_loader, val_loader, model, optimizer, criterion, args):
'''
The default simple training loop
'''
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []
time_start = time.time()
for epoch in range(args['epochs']):
print("Epoch ", epoch)
for i, (images, labels) in enumerate(train_loader):
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
# Zero the parameter gradients
optimizer.zero_grad()
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward pass
loss.backward()
# Optimize
optimizer.step()
# Evaluate at the end of the epoch on the train set
train_loss, train_accuracy = evaluate(train_loader, model, criterion, args)
print("\t Train loss : ", train_loss, "& Train accuracy : ", train_accuracy)
train_losses.append(train_loss)
train_accuracies.append(train_accuracy)
# Evaluate at the end of the epoch on the val set
val_loss, val_accuracy = evaluate(val_loader, model, criterion, args)
print("\t Validation loss : ", val_loss, "& Validation accuracy : ", val_accuracy)
val_losses.append(val_loss)
val_accuracies.append(val_accuracy)
duration = time.time() - time_start
print('Finished Training in:', duration, 'seconds with mean epoch duration:', duration/args['epochs'], ' seconds')
results = {'model':model,
'train_losses': train_losses,
'train_accuracies': train_accuracies,
'val_losses': val_losses,
'val_accuracies': val_accuracies,
'duration':duration}
return results
def explore_lrs(dataloader,
model,
optimizer,
args,
min_learning_rate_power=-8,
max_learning_rate_power = 1,
num_lrs=10,
steps_per_lr=50):
lrs = np.logspace(min_learning_rate_power, max_learning_rate_power, num=num_lrs)
print("Learning rate space : ", lrs)
model_init_state = model.state_dict()
lrs_losses, lrs_metric_avg, lrs_metric_var =[], [],[]
# Iterate through learning rates to test
for lr in lrs:
print("Testing lr:", '{:.2e}'.format(lr))
# Reset model
model.load_state_dict(model_init_state)
# Change learning rate in optimizer
for group in optimizer.param_groups:
group['lr'] = lr
# Reset metric tracking
lr_losses =[]
# Training steps
for step in range(steps_per_lr):
images, labels = next(iter(dataloader))
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
optimizer.zero_grad()
outputs = model(images)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
lr_losses.append(loss.item())
print(lr_losses)
# Compute loss average for lr
lr_loss_avg = np.mean(lr_losses)
lr_loss_avg = lr_losses[-1]
lrs_losses.append(lr_loss_avg)
# Compute metric (discounted average gradient of the loss)
lr_gradients = np.gradient(lr_losses)
lr_metric_avg = np.mean(lr_gradients)
lr_metric_var = np.var(lr_gradients)
lrs_metric_avg.append(lr_metric_avg)
lrs_metric_var.append(lr_metric_var)
model.load_state_dict(model_init_state)
return lrs, lrs_losses, lrs_metric_avg, lrs_metric_var
def plot_eval(lrs, lrs_losses, lrs_metric_avg, lrs_metric_var):
print("lrs: ", lrs)
print("lrs_losses: ", lrs_losses)
print("lrs_metric_avg: ", lrs_metric_avg)
print("lrs_metric_var: ", lrs_metric_var)
fig, axs = plt.subplots(3, figsize=(10,15))
axs[0].plot(lrs, lrs_losses, color='blue', label="losses_avg")
axs[0].set_xlabel('learning rate', fontsize=15)
axs[0].set_ylabel('Loss', fontsize=15)
axs[0].set_xscale('log')
axs[0].set_yscale('symlog')
axs[0].set_ylim([0, min(lrs_losses)*100])
axs[1].plot(lrs, lrs_metric_avg, color='red', label="discounted_metric_avg")
axs[1].hlines(y=0, xmin=lrs[0], xmax=lrs[-1], linewidth=2, color='black')
axs[1].set_xlabel('learning rate', fontsize=15)
axs[1].set_ylabel('Metric average', fontsize=15)
axs[1].set_xscale('log')
axs[1].set_yscale('symlog')
axs[1].set_ylim([-abs(lrs_metric_avg[0])*100, abs(lrs_metric_avg[0])*100])
axs[2].plot(lrs, lrs_metric_var, color='green', label="discounted_metric_var")
axs[2].set_xlabel('learning rate', fontsize=15)
axs[2].set_ylabel('Metric variance', fontsize=15)
axs[2].set_xscale('log')
axs[2].set_yscale('symlog')
axs[2].set_ylim([0, min(lrs_metric_var)*1000])
plt.show()
def compare_trainings(results_default, results_optim):
fig, axs = plt.subplots(2, figsize=(10,10))
fig.suptitle('Performance comparison', fontsize=18)
train_alpha = 0.5
# Validation losses
axs[0].plot(range(len(results_default['val_losses'])), results_default['val_losses'], color='blue', label="default val")
axs[0].plot(range(len(results_optim['val_losses'])), results_optim['val_losses'], color='red', label="optim val")
# Training losses
axs[0].plot(range(len(results_default['train_losses'])), results_default['train_losses'], color='blue', label="default train", linestyle='--', alpha = train_alpha)
axs[0].plot(range(len(results_optim['train_losses'])), results_optim['train_losses'], color='red', label="optim train", linestyle='--', alpha = train_alpha)
axs[0].set_xlabel('Epochs', fontsize=14)
axs[0].set_ylabel('Loss', fontsize=14)
axs[0].set_xscale('linear')
axs[0].set_yscale('linear')
max_loss = max(results_default['train_losses']+results_default['val_losses']+results_optim['train_losses']+results_optim['val_losses'])
axs[0].set_ylim([0, max_loss])
axs[0].legend(loc="upper right")
# Validation accuracies
axs[1].plot(range(len(results_default['val_accuracies'])), results_default['val_accuracies'], color='blue', label="default val")
axs[1].plot(range(len(results_optim['val_accuracies'])), results_optim['val_accuracies'], color='red', label="optim val")
# Training default accuracies
axs[1].plot(range(len(results_default['train_accuracies'])), results_default['train_accuracies'], color='blue', label="default train", linestyle='--', alpha=train_alpha)
axs[1].plot(range(len(results_optim['train_accuracies'])), results_optim['train_accuracies'], color='red', label="optim train", linestyle='--', alpha=train_alpha)
axs[1].set_xlabel('Epochs', fontsize=15)
axs[1].set_ylabel('Accuracy', fontsize=15)
axs[1].set_xscale('linear')
axs[1].set_yscale('linear')
axs[1].set_ylim([0, 100])
axs[1].legend(loc="lower right")
```
%% Cell type:markdown id: tags:
## Training configuration variables
For the first run, you can keep all the default values.
For the optimized run, you will change some of these parameters.
>In particular, you will have to change :
>- the batch_size
>- the learning rate
%% Cell type:code id: tags:
```
args = {
'batch_size':64,
'epochs': 10,
'image_size': 224,
'learning_rate': 0.001,
'momentum': 0.9,
'weight_decay': 0.0001,
'download': True,
'device': torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
'dataset_root_dir': os.getcwd(),
}
#################################################
############# Modify the code below #############
#################################################
args_optim = {
'batch_size':64,
'epochs': 10,
'image_size': 224,
'learning_rate': 0.001,
'momentum': 0.9,
'weight_decay': 0.0001,
'download': True,
'device': torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
'dataset_root_dir': os.getcwd(),
}
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler (click to reveal)</summary>
```python
args_optim = {
'batch_size':512,
'epochs': 10,
'image_size': 224,
'learning_rate': 0.001,
'momentum': 0.9,
'weight_decay': 0.01,
'download': True,
'device': torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
'dataset_root_dir': os.getcwd(),
}
```
</details>
%% Cell type:markdown id: tags:
## Data transformation and augmentation
Below, we define the transformations to apply to each image when it is loaded.
These transformations can serve three main purposes:
- having the data in the desired format for the model (systematic transformation)
- correcting/normalizing the data (systematic transformation)
- artificially increasing the amount of data by transforming it (random transformation)
Warning: the evaluation dataset should always stay the same, so you should not apply random transformations to it.
> Enrich the transformations by using those provided by torchvision: https://pytorch.org/vision/0.12/transforms.html
> **Change transform_optim and val_transform_optim only**
%% Cell type:code id: tags:
```
transform = transforms.Compose([transforms.ToTensor()]) # convert the PIL Image to a tensor
val_transform = transforms.Compose([transforms.ToTensor()]) # convert the PIL Image to a tensor
#################################################
############# Modify the code below #############
#################################################
transform_optim = transforms.Compose([transforms.ToTensor()]) # convert the PIL Image to a tensor
val_transform_optim = transforms.Compose([transforms.ToTensor()]) # convert the PIL Image to a tensor
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
```python
transform_optim = transforms.Compose([
transforms.RandomHorizontalFlip(), # Horizontal Flip - Data Augmentation
transforms.ToTensor() # convert the PIL Image to a tensor
])
val_transform_optim = transforms.Compose([
transforms.ToTensor() # convert the PIL Image to a tensor
])
```
</details>
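Going a bit further (a hedged suggestion, not part of the reference solution): normalizing the images with per-channel statistics is another systematic transformation that often helps convergence. The mean/std values below are approximate CIFAR-10 statistics, given only as an illustration.
```python
# Hypothetical refinement: per-channel normalization with approximate CIFAR-10 statistics
transform_optim = transforms.Compose([
    transforms.RandomHorizontalFlip(),                  # data augmentation
    transforms.ToTensor(),                              # PIL Image -> tensor in [0,1]
    transforms.Normalize((0.4914, 0.4822, 0.4465),      # approximate per-channel means
                         (0.2470, 0.2435, 0.2616))      # approximate per-channel stds
])
val_transform_optim = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2470, 0.2435, 0.2616))
])
```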
%% Cell type:markdown id: tags:
## Dataset
In the cell below, we define the dataset.
Here we have two subsets:
- a training subset for model optimization
- a test subset for model evaluation
%% Cell type:code id: tags:
```
train_dataset = torchvision.datasets.CIFAR10(root=args['dataset_root_dir']+'/CIFAR_10', train=True, download=args['download'], transform=transform)
val_dataset = torchvision.datasets.CIFAR10(root=args['dataset_root_dir']+'/CIFAR_10', train=False, download=args['download'], transform=val_transform)
train_dataset_optim = torchvision.datasets.CIFAR10(root=args_optim['dataset_root_dir']+'/CIFAR_10', train=True, download=args_optim['download'], transform=transform_optim)
val_dataset_optim = torchvision.datasets.CIFAR10(root=args_optim['dataset_root_dir']+'/CIFAR_10', train=False, download=args_optim['download'], transform=val_transform_optim)
```
%% Cell type:markdown id: tags:
## Dataloader
The DataLoader class in PyTorch is responsible for loading and batching data from a Dataset object (for example a torchvision dataset).
It works by creating a Python iterable over the dataset and yielding a batch of data at each iteration.
Those batches will be fed to the model for training or inference.
The DataLoader class also provides various options for shuffling, batching, and parallelizing the data loading process, making it a useful tool for efficient and flexible data handling in PyTorch.
> Take a look at the DataLoader documentation : https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader
> Optimize the dataloader by taking advantage of parallelism and smart use of computational resources:
>- batch_size
>- pin_memory
>- prefetch_factor
>- persistent_workers
>- num_workers
%% Cell type:code id: tags:
```
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=args['batch_size'],
shuffle=True,
drop_last=True)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
batch_size=args['batch_size'],
shuffle=False,
drop_last=True)
#################################################
############# Modify the code below #############
#################################################
train_loader_optim = torch.utils.data.DataLoader(dataset=train_dataset_optim,
batch_size=args_optim['batch_size'],
shuffle=True,
drop_last=True)
val_loader_optim = torch.utils.data.DataLoader(dataset=val_dataset_optim,
batch_size=args_optim['batch_size'],
shuffle=False,
drop_last=True)
```
%% Cell type:code id: tags:
```
%timeit -r 1 -n 1 iter_dataloader(train_loader, 1, args)
%timeit -r 1 -n 1 iter_dataloader(train_loader_optim, 1, args_optim)
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
Using several workers, persistent workers, pinned memory and prefetching lets the data loading run in parallel with the computation, which usually removes the dataloader bottleneck:
```python
train_loader_optim = torch.utils.data.DataLoader(dataset=train_dataset_optim,
batch_size=args_optim['batch_size'],
shuffle=True,
drop_last=True,
num_workers=10,
persistent_workers=True,
pin_memory=True,
prefetch_factor=10)
val_loader_optim = torch.utils.data.DataLoader(dataset=val_dataset_optim,
batch_size=args_optim['batch_size'],
shuffle=False,
drop_last=True,
num_workers=10,
persistent_workers=True,
pin_memory=True,
prefetch_factor=10)
```
</details>
%% Cell type:markdown id: tags:
## Model
> Do not forget to verify that you use the right compute resources for your model
> By default, the model resnet18 is initialized with random weights but you could try using a pretrained model : https://pytorch.org/vision/main/models/generated/torchvision.models.resnet18.html#torchvision.models.ResNet18_Weights
%% Cell type:code id: tags:
```
model = models.resnet18()
model = model.to(args['device'])
model.name = 'Resnet-18'
print("Stock model on device:", next(model.parameters()).device)
#################################################
############# Modify the code below #############
#################################################
model_optim = models.resnet18()
model_optim = model_optim.to(args_optim['device'])
model_optim.name = 'Resnet-18'
print("Optimized model on device:", next(model_optim.parameters()).device)
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
```python
model_optim = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)  # start from pretrained weights
model_optim = model_optim.to(args_optim['device'])
model_optim.name = 'Resnet-18'
print("Optimized model on device:", next(model_optim.parameters()).device)
```
</details>
%% Cell type:markdown id: tags:
## Loss
We use a standard loss for classification.
To keep the comparison fair, if you change the loss, change it for both runs.
%% Cell type:code id: tags:
```
criterion = torch.nn.CrossEntropyLoss()
criterion_optim = torch.nn.CrossEntropyLoss()
```
%% Cell type:markdown id: tags:
## Optimizer
> In order to speed up the training, you can try to use a different optimizer: https://pytorch.org/docs/stable/optim.html#base-class
%% Cell type:code id: tags:
```
optimizer = torch.optim.SGD(model.parameters(), lr=args['learning_rate'], momentum=args['momentum'], weight_decay=args['weight_decay'])
#################################################
############# Modify the code below #############
#################################################
optimizer_optim = torch.optim.SGD(model_optim.parameters(), lr=args_optim['learning_rate'], momentum=args_optim['momentum'], weight_decay=args_optim['weight_decay'])
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
```python
optimizer_optim = torch.optim.AdamW(model_optim.parameters(), lr = args_optim['learning_rate'], weight_decay=args_optim['weight_decay'])
```
</details>
%% Cell type:markdown id: tags:
## Learning rate scheduler
In order to adjust the learning rate over iterations/epochs, we can make use of a learning rate scheduler.
To use an LR scheduler, you will need to:
- instantiate the scheduler (in the code cell below)
- adapt the training loop (in the "Training" section)
Take a look at this page : https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate which:
- describes how to use a scheduler (warning: some schedulers are updated at the step level and others at the epoch level)
- lists the available schedulers (you could also create your own starting from the _LRScheduler class)
> **You can define your scheduler here.**
> **You will have to modify the training loop later on.**
%% Cell type:code id: tags:
```
scheduler = None
#################################################
############# Modify the code below #############
#################################################
scheduler_optim = None
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
```python
scheduler_optim = torch.optim.lr_scheduler.OneCycleLR(optimizer_optim,
max_lr=args_optim['learning_rate'],
steps_per_epoch = len(train_loader_optim),
epochs=args_optim['epochs'])
```
</details>
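Note that OneCycleLR above is stepped once per batch. As a contrast, here is a minimal, hedged sketch of an epoch-level scheduler (illustrative values); with this kind of scheduler, `scheduler.step()` must be called once at the end of each epoch instead of after each batch:
```python
# Hedged alternative: an epoch-level scheduler, to be stepped once per epoch in the training loop
scheduler_optim = torch.optim.lr_scheduler.StepLR(optimizer_optim,
                                                  step_size=3,   # illustrative: decay every 3 epochs
                                                  gamma=0.1)     # multiply the learning rate by 0.1
```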
%% Cell type:markdown id: tags:
## Model training (reference performances)
Once we have all our main actors, we can set up the stage, that is, our training loop.
Below we use a typical training loop, as you can find in https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
> **Run it a first time to have a performance baseline with all the default values.**
%% Cell type:code id: tags:
```
results_default = train_default(train_loader, val_loader, model, optimizer, criterion, args)
```
%% Cell type:markdown id: tags:
## Speeding up the hyperparameter search : Learning Rate Finder
Whether we are using a scheduler or not, we need to determine either:
- the constant learning rate we want to use,
- or the maximum learning rate used by the scheduler.
In the first situation, you just want a good all-rounder learning rate, giving a relatively fast convergence while limiting the oscillations at the end of training.
In the second situation, you can focus on having the fastest initial convergence, since the oscillations will generally be taken care of by the decreasing learning rate strategy. Thus, we want the highest maximum learning rate possible.
Ideally, we would find a good learning rate quickly, in order to speed up our hyperparameter search.
Various strategies, more or less complex, exist to estimate this value.
Below, we try to find the learning rate by doing a few steps on a range of learning rates. We evaluate each learning rate to determine the best one to choose for our full training.
> **As this step can take quite some time, we provided you with some values for the default config which you are not supposed to change anyway. You can find them in the next spoiler**
> **Uncomment explore_lrs to rerun the exploration, otherwise you can reuse the given values.**
> **Be careful to re-run this cell to reset the model, optimizer, etc., so that you get a "fresh" exploration each time.**
> **Also, if you change the optimizer for the optimized run, change it here as well to find the best learning rate for that optimizer** (or rerun the cell where you defined it).
%% Cell type:code id: tags:
```
lrs, lrs_losses, lrs_metric_avg, lrs_metric_var = explore_lrs(train_loader_optim,
model_optim,
optimizer_optim,
args_optim,
min_learning_rate_power=-6,
max_learning_rate_power = 1,
num_lrs=8,
steps_per_lr=100)
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
```python
lrs=[1.e-06, 1.e-05, 1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01]
lrs_losses= [7.502097129821777, 7.22658634185791, 5.24326229095459, 1.7600191831588745, 1.4037541151046753, 2.136382579803467, 2.1029751300811768, 446.49951171875]
lrs_metric_avg=[0.0017601490020751954, -0.005245075225830078, -0.041641921997070314, -0.07478624820709229, -0.007052739858627319, 0.04763659238815308, 0.03924872875213623, 9.939403522014619]
lrs_metric_var=[0.0006510000222988311, 0.0004144988674492198, 0.000668689274974986, 0.013876865854565344, 0.001481160611942387, 0.3384368026131311, 0.8817071610439394, 2157852536609.2454]
```
</details>
%% Cell type:code id: tags:
```
plot_eval(lrs, lrs_losses, lrs_metric_avg, lrs_metric_var)
```
%% Cell type:markdown id: tags:
## Optimize the training loop
> Adapt the dataset transformations, batch_size & dataloader, lr & lr_scheduler, and optimizer in order to achieve better classification results in less time.
> Change this training loop to include:
> - a learning rate scheduler : https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
> - a strategy such as early stopping or patience : https://www.kaggle.com/code/akhileshrai/tutorial-early-stopping-vanilla-rnn-pytorch?scriptVersionId=26440051&cellId=10#4.-Early-Stopping
> **Also think about changing the call to the function if you added arguments.**
> We already added automatic mixed precision for you; it will be covered in the next course.
> **BEFORE RUNNING, WE NEED TO REINITIALIZE THE MODEL, OPTIMIZER AND SCHEDULER FOR A FAIR FIGHT. Re-apply below the changes you made to them.**
%% Cell type:code id: tags:
```
model_optim = models.resnet18().to(args_optim['device'])
model_optim.name = 'Resnet-18'
optimizer_optim = torch.optim.SGD(model_optim.parameters(), args_optim['learning_rate'], args_optim['momentum'], args_optim['weight_decay'])
scheduler_optim = None
```
%% Cell type:code id: tags:
```
def train_optim(train_loader, val_loader, model, optimizer, criterion, args):
'''
The default simple training loop
'''
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []
time_start = time.time()
for epoch in range(args['epochs']):
print("Epoch ", epoch)
for i, (images, labels) in enumerate(train_loader):
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
# Zero the parameter gradients
optimizer.zero_grad()
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward pass
loss.backward()
# Optimize
optimizer.step()
# Evaluate at the end of the epoch on the train set
train_loss, train_accuracy = evaluate(train_loader, model, criterion, args)
print("\t Train loss : ", train_loss, "& Train accuracy : ", train_accuracy)
train_losses.append(train_loss)
train_accuracies.append(train_accuracy)
# Evaluate at the end of the epoch on the val set
val_loss, val_accuracy = evaluate(val_loader, model, criterion, args)
print("\t Validation loss : ", val_loss, "& Validation accuracy : ", val_accuracy)
val_losses.append(val_loss)
val_accuracies.append(val_accuracy)
duration = time.time() - time_start
print('Finished Training in:', duration, 'seconds with mean epoch duration:', duration/args['epochs'], ' seconds')
results = {'model':model,
'train_losses': train_losses,
'train_accuracies': train_accuracies,
'val_losses': val_losses,
'val_accuracies': val_accuracies,
'duration':duration}
return results
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
```python
def train_optim(train_loader, val_loader, model, optimizer, criterion, scheduler, args):
'''
The optimized training loop, with automatic mixed precision and an optional LR scheduler
'''
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []
time_start = time.time()
scaler = GradScaler()
for epoch in range(args['epochs']):
print("Epoch ", epoch)
for i, (images, labels) in enumerate(train_loader):
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
# Zero the parameter gradients
optimizer.zero_grad()
# Forward pass
with autocast():
outputs = model(images)
loss = criterion(outputs, labels)
# Backward pass
scaler.scale(loss).backward()
# Optimize
scaler.step(optimizer)
# Updates the scale for next iteration.
scaler.update()
# Update Learning Rate scheduler, warning some schedulers are updated every epoch and not step.
if scheduler is not None:
scheduler.step()
# Evaluate at the end of the epoch
train_loss, train_accuracy = evaluate(train_loader, model, criterion, args)
print("\t Train loss : ", train_loss, "& Train accuracy : ", train_accuracy)
train_losses.append(train_loss)
train_accuracies.append(train_accuracy)
# Evaluate at the end of the epoch
val_loss, val_accuracy = evaluate(val_loader, model, criterion, args)
print("\t Validation loss : ", val_loss, "& Validation accuracy : ", val_accuracy)
val_losses.append(val_loss)
val_accuracies.append(val_accuracy)
duration = time.time() - time_start
print('Finished Training in:', duration, 'seconds with mean epoch duration:', duration/args['epochs'], ' seconds')
results = {'model':model,
'train_losses': train_losses,
'train_accuracies': train_accuracies,
'val_losses': val_losses,
'val_accuracies': val_accuracies,
'duration':duration}
return results
```
</details>
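The spoiler above covers mixed precision and the scheduler; early stopping is left to you. Below is a minimal, hedged sketch of a patience-based helper (names and values are illustrative, not the reference solution): create it before the epoch loop, then break out of the loop when its `step()` returns True, right after the validation evaluation.
```python
class EarlyStopping:
    '''Hypothetical patience-based early stopping helper (illustrative only).'''
    def __init__(self, patience=3):
        self.patience   = patience        # epochs without improvement before stopping
        self.best_loss  = float('inf')    # best validation loss seen so far
        self.bad_epochs = 0               # consecutive epochs without improvement
    def step(self, val_loss):
        '''Return True when training should stop.'''
        if val_loss < self.best_loss:
            self.best_loss  = val_loss
            self.bad_epochs = 0
        else:
            self.bad_epochs += 1
        return self.bad_epochs >= self.patience
```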
%% Cell type:code id: tags:
```
results_optim = train_optim(train_loader_optim, val_loader_optim, model_optim, optimizer_optim, criterion_optim, args_optim)
```
%% Cell type:markdown id: tags:
## Classification performances comparison
> Take a look at
>- the loss and accuracy evolution
>- the difference in timings between the two runs
%% Cell type:code id: tags:
```
print("Duration for default setup training:", results_default["duration"])
print("Duration for optim setup training:", results_optim["duration"])
```
%% Cell type:code id: tags:
```
compare_trainings(results_default, results_optim)
```
%% Cell type:markdown id: tags:
## Tensorboard
Below we added a profiler and a logger for Tensorboard. If you want to do it yourself in future codes, you can take inspiration from the following documentation:
- Pytorch : https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html
- IDRIS : http://www.idris.fr/jean-zay/pre-post/jean-zay-tensorboard.html
> Try to add another metric to the logger, for example the validation loss at each epoch.
%% Cell type:code id: tags:
```
def train_default_tensorboard(train_loader, val_loader, model, optimizer, criterion, args, exp_name):
log_dir = "./logs/"+exp_name
writer = SummaryWriter(log_dir)
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []
time_start = time.time()
with torch.profiler.profile(
schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
on_trace_ready=torch.profiler.tensorboard_trace_handler(log_dir),
record_shapes=True,
profile_memory=True,
with_stack=True
) as prof:
for epoch in range(args['epochs']):
print("Epoch ", epoch)
for i, (images, labels) in enumerate(train_loader):
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
# Zero the parameter gradients
optimizer.zero_grad()
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Log a scalar (loss)
writer.add_scalar("Loss/train", loss, i+epoch*len(train_loader))
# Backward pass
loss.backward()
# Optimize
optimizer.step()
# Indicate to profiler when a step is over
prof.step()
# Evaluate at the end of the epoch on the train set
train_loss, train_accuracy = evaluate(train_loader, model, criterion, args)
print("\t Train loss : ", train_loss, "& Train accuracy : ", train_accuracy)
train_losses.append(train_loss)
train_accuracies.append(train_accuracy)
# Evaluate at the end of the epoch on the val set
val_loss, val_accuracy = evaluate(val_loader, model, criterion, args)
print("\t Validation loss : ", val_loss, "& Validation accuracy : ", val_accuracy)
val_losses.append(val_loss)
val_accuracies.append(val_accuracy)
duration = time.time() - time_start
print('Finished Training in:', duration, 'seconds with mean epoch duration:', duration/args['epochs'], ' seconds')
results = {'model':model,
'train_losses': train_losses,
'train_accuracies': train_accuracies,
'val_losses': val_losses,
'val_accuracies': val_accuracies,
'duration':duration}
return results
```
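%% Cell type:markdown id: tags:
As suggested above, other metrics can be logged with the same writer. A minimal, hedged sketch of what you could add inside `train_default_tensorboard`, right after the end-of-epoch evaluation (the epoch index is used as the step, so there is one point per epoch):
```python
# Hedged addition inside train_default_tensorboard, after the end-of-epoch evaluation:
writer.add_scalar("Loss/val", val_loss, epoch)           # validation loss, once per epoch
writer.add_scalar("Accuracy/val", val_accuracy, epoch)   # validation accuracy, once per epoch
```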
%% Cell type:code id: tags:
```
args["epochs"] = 1
_ = train_default_tensorboard(train_loader, val_loader, model, optimizer, criterion, args, "default_perf")
```
%% Cell type:code id: tags:
```
# Load the TensorBoard notebook extension
!pip install torch_tb_profiler
%load_ext tensorboard
%tensorboard --logdir logs
```
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [PER57] - Perceptron Model 1957
<!-- DESC --> Example of use of a Perceptron, with sklearn and IRIS dataset of 1936 !
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Implement a historical linear classifier with a historical dataset !
- The objective is to predict the type of Iris from the size of its petals.
- Identifying its limitations
The [IRIS dataset](https://archive.ics.uci.edu/ml/datasets/Iris) is probably one of the oldest datasets, dating back to 1936.
## What we're going to do :
- Retrieve the dataset, via scikit learn
- training and classifying
## Step 1 - Import and init
%% Cell type:code id: tags:
``` python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import os,sys
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('PER57')
```
%% Cell type:markdown id: tags:
## Step 2 - Prepare IRIS Dataset
Retrieve a dataset : http://scikit-learn.org/stable/modules/classes.html#module-sklearn.datasets
About the datasets : https://scikit-learn.org/stable/datasets.html#datasets
Data fields (X) :
- 0 : sepal length in cm
- 1 : sepal width in cm
- 2 : petal length in cm
- 3 : petal width in cm
Class (y) :
- 0 : class 0=Iris-Setosa, 1=Iris-Versicolour, 2=Iris-Virginica
### 2.1 - Get dataset
%% Cell type:code id: tags:
``` python
x0,y0 = load_iris(return_X_y=True)
x = x0[:, (2,3)] # We only keep fields 2 and 3
y = y0.copy()
y[ y0==0 ] = 1 # 1 = Iris setosa
y[ y0>=1 ] = 0 # 0 = not iris setosa
df=pd.DataFrame.from_dict({'Length (x1)':x[:,0], 'Width (x2)':x[:,1], 'Setosa {0,1} (y)':y})
display(df)
print(f'x shape : {x.shape}')
print(f'y shape : {y.shape}')
```
%% Cell type:markdown id: tags:
### 2.2 - Train and test sets
%% Cell type:code id: tags:
``` python
x,y = fidle.utils.shuffle_np_dataset(x, y)
n=int(len(x)*0.8)
x_train = x[:n]
y_train = y[:n]
x_test = x[n:]
y_test = y[n:]
print(f'x_train shape : {x_train.shape}')
print(f'y_train shape : {y_train.shape}')
print(f'x_test shape : {x_test.shape}')
print(f'y_test shape : {y_test.shape}')
```
%% Cell type:markdown id: tags:
## Step 3 - Get a perceptron, and train it
%% Cell type:code id: tags:
``` python
pct = Perceptron(max_iter=100, random_state=82, tol=0.01, verbose=1)
pct.fit(x_train, y_train)
```
%% Cell type:markdown id: tags:
## Step 4 - Predictions
%% Cell type:code id: tags:
``` python
y_pred = pct.predict(x_test)
df=pd.DataFrame.from_dict({'Length (x1)':x_test[:,0], 'Width (x2)':x_test[:,1], 'y_test':y_test, 'y_pred':y_pred})
display(df[:15])
```
%% Cell type:markdown id: tags:
## Step 5 - Visualisation
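The decision boundary drawn below follows directly from the trained weights: the perceptron predicts Iris-Setosa when $w_1 x_1 + w_2 x_2 + w_0 \geq 0$, so the boundary is the line
$$x_2 = -\frac{w_1}{w_2}\,x_1 - \frac{w_0}{w_2}$$
whose slope and intercept are exactly what `plot_perceptron` computes from `pct.coef_` and `pct.intercept_`.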
%% Cell type:code id: tags:
``` python
def plot_perceptron(x_train,y_train,x_test,y_test):
a = -pct.coef_[0][0] / pct.coef_[0][1]
b = -pct.intercept_ / pct.coef_[0][1]
box=[x.min(axis=0)[0],x.max(axis=0)[0],x.min(axis=0)[1],x.max(axis=0)[1]]
mx=(box[1]-box[0])/20
my=(box[3]-box[2])/20
box=[box[0]-mx,box[1]+mx,box[2]-my,box[3]+my]
fig, axs = plt.subplots(1, 1)
fig.set_size_inches(10,6)
axs.plot(x_train[y_train==1, 0], x_train[y_train==1, 1], "o", color='tomato', label="Iris-Setosa")
axs.plot(x_train[y_train==0, 0], x_train[y_train==0, 1], "o", color='steelblue',label="Autres")
axs.plot(x_test[y_pred==1, 0], x_test[y_pred==1, 1], "o", color='lightsalmon', label="Iris-Setosa (pred)")
axs.plot(x_test[y_pred==0, 0], x_test[y_pred==0, 1], "o", color='lightblue', label="Autres (pred)")
axs.plot([box[0], box[1]], [a*box[0]+b, a*box[1]+b], "k--", linewidth=2)
axs.set_xlabel("Petal length (cm)", labelpad=15) #, fontsize=14)
axs.set_ylabel("Petal width (cm)", labelpad=15) #, fontsize=14)
axs.legend(loc="lower right", fontsize=14)
axs.set_xlim(box[0],box[1])
axs.set_ylim(box[2],box[3])
fidle.scrawler.save_fig('01-perceptron-iris')
plt.show()
plot_perceptron(x_train,y_train, x_test,y_test)
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:code id:d8a1f0f7 tags:
``` python
from IPython.display import display,Markdown
display(Markdown(open('README.md', 'r').read()))
#
# This README is visible under Jupyter Lab ;-)
# Automatically generated on : 06/01/25 16:42:31
```
%% Output
<a name="top"></a>
[<img width="600px" src="fidle/img/title.svg"></img>](#top)
<!-- --------------------------------------------------- -->
<!-- To correctly view this README under Jupyter Lab -->
<!-- Open the notebook: README.ipynb! -->
<!-- --------------------------------------------------- -->
## About Fidle
This repository contains all the documents and links of the **Fidle Training** .
Fidle (for Formation Introduction au Deep Learning) is a 3-day training session co-organized
by the 3IA MIAI institute, the CNRS, via the Mission for Transversal and Interdisciplinary
Initiatives (MITI) and the University of Grenoble Alpes (UGA).
The objectives of this training are :
- Understanding the **bases of Deep Learning** neural networks
- Develop a **first experience** through simple and representative examples
- Understanding **Tensorflow/Keras** and **Jupyter lab** technologies
- Apprehend the **academic computing environments** Tier-2 or Tier-1 with powerful GPUs
For more information, see **https://fidle.cnrs.fr** :
- **[Fidle site](https://fidle.cnrs.fr)**
- **[Presentation of the training](https://fidle.cnrs.fr/presentation)**
- **[Detailed program](https://fidle.cnrs.fr/programme)**
- **[Subscribe to the list](https://fidle.cnrs.fr/listeinfo), to stay informed !**
- **[Corrected notebooks](https://fidle.cnrs.fr/done)**
- **[Follow us on our channel :](https://fidle.cnrs.fr/youtube)**\
[<img width="120px" style="vertical-align:middle" src="fidle/img/logo-YouTube.png"></img>](https://fidle.cnrs.fr/youtube)
For more information, you can contact us at :
[<img width="200px" style="vertical-align:middle" src="fidle/img/00-Mail_contact.svg"></img>](#top)
Current Version : <!-- VERSION_BEGIN -->3.0.15<!-- VERSION_END -->
## Course materials
| Courses | Notebooks | Datasets | Videos |
|:--:|:--:|:--:|:--:|
| [<img width="50px" src="fidle/img/00-Fidle-pdf.svg"></img><br>**Course slides**](https://fidle.cnrs.fr/supports)<br>The course in pdf format<br>| [<img width="50px" src="fidle/img/00-Notebooks.svg"></img><br>**Notebooks**](https://fidle.cnrs.fr/notebooks)<br> &nbsp;&nbsp;&nbsp;&nbsp;Get a Zip or clone this repository &nbsp;&nbsp;&nbsp;&nbsp;<br>| [<img width="50px" src="fidle/img/00-Datasets-tar.svg"></img><br>**Datasets**](https://fidle.cnrs.fr/datasets-fidle.tar)<br>All the needed datasets<br>|[<img width="50px" src="fidle/img/00-Videos.svg"></img><br>**Videos**](https://fidle.cnrs.fr/youtube)<br>&nbsp;&nbsp;&nbsp;&nbsp;Our Youtube channel&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;|
Have a look about **[How to get and install](https://fidle.cnrs.fr/installation)** these notebooks and datasets.
## Jupyter notebooks
<!-- TOC_BEGIN -->
<!-- Automatically generated on : 06/01/25 16:42:30 -->
### Linear and logistic regression
- **[LINR1](LinearReg/01-Linear-Regression.ipynb)** - [Linear regression with direct resolution](LinearReg/01-Linear-Regression.ipynb)
Low-level implementation, using numpy, of a direct resolution for a linear regression
- **[GRAD1](LinearReg/02-Gradient-descent.ipynb)** - [Linear regression with gradient descent](LinearReg/02-Gradient-descent.ipynb)
Low level implementation of a solution by gradient descent. Basic and stochastic approach.
- **[POLR1](LinearReg/03-Polynomial-Regression.ipynb)** - [Complexity Syndrome](LinearReg/03-Polynomial-Regression.ipynb)
Illustration of the problem of complexity with the polynomial regression
- **[LOGR1](LinearReg/04-Logistic-Regression.ipynb)** - [Logistic regression](LinearReg/04-Logistic-Regression.ipynb)
Simple example of logistic regression with a sklearn solution
### Perceptron Model 1957
- **[PER57](Perceptron/01-Simple-Perceptron.ipynb)** - [Perceptron Model 1957](Perceptron/01-Simple-Perceptron.ipynb)
Example of use of a Perceptron, with sklearn and IRIS dataset of 1936 !
### BHPD regression (DNN), using Keras3/PyTorch
- **[K3BHPD1](BHPD.Keras3/01-DNN-Regression.ipynb)** - [Regression with a Dense Network (DNN)](BHPD.Keras3/01-DNN-Regression.ipynb)
Simple example of a regression with the dataset Boston Housing Prices Dataset (BHPD)
- **[K3BHPD2](BHPD.Keras3/02-DNN-Regression-Premium.ipynb)** - [Regression with a Dense Network (DNN) - Advanced code](BHPD.Keras3/02-DNN-Regression-Premium.ipynb)
A more advanced implementation of the precedent example, using Keras3
### BHPD regression (DNN), using PyTorch
- **[PBHPD1](BHPD.PyTorch/01-DNN-Regression_PyTorch.ipynb)** - [Regression with a Dense Network (DNN)](BHPD.PyTorch/01-DNN-Regression_PyTorch.ipynb)
A Simple regression with a Dense Neural Network (DNN) using Pytorch - BHPD dataset
### Wine Quality prediction (DNN), using Keras3/PyTorch
- **[K3WINE1](Wine.Keras3/01-DNN-Wine-Regression.ipynb)** - [Wine quality prediction with a Dense Network (DNN)](Wine.Keras3/01-DNN-Wine-Regression.ipynb)
Another example of regression, with a wine quality prediction, using Keras 3 and PyTorch
### Wine Quality prediction (DNN), using PyTorch/Lightning
- **[LWINE1](Wine.Lightning/01-DNN-Wine-Regression-lightning.ipynb)** - [Wine quality prediction with a Dense Network (DNN)](Wine.Lightning/01-DNN-Wine-Regression-lightning.ipynb)
Another example of regression, with a wine quality prediction, using PyTorch Lightning
### MNIST classification (DNN,CNN), using Keras3/PyTorch
- **[K3MNIST1](MNIST.Keras3/01-DNN-MNIST.ipynb)** - [Simple classification with DNN](MNIST.Keras3/01-DNN-MNIST.ipynb)
An example of classification using a dense neural network for the famous MNIST dataset
- **[K3MNIST2](MNIST.Keras3/02-CNN-MNIST.ipynb)** - [Simple classification with CNN](MNIST.Keras3/02-CNN-MNIST.ipynb)
An example of classification using a convolutional neural network for the famous MNIST dataset
### MNIST classification (DNN,CNN), using PyTorch
- **[PMNIST1](MNIST.PyTorch/01-DNN-MNIST_PyTorch.ipynb)** - [Simple classification with DNN](MNIST.PyTorch/01-DNN-MNIST_PyTorch.ipynb)
Example of classification with a fully connected neural network, using Pytorch
### MNIST classification (DNN,CNN), using PyTorch/Lightning
- **[LMNIST1](MNIST.Lightning/01-DNN-MNIST_Lightning.ipynb)** - [Simple classification with DNN](MNIST.Lightning/01-DNN-MNIST_Lightning.ipynb)
An example of classification using a dense neural network for the famous MNIST dataset, using PyTorch Lightning
- **[LMNIST2](MNIST.Lightning/02-CNN-MNIST_Lightning.ipynb)** - [Simple classification with CNN](MNIST.Lightning/02-CNN-MNIST_Lightning.ipynb)
An example of classification using a convolutional neural network for the famous MNIST dataset, using PyTorch Lightning
### Images classification GTSRB with Convolutional Neural Networks (CNN), using Keras3/PyTorch
- **[K3GTSRB1](GTSRB.Keras3/01-Preparation-of-data.ipynb)** - [Dataset analysis and preparation](GTSRB.Keras3/01-Preparation-of-data.ipynb)
Episode 1 : Analysis of the GTSRB dataset and creation of an enhanced dataset
- **[K3GTSRB2](GTSRB.Keras3/02-First-convolutions.ipynb)** - [First convolutions](GTSRB.Keras3/02-First-convolutions.ipynb)
Episode 2 : First convolutions and first classification of our traffic signs, using Keras3
- **[K3GTSRB3](GTSRB.Keras3/03-Better-convolutions.ipynb)** - [Training monitoring](GTSRB.Keras3/03-Better-convolutions.ipynb)
Episode 3 : Monitoring, analysis and check points during a training session, using Keras3
- **[K3GTSRB10](GTSRB.Keras3/batch_oar.sh)** - [OAR batch script submission](GTSRB.Keras3/batch_oar.sh)
Bash script for an OAR batch submission of an ipython code
- **[K3GTSRB11](GTSRB.Keras3/batch_slurm.sh)** - [SLURM batch script](GTSRB.Keras3/batch_slurm.sh)
Bash script for a Slurm batch submission of an ipython code
### Sentiment analysis with word embedding, using Keras3/PyTorch
- **[K3IMDB1](Embedding.Keras3/01-One-hot-encoding.ipynb)** - [Sentiment analysis with one-hot encoding](Embedding.Keras3/01-One-hot-encoding.ipynb)
A basic example of sentiment analysis with sparse encoding, using a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch
- **[K3IMDB2](Embedding.Keras3/02-Keras-embedding.ipynb)** - [Sentiment analysis with text embedding](Embedding.Keras3/02-Keras-embedding.ipynb)
A very classical example of word embedding with a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch
- **[K3IMDB3](Embedding.Keras3/03-Prediction.ipynb)** - [Reload and reuse a saved model](Embedding.Keras3/03-Prediction.ipynb)
Retrieving a saved model to perform a sentiment analysis (movie review), using Keras 3 and PyTorch
- **[K3IMDB4](Embedding.Keras3/04-Show-vectors.ipynb)** - [Reload embedded vectors](Embedding.Keras3/04-Show-vectors.ipynb)
Retrieving embedded vectors from our trained model, using Keras 3 and PyTorch
- **[K3IMDB5](Embedding.Keras3/05-LSTM-Keras.ipynb)** - [Sentiment analysis with a RNN network](Embedding.Keras3/05-LSTM-Keras.ipynb)
Still the same problem, but with a network combining embedding and RNN, using Keras 3 and PyTorch
### Time series with Recurrent Neural Network (RNN), using Keras3/PyTorch
- **[K3LADYB1](RNN.Keras3/01-Ladybug.ipynb)** - [Prediction of a 2D trajectory via RNN](RNN.Keras3/01-Ladybug.ipynb)
Artificial dataset generation and prediction attempt via a recurrent network, using Keras 3 and PyTorch
### Graph Neural Networks
### Unsupervised learning with an autoencoder neural network (AE), using Keras3
- **[K3AE1](AE.Keras3/01-Prepare-MNIST-dataset.ipynb)** - [Prepare a noisy MNIST dataset](AE.Keras3/01-Prepare-MNIST-dataset.ipynb)
Episode 1: Preparation of a noisy MNIST dataset
- **[K3AE2](AE.Keras3/02-AE-with-MNIST.ipynb)** - [Building and training an AE denoiser model](AE.Keras3/02-AE-with-MNIST.ipynb)
Episode 1 : Construction of a denoising autoencoder and training of it with a noisy MNIST dataset.
- **[K3AE3](AE.Keras3/03-AE-with-MNIST-post.ipynb)** - [Playing with our denoiser model](AE.Keras3/03-AE-with-MNIST-post.ipynb)
Episode 2 : Using the previously trained autoencoder to denoise data
- **[K3AE4](AE.Keras3/04-ExtAE-with-MNIST.ipynb)** - [Denoiser and classifier model](AE.Keras3/04-ExtAE-with-MNIST.ipynb)
Episode 4 : Construction of a denoiser and classifier model
- **[K3AE5](AE.Keras3/05-ExtAE-with-MNIST.ipynb)** - [Advanced denoiser and classifier model](AE.Keras3/05-ExtAE-with-MNIST.ipynb)
Episode 5 : Construction of an advanced denoiser and classifier model
### Generative network with Variational Autoencoder (VAE), using Keras3
- **[K3VAE1](VAE.Keras3/01-VAE-with-MNIST-LossLayer.ipynb)** - [First VAE, using functional API (MNIST dataset)](VAE.Keras3/01-VAE-with-MNIST-LossLayer.ipynb)
Construction and training of a VAE, using the functional API, with a latent space of small dimension.
- **[K3VAE2](VAE.Keras3/02-VAE-with-MNIST.ipynb)** - [VAE, using a custom model class (MNIST dataset)](VAE.Keras3/02-VAE-with-MNIST.ipynb)
Construction and training of a VAE, using model subclass, with a latent space of small dimension.
- **[K3VAE3](VAE.Keras3/03-VAE-with-MNIST-post.ipynb)** - [Analysis of the VAE's latent space of MNIST dataset](VAE.Keras3/03-VAE-with-MNIST-post.ipynb)
Visualization and analysis of the VAE's latent space of the dataset MNIST
### Generative Adversarial Networks (GANs), using Lightning
- **[PLSHEEP3](DCGAN.Lightning/01-DCGAN-PL.ipynb)** - [A DCGAN to Draw a Sheep, using Pytorch Lightning](DCGAN.Lightning/01-DCGAN-PL.ipynb)
"Draw me a sheep", revisited with a DCGAN, using Pytorch Lightning
### Diffusion Model (DDPM) using PyTorch
- **[DDPM1](DDPM.PyTorch/01-ddpm.ipynb)** - [Fashion MNIST Generation with DDPM](DDPM.PyTorch/01-ddpm.ipynb)
Diffusion Model example, to generate Fashion MNIST images.
- **[DDPM2](DDPM.PyTorch/model.py)** - [DDPM Python classes](DDPM.PyTorch/model.py)
Python classes used by the DDPM example
### Training optimization, using PyTorch
- **[OPT1](Optimization.PyTorch/01-Apprentissages-rapides-et-Optimisations.ipynb)** - [Training setup optimization](Optimization.PyTorch/01-Apprentissages-rapides-et-Optimisations.ipynb)
The goal of this notebook is to go through a typical deep learning model training
### Deep Reinforcement Learning (DRL), using PyTorch
- **[DRL1](DRL.PyTorch/FIDLE_DQNfromScratch.ipynb)** - [Solving CartPole with DQN](DRL.PyTorch/FIDLE_DQNfromScratch.ipynb)
Using a Deep Q-Network to play CartPole - an inverted pendulum problem (PyTorch)
- **[DRL2](DRL.PyTorch/FIDLE_rl_baselines_zoo.ipynb)** - [RL Baselines3 Zoo: Training in Colab](DRL.PyTorch/FIDLE_rl_baselines_zoo.ipynb)
Demo of Stable baseline3 with Colab
### Miscellaneous things, but very important!
- **[NP1](Misc/00-Numpy.ipynb)** - [A short introduction to Numpy](Misc/00-Numpy.ipynb)
Numpy is an essential tool for the Scientific Python.
- **[ACTF1](Misc/01-Activation-Functions.ipynb)** - [Activation functions](Misc/01-Activation-Functions.ipynb)
Some activation functions, with their derivatives.
- **[PANDAS1](Misc/02-Using-pandas.ipynb)** - [Quelques exemples avec Pandas](Misc/02-Using-pandas.ipynb)
pandas is another essential tool for the Scientific Python.
- **[PYTORCH1](Misc/03-Using-Pytorch.ipynb)** - [Practical Lab : PyTorch](Misc/03-Using-Pytorch.ipynb)
PyTorch is one of the main frameworks used in Deep Learning
- **[TSB1](Misc/04-Using-Tensorboard.ipynb)** - [Tensorboard with/from Jupyter ](Misc/04-Using-Tensorboard.ipynb)
4 ways to use Tensorboard from the Jupyter environment
- **[K3LSTM1](Misc/05-RNN.ipynb)** - [Basic Keras LSTM Layer](Misc/05-RNN.ipynb)
A small example of an LSTM layer in Keras
- **[PGRAD1](Misc/06-Gradients.ipynb)** - [Gradient illustration with PyTorch](Misc/06-Gradients.ipynb)
Example of computing a gradient with PyTorch
- **[FID1](Misc/99-Fid-Example.ipynb)** - [Exemple de notebook Fidle ](Misc/99-Fid-Example.ipynb)
A simple example of a Fidle notebook
<!-- TOC_END -->
## Installation
Have a look about **[How to get and install](https://fidle.cnrs.fr/installation)** these notebooks and datasets.
## Licence
[<img width="100px" src="fidle/img/00-fidle-CC BY-NC-SA.svg"></img>](https://creativecommons.org/licenses/by-nc-sa/4.0/)
\[en\] Attribution - NonCommercial - ShareAlike 4.0 International (CC BY-NC-SA 4.0)
\[Fr\] Attribution - Pas d’Utilisation Commerciale - Partage dans les Mêmes Conditions 4.0 International
See [License](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
See [Disclaimer](https://creativecommons.org/licenses/by-nc-sa/4.0/#).
----
[<img width="80px" src="fidle/img/logo-paysage.svg"></img>](#top)
German Traffic Sign Recognition Benchmark (GTSRB)
=================================================
---
Introduction au Deep Learning (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020
## 1/ Environment
To install your conda environment :
```
conda env create -f environment.yml
```
## 4/ Misc
To update an existing environment :
```
conda env update --name=deeplearning2 --file=environment.yml
```
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3LADYB1] - Prediction of a 2D trajectory via RNN
<!-- DESC --> Artificial dataset generation and prediction attempt via a recurrent network, using Keras 3 and PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Understanding the use of a recurrent neural network
## What we're going to do :
- Generate an artificial dataset
- dataset preparation
- Doing our testing
- Making predictions
## Step 1 - Import and init
### 1.1 - Python
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import numpy as np
from math import cos, sin
import random
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3LADYB1')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
%% Cell type:code id: tags:
``` python
# ---- About dataset
#
max_t = 1000
delta_t = 0.01
features_len = 2
sequence_len = 20
predict_len = 5
# ---- About training
#
scale = .2 # Percentage of dataset to be used (1=all)
train_prop = .8 # Percentage for train (the rest being for the test)
batch_size = 32
epochs = 5
fit_verbosity = 1 # 0 = silent, 1 = progress bar, 2 = one line per epoch
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('scale', 'train_prop', 'sequence_len', 'predict_len', 'batch_size', 'epochs', 'fit_verbosity')
```
%% Cell type:markdown id: tags:
## Step 2 - Generation of a fun dataset
### 2.1 - Virtual trajectory of our ladybug
%% Cell type:code id: tags:
``` python
def ladybug_init(s=122):
if s>0 : random.seed(s)
ladybug_init.params_x = [ random.gauss(0.,1.) for u in range(8)]
ladybug_init.params_y = [ random.gauss(0.,1.) for u in range(8)]
def ladybug_move(t):
[ax1, ax2, ax3, ax4, kx1, kx2, kx3, kx4] = ladybug_init.params_x
[ay1, ay2, ay3, ay4, ky1, ky2, ky3, ky4] = ladybug_init.params_y
x = ax1*sin(t*(kx1+20)) + ax2*cos(t*(kx2+10)) + ax3*sin(t*(kx3+5)) + ax4*cos(t*(kx4+5))
y = ay1*cos(t*(ky1+20)) + ay2*sin(t*(ky2+10)) + ay3*cos(t*(ky3+5)) + ay4*sin(t*(ky4+5))
return x,y
```
%% Cell type:markdown id: tags:
### 2.2 - Get some positions, and build a rescaled and normalized dataset
%% Cell type:code id: tags:
``` python
# ---- Get positions
#
ladybug_init(s=16)
x,y = 0,0
positions=[]
for t in np.arange(0., max_t, delta_t):
x,y = ladybug_move(t)
positions.append([x,y])
# ---- Build rescaled dataset
#
n = int( len(positions)*scale )
dataset = np.array(positions[:n])
k = int(len(dataset)*train_prop)
x_train = dataset[:k]
x_test = dataset[k:]
# ---- Normalize
#
mean = x_train.mean()
std = x_train.std()
x_train = (x_train - mean) / std
x_test = (x_test - mean) / std
print("Dataset generated.")
print("Train shape is : ", x_train.shape)
print("Test shape is : ", x_test.shape)
```
%% Cell type:markdown id: tags:
### 2.3 - Have a look
An extract from the data we have: the virtual trajectory of our ladybug
And what we want to predict (in red), from a segment (in blue)
%% Cell type:code id: tags:
``` python
fidle.scrawler.serie_2d(x_train[:1000], figsize=(12,12), lw=1,ms=4,save_as='01-dataset')
```
%% Cell type:code id: tags:
``` python
k1,k2 = sequence_len, predict_len
i = random.randint(0,len(x_test)-k1-k2)
j = i+k1
fidle.scrawler.segment_2d( x_test[i:j+k2], x_test[j:j+k2],ms=6, save_as='02-objectives')
```
%% Cell type:markdown id: tags:
### 2.4 - Prepare sequences from datasets
%% Cell type:code id: tags:
``` python
# ---- Create sequences and labels for train and test
#
xs_train, ys_train=[],[]
all_i = np.random.permutation( len(x_train) - sequence_len - 1 )
for i in all_i:
xs_train.append( x_train[ i : i+sequence_len ] )
ys_train.append( x_train[ i+sequence_len+1 ] )
xs_test, ys_test=[],[]
for i in range( len(x_test) - sequence_len - 1):
xs_test.append( x_test[ i : i+sequence_len ] )
ys_test.append( x_test[ i+sequence_len+1 ] )
# ---- Convert to numpy / float16
xs_train = np.array(xs_train, dtype='float16')
ys_train = np.array(ys_train, dtype='float16')
xs_test = np.array(xs_test, dtype='float16')
ys_test = np.array(ys_test, dtype='float16')
```
%% Cell type:code id: tags:
``` python
fidle.utils.subtitle('About the splitting of our dataset :')
print('Number of sequences : ', len(xs_train))
print('xs_train shape : ',xs_train.shape)
print('ys_train shape : ',ys_train.shape)
fidle.utils.subtitle('What an xs look like :')
fidle.utils.np_print(xs_train[10] )
fidle.utils.subtitle('What an ys look like :')
fidle.utils.np_print(ys_train[10])
```
%% Cell type:markdown id: tags:
## Step 3 - Create a model
%% Cell type:code id: tags:
``` python
model = keras.models.Sequential()
model.add( keras.layers.InputLayer(shape=(sequence_len, features_len)) )
model.add( keras.layers.GRU(200, return_sequences=False, activation='relu') )
model.add( keras.layers.Dense(features_len) )
model.summary()
model.compile(optimizer='rmsprop',
loss='mse',
metrics = ['mae'] )
```
%% Cell type:markdown id: tags:
## Step 4 - Train the model
### 4.1 Add Callbacks
%% Cell type:code id: tags:
``` python
os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)
save_dir = f'{run_dir}/models/best_model.keras'
savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_mae', mode='min', save_best_only=True)
```
%% Cell type:markdown id: tags:
### 4.2 - Train it
Needs about 3' on a CPU laptop
%% Cell type:code id: tags:
``` python
chrono=fidle.Chrono()
chrono.start()
history=model.fit(xs_train,ys_train,
epochs = epochs,
verbose = fit_verbosity,
validation_data = (xs_test, ys_test),
callbacks = [savemodel_callback])
chrono.show()
```
%% Cell type:code id: tags:
``` python
fidle.scrawler.history(history,plot={'loss':['loss','val_loss'], 'mae':['mae','val_mae']}, save_as='03-history')
```
%% Cell type:markdown id: tags:
## Step 5 - Predict
%% Cell type:markdown id: tags:
### 5.1 - Load model
%% Cell type:code id: tags:
``` python
loaded_model = keras.models.load_model(f'{run_dir}/models/best_model.keras')
print('Loaded.')
```
%% Cell type:markdown id: tags:
### 5.2 - Make a 1-step prediction
A simple prediction on a single iteration
%% Cell type:code id: tags:
``` python
s=random.randint(0,len(x_test)-sequence_len)
sequence = x_test[s:s+sequence_len]
sequence_true = x_test[s:s+sequence_len+1]
sequence_pred = loaded_model.predict( np.array([sequence]), verbose=fit_verbosity )
print('sequence shape :',sequence.shape)
print('sequence true shape :',sequence_true.shape)
print('sequence pred shape :',sequence_pred.shape)
fidle.scrawler.segment_2d(sequence_true, sequence_pred, save_as='04-one-step-prediction')
fidle.scrawler.multivariate_serie(sequence_true, predictions=sequence_pred, labels=['Axis=0', 'Axis=1'],save_as='05-one-step-prediction-2axis')
```
%% Cell type:markdown id: tags:
### 5.3 - Make n-steps prediction
A longer-term prediction, via a small iteration function.
We will perform `iterations` predictions to iteratively build our prediction.
%% Cell type:code id: tags:
``` python
def get_prediction(dataset, model, iterations=4):
# ---- Initial sequence
#
s=random.randint(0,len(dataset)-sequence_len-iterations)
sequence_pred = dataset[s:s+sequence_len].copy()
sequence_true = dataset[s:s+sequence_len+iterations].copy()
# ---- Iterate
#
sequence_pred = list(sequence_pred)
for i in range(iterations):
sequence = sequence_pred[-sequence_len:]
prediction = model.predict( np.array([sequence]), verbose=fit_verbosity )
sequence_pred.append(prediction[0])
# ---- Extract the predictions
#
prediction = np.array(sequence_pred[-iterations:])
return sequence_true,prediction
```
%% Cell type:markdown id: tags:
An n-steps prediction :
%% Cell type:code id: tags:
``` python
sequence_true, sequence_pred = get_prediction(x_test, loaded_model, iterations=5)
fidle.scrawler.segment_2d(sequence_true, sequence_pred, ms=8, save_as='06-n-steps-prediction-norm')
fidle.scrawler.multivariate_serie(sequence_true, predictions=sequence_pred, hide_ticks=True, labels=['Axis=0', 'Axis=1'],save_as='07-n-steps-prediction-norm')
```
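%% Cell type:markdown id: tags:
These plots live in the normalized space used for training. If you want the trajectory back in the original coordinate space, a quick sketch is to invert the normalization of step 2.2, reusing the `mean` and `std` computed on `x_train` (the `save_as` name below is arbitrary):
%% Cell type:code id: tags:
``` python
# ---- Back to the original coordinate space (sketch)
#      Invert the normalization of step 2.2, using the mean/std computed on x_train
sequence_true_orig = sequence_true * std + mean
sequence_pred_orig = sequence_pred * std + mean
fidle.scrawler.segment_2d(sequence_true_orig, sequence_pred_orig, ms=8, save_as='08-n-steps-prediction-denorm')
```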
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [TRANS1] - IMDB, Sentiment analysis with Transformers
<!-- DESC --> Using a Transformer to perform a sentiment analysis (IMDB) - Jean Zay version
<!-- AUTHOR : Hatim Bourfoune (IDRIS) and Nathan Cassereau (IDRIS) -->
By : Hatim Bourfoune (IDRIS) and Nathan Cassereau (IDRIS)
## Objectives :
- Complement the learning of a Transformer to perform a sentiment analysis
- Understand the use of a pre-trained transformer
This task is exactly the same as the sentiment analysis with text embedding. Only this time,
we are going to exploit the strength of transformers. Considering how computation-heavy transformer
pretraining is, we are going to use a pretrained BERT model from HuggingFace.
This notebook performs the fine-tuning process. If possible, try to use a GPU to speed up
the training: transformers are difficult to train on a CPU.
## What we are going to do:
* Retrieve the dataset
* Prepare the dataset
* Fetch a pretrained BERT model from HuggingFace's platform (https://huggingface.co/models)
* Fine-tune the model on a sequence classification task: the sentiment analysis of the IMDB dataset
* Evaluate the result
%% Cell type:markdown id: tags:
## Installations
**IMPORTANT :** We will need to use the library `transformers` created by HuggingFace.
The next line only applies on Jean Zay: it loads a very specific environment, which contains TensorFlow with GPU support. Ignore that line if this notebook is not executed on Jean Zay.
%% Cell type:code id: tags:
``` python
#!pip install transformers
!module load tensorflow-gpu/py3/2.6.0
```
%% Cell type:markdown id: tags:
## Imports and initialisation
%% Cell type:code id: tags:
``` python
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.datasets.imdb as imdb
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy
from tensorflow.keras import mixed_precision
from transformers import (
DistilBertTokenizer,
TFDistilBertModel,
DataCollatorWithPadding,
BertTokenizer,
TFBertModel
)
import pickle
import multiprocessing
import itertools
import os
import matplotlib.pyplot as plt
import seaborn as sns
print("Tensorflow ", tf.__version__)
n_gpus = len(tf.config.list_physical_devices('GPU'))
print("#GPUs: ", n_gpus)
if n_gpus > 0:
!nvidia-smi -L
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)
os.environ["TOKENIZERS_PARALLELISM"] = "true"
np.random.seed(987654321)
tf.random.set_seed(987654321)
```
%% Cell type:markdown id: tags:
## Parameters
* `vocab_size` refers to the number of words which will be kept in our vocabulary.
* `hide_most_frequently` is the number of ignored words, among the most common ones.
* `review_len` is the maximum review length.
* `n_cpus` is the number of CPUs which will be used for data preprocessing.
* `distil` indicates whether we use a DistilBert model or a regular Bert model.
* `load_locally` will fetch data locally; otherwise it will be downloaded from the Internet (requires an Internet connection, which is not possible on Jean Zay)
%% Cell type:code id: tags:
``` python
vocab_size = 30000
hide_most_frequently = 0
review_len = 512
epochs = 1
batch_size = 32
fit_verbosity = 1
scale = 1
n_cpus = 6
distil = True
load_locally = True # if set to False, will fetch data from the internet (requires an internet connection)
```
%% Cell type:markdown id: tags:
## Retrieve the dataset
%% Cell type:code id: tags:
``` python
if load_locally:
with open("dataset", "rb") as file_:
(x_train, y_train), (x_test, y_test) = pickle.load(file_)
else:
(x_train, y_train), (x_test, y_test) = imdb.load_data(
num_words=vocab_size,
skip_top=hide_most_frequently,
seed=123456789,
)
with open("dataset", "wb") as file_:
pickle.dump(((x_train, y_train), (x_test, y_test)), file_)
y_train = np.asarray(y_train).astype('float32')
y_test = np.asarray(y_test ).astype('float32')
n1 = int(scale * len(x_train))
n2 = int(scale * len(x_test))
x_train, y_train = x_train[:n1], y_train[:n1]
x_test, y_test = x_test[:n2], y_test[:n2]
print("x_train : {} y_train : {}".format(x_train.shape, y_train.shape))
print("x_test : {} y_test : {}".format(x_test.shape, y_test.shape))
print('\nReview sample (x_train[12]) :\n\n',x_train[12])
```
%% Cell type:code id: tags:
``` python
if load_locally:
with open("word_index", "rb") as file_:
word_index = pickle.load(file_)
else:
word_index = imdb.get_word_index()
with open("word_index", "wb") as file_:
pickle.dump(word_index, file_)
word_index = {w:(i+3) for w,i in word_index.items()}
word_index.update({'[PAD]':0, '[CLS]':1, '[UNK]':2})
index_word = {index:word for word,index in word_index.items()}
# A handy function to convert a tokenized review back to text:
def dataset2text(review):
return ' '.join([index_word.get(i, "?") for i in review[1:]])
```
%% Cell type:code id: tags:
``` python
print(dataset2text(x_train[12]))
```
%% Cell type:markdown id: tags:
## Fetch the model from HuggingFace
%% Cell type:code id: tags:
``` python
def load_model(distil, load_locally):
if load_locally:
if distil:
bert_model = TFDistilBertModel.from_pretrained("distilbert_model")
tokenizer = DistilBertTokenizer("distilbert_vocab.txt", do_lower_case=True)
else:
bert_model = TFBertModel.from_pretrained("bert_model")
tokenizer = BertTokenizer("bert_vocab.txt", do_lower_case=True)
return bert_model, tokenizer
if distil:
bert_model = TFDistilBertModel.from_pretrained("distilbert-base-uncased")
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
bert_model.save_pretrained("distilbert_model")
tokenizer.save_vocabulary("distilbert_vocab.txt")
else:
bert_model = TFBertModel.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model.save_pretrained("bert_model")
tokenizer.save_vocabulary("bert_vocab.txt")
return bert_model, tokenizer
bert_model, tokenizer = load_model(distil, load_locally)
bert_model.summary()
```
%% Cell type:markdown id: tags:
## Prepare the dataset
%% Cell type:code id: tags:
``` python
def tokenize_sample(sample, tokenizer):
return tokenizer(dataset2text(sample), truncation=True, max_length=review_len)
def distributed_tokenize_dataset(dataset):
ds = list(dataset)
with multiprocessing.Pool(n_cpus) as pool:
tokenized_ds = pool.starmap(
tokenize_sample,
zip(ds, itertools.repeat(tokenizer, len(ds)))
)
return tokenized_ds
tokenized_x_train = distributed_tokenize_dataset(x_train)
tokenized_x_test = distributed_tokenize_dataset(x_test)
```
%% Cell type:code id: tags:
``` python
data_collator = DataCollatorWithPadding(tokenizer, return_tensors="tf")
```
%% Cell type:code id: tags:
``` python
data_collator(tokenized_x_train)
```
%% Cell type:code id: tags:
``` python
def make_dataset(x, y):
collated = data_collator(x)
dataset = tf.data.Dataset.from_tensor_slices(
(collated['input_ids'], collated['attention_mask'], y)
)
transformed_dataset = (
dataset
.map(
lambda x, y, z: ((x, y), z)
)
.shuffle(25000)
.batch(batch_size)
)
return transformed_dataset
train_ds = make_dataset(tokenized_x_train, y_train)
test_ds = make_dataset(tokenized_x_test, y_test)
for x, y in train_ds:
print(x)
break
```
%% Cell type:markdown id: tags:
## Add a new head to the model
%% Cell type:code id: tags:
``` python
class ClassificationModel(keras.Model):
def __init__(self, bert_model):
super(ClassificationModel, self).__init__()
self.bert_model = bert_model
self.pre_classifier = Dense(768, activation='relu')
self.dropout = Dropout(0.1)
self.classifier = Dense(2)
def call(self, x):
x = self.bert_model(x)
x = x.last_hidden_state
x = x[:, 0] # get the output of the classification token
x = self.pre_classifier(x)
x = self.dropout(x)
x = self.classifier(x)
return x
```
%% Cell type:code id: tags:
``` python
model = ClassificationModel(bert_model)
x = next(iter(train_ds))[0]
model(x)
model.summary()
```
%% Cell type:markdown id: tags:
## Train!
%% Cell type:code id: tags:
``` python
model.compile(
optimizer=Adam(1e-05),
loss=SparseCategoricalCrossentropy(from_logits=True),
metrics=[SparseCategoricalAccuracy('accuracy')]
)
```
%% Cell type:code id: tags:
``` python
history = model.fit(
train_ds,
epochs=epochs,
verbose=fit_verbosity
)
```
%% Cell type:markdown id: tags:
## Evaluation
%% Cell type:code id: tags:
``` python
_, score = model.evaluate(test_ds)
colors = sns.color_palette('pastel')[2:]
accuracy_score = [score, 1 - score]
plt.pie(
accuracy_score,
labels=["Accurate", "Mistaken"],
colors=colors,
autopct=lambda val: f"{val:.2f}%",
explode=(0.0, 0.1)
)
plt.show()
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [TRANS2] - IMDB, Sentiment analysis with Transformers
<!-- DESC --> Using a Transformer to perform a sentiment analysis (IMDB) - Colab version
<!-- AUTHOR : Hatim Bourfoune (IDRIS) and Nathan Cassereau (IDRIS) -->
By : Hatim Bourfoune (IDRIS) and Nathan Cassereau (IDRIS)
## Objectives :
- Complement the learning of a Transformer to perform a sentiment analysis
- Understand the use of a pre-trained transformer
This task is exactly the same as the sentiment analysis with text embedding. Only this time,
we are going to exploit the strength of transformers. Considering how computation-heavy transformer
pretraining is, we are going to use a pretrained BERT model from HuggingFace.
This notebook performs the fine-tuning process. If possible, try to use a GPU to speed up
the training: transformers are difficult to train on a CPU.
## What we are going to do:
* Retrieve the dataset
* Prepare the dataset
* Fetch a pretrained BERT model from HuggingFace's platform (https://huggingface.co/models)
* Fine-tune the model on a sequence classification task: the sentiment analysis of the IMDB dataset
* Evaluate the result
%% Cell type:markdown id: tags:
## Installations
**IMPORTANT :** We will need to use the library `transformers` created by HuggingFace.
%% Cell type:code id: tags:
``` python
!pip install transformers
```
%% Cell type:markdown id: tags:
## Imports and initialisation
%% Cell type:code id: tags:
``` python
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.datasets.imdb as imdb
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy
from transformers import (
DistilBertTokenizer,
TFDistilBertModel,
DataCollatorWithPadding,
BertTokenizer,
TFBertModel
)
from tqdm.notebook import tqdm
import itertools
import multiprocessing
import os
import matplotlib.pyplot as plt
import seaborn as sns
print("Tensorflow ", tf.__version__)
n_gpus = len(tf.config.list_physical_devices('GPU'))
print("#GPUs: ", n_gpus)
if n_gpus > 0:
!nvidia-smi -L
os.environ["TOKENIZERS_PARALLELISM"] = "true"
np.random.seed(987654321)
tf.random.set_seed(987654321)
```
%% Cell type:markdown id: tags:
## Parameters
* `vocab_size` refers to the number of words which will be kept in our vocabulary.
* `hide_most_frequently` is the number of ignored words, among the most common ones.
* `review_len` is the maximum review length.
* `n_cpus` is the number of CPUs which will be used for data preprocessing.
* `distil` indicates whether we use a DistilBert model or a regular Bert model.
%% Cell type:code id: tags:
``` python
vocab_size = 30000
hide_most_frequently = 0
review_len = 512
epochs = 1
batch_size = 32
fit_verbosity = 1
scale = 1
n_cpus = 1
distil = True
```
%% Cell type:markdown id: tags:
## Retrieve the dataset
%% Cell type:code id: tags:
``` python
(x_train, y_train), (x_test, y_test) = imdb.load_data(
num_words=vocab_size,
skip_top=hide_most_frequently,
seed=123456789,
)
y_train = np.asarray(y_train).astype('float32')
y_test = np.asarray(y_test ).astype('float32')
n1 = int(scale * len(x_train))
n2 = int(scale * len(x_test))
x_train, y_train = x_train[:n1], y_train[:n1]
x_test, y_test = x_test[:n2], y_test[:n2]
print("x_train : {} y_train : {}".format(x_train.shape, y_train.shape))
print("x_test : {} y_test : {}".format(x_test.shape, y_test.shape))
print('\nReview sample (x_train[12]) :\n\n',x_train[12])
```
%% Cell type:code id: tags:
``` python
word_index = imdb.get_word_index()
word_index = {w:(i+3) for w,i in word_index.items()}
word_index.update({'[PAD]':0, '[CLS]':1, '[UNK]':2})
index_word = {index:word for word,index in word_index.items()}
# A handy function to convert a tokenized review back to text:
def dataset2text(review):
return ' '.join([index_word.get(i, "?") for i in review[1:]])
```
%% Cell type:code id: tags:
``` python
print(dataset2text(x_train[12]))
```
%% Cell type:markdown id: tags:
## Fetch the model from HuggingFace
%% Cell type:code id: tags:
``` python
def load_model(distil):
if distil:
bert_model = TFDistilBertModel.from_pretrained("distilbert-base-uncased")
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
else:
bert_model = TFBertModel.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
return bert_model, tokenizer
bert_model, tokenizer = load_model(distil)
bert_model.summary()
```
%% Cell type:markdown id: tags:
## Prepare the dataset
%% Cell type:code id: tags:
``` python
def tokenize_sample(sample):
return tokenizer(dataset2text(sample), truncation=True, max_length=review_len)
def distributed_tokenize_dataset(dataset):
ds = list(dataset)
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
tokenized_ds = list(tqdm(
pool.imap(tokenize_sample, ds),
total=len(ds)
))
return tokenized_ds
tokenized_x_train = distributed_tokenize_dataset(x_train)
tokenized_x_test = distributed_tokenize_dataset(x_test)
```
%% Cell type:code id: tags:
``` python
data_collator = DataCollatorWithPadding(tokenizer, return_tensors="tf")
```
%% Cell type:code id: tags:
``` python
data_collator(tokenized_x_train)
```
%% Cell type:code id: tags:
``` python
def make_dataset(x, y):
collated = data_collator(x)
dataset = tf.data.Dataset.from_tensor_slices(
(collated['input_ids'], collated['attention_mask'], y)
)
transformed_dataset = (
dataset
.map(
lambda x, y, z: ((x, y), z)
)
.shuffle(25000)
.batch(batch_size)
)
return transformed_dataset
train_ds = make_dataset(tokenized_x_train, y_train)
test_ds = make_dataset(tokenized_x_test, y_test)
for x, y in train_ds:
print(x)
break
```
%% Cell type:markdown id: tags:
## Add a new head to the model
%% Cell type:code id: tags:
``` python
class ClassificationModel(keras.Model):
def __init__(self, bert_model):
super(ClassificationModel, self).__init__()
self.bert_model = bert_model
self.pre_classifier = Dense(768, activation='relu')
self.dropout = Dropout(0.1)
self.classifier = Dense(2)
def call(self, x):
x = self.bert_model(x)
x = x.last_hidden_state
x = x[:, 0] # get the output of the classification token
x = self.pre_classifier(x)
x = self.dropout(x)
x = self.classifier(x)
return x
```
%% Cell type:code id: tags:
``` python
model = ClassificationModel(bert_model)
x = next(iter(train_ds))[0]
model(x)
model.summary()
```
%% Cell type:markdown id: tags:
## Train!
%% Cell type:code id: tags:
``` python
model.compile(
optimizer=Adam(1e-05),
loss=SparseCategoricalCrossentropy(from_logits=True),
metrics=[SparseCategoricalAccuracy('accuracy')]
)
```
%% Cell type:code id: tags:
``` python
history = model.fit(
train_ds,
epochs=epochs,
verbose=fit_verbosity
)
```
%% Cell type:markdown id: tags:
## Evaluation
%% Cell type:code id: tags:
``` python
_, score = model.evaluate(test_ds)
colors = sns.color_palette('pastel')[2:]
accuracy_score = [score, 1 - score]
plt.pie(
accuracy_score,
labels=["Accurate", "Mistaken"],
colors=colors,
autopct=lambda val: f"{val:.2f}%",
explode=(0.0, 0.1)
)
plt.show()
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3VAE1] - First VAE, using functional API (MNIST dataset)
<!-- DESC --> Construction and training of a VAE, using the functional API, with a latent space of small dimension.
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Understanding and implementing a **variational autoencoder** neural network (VAE)
- Understanding **Keras functional API**, using two custom layers
Since the computation required is significant, it is preferable to start with a very simple dataset such as MNIST.
...MNIST at a small scale if you don't have a GPU ;-)
## What we're going to do :
- Defining a VAE model
- Build the model
- Train it
- Have a look on the train process
## Acknowledgements :
Thanks to **François Chollet**, whose example this notebook is based on (and who is the creator of Keras !!).
See : https://keras.io/examples/generative/vae
%% Cell type:markdown id: tags:
## Step 1 - Init python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
from keras import layers
import numpy as np
from modules.layers import SamplingLayer, VariationalLossLayer
from modules.callbacks import ImagesCallback
from modules.datagen import MNIST
import sys
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3VAE1')
```
%% Cell type:markdown id: tags:
## Step 2 - Parameters
`scale` : With scale=1, we need 1'30s on a GPU V100 ...and >20' on a CPU !\
`latent_dim` : 2 dimensions is small, but useful for drawing !\
`fit_verbosity` : Verbosity of the training progress bar: 0=silent, 1=progress bar, 2=one line per epoch\
`loss_weights` : Our **loss function** is the weighted sum of two losses:
- `r_loss`, which measures the reconstruction error.
- `kl_loss`, which measures the dispersion.
The weights are defined by: `loss_weights=[k1,k2]` where : `total_loss = k1*r_loss + k2*kl_loss`
In practice, a value of \[1,.06\] gives good results here.
With scale=0.2, epochs=10 : 3'30 on a laptop
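For intuition, here is a rough sketch of how such a weighted loss can be computed. This is only an illustration under the usual VAE assumptions (binary cross-entropy reconstruction term, closed-form KL term); the actual computation is done by `VariationalLossLayer`, used in step 4.
``` python
# Sketch only - the real code lives in modules/layers/VariationalLossLayer.py
import keras
from keras import ops

def vae_loss_sketch(x, x_hat, z_mean, z_log_var, k1=1., k2=.06):
    # r_loss : reconstruction error between the input image and its reconstruction
    r_loss  = ops.mean(keras.losses.binary_crossentropy(x, x_hat)) * 28 * 28
    # kl_loss : divergence between the latent distribution and a standard normal
    kl_loss = -0.5 * ops.mean(1 + z_log_var - ops.square(z_mean) - ops.exp(z_log_var))
    return k1 * r_loss + k2 * kl_loss
```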
%% Cell type:code id: tags:
``` python
latent_dim = 2
loss_weights = [1,.06]
scale = 0.2
seed = 123
batch_size = 64
epochs = 10
fit_verbosity = 1
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('latent_dim', 'loss_weights', 'scale', 'seed', 'batch_size', 'epochs', 'fit_verbosity')
```
%% Cell type:markdown id: tags:
## Step 3 - Prepare data
`MNIST.get_data()` return : `x_train,y_train, x_test,y_test`, \
but we only need x_train for our training.
%% Cell type:code id: tags:
``` python
x_data, y_data, _,_ = MNIST.get_data(seed=seed, scale=scale, train_prop=1 )
fidle.scrawler.images(x_data[:20], None, indices='all', columns=10, x_size=1,y_size=1,y_padding=0, save_as='01-original')
```
%% Cell type:markdown id: tags:
## Step 4 - Build model
In this example, we will use the **functional API.**
For this, we will use two custom layers :
- `SamplingLayer`, which generates a vector z from the parameters z_mean and z_log_var - See : [SamplingLayer.py](./modules/layers/SamplingLayer.py)
- `VariationalLossLayer`, which allows us to calculate the loss - See : [VariationalLossLayer.py](./modules/layers/VariationalLossLayer.py)
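For intuition, here is a minimal sketch of what such a sampling layer does (the reparameterization trick); the actual implementation in [SamplingLayer.py](./modules/layers/SamplingLayer.py) may differ in detail:
``` python
# Sketch only - see modules/layers/SamplingLayer.py for the real implementation
import keras
from keras import ops

class SamplingSketch(keras.layers.Layer):
    """Draw z ~ N(z_mean, exp(z_log_var)) in a differentiable way."""
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch, dim = ops.shape(z_mean)[0], ops.shape(z_mean)[1]
        epsilon = keras.random.normal(shape=(batch, dim))
        return z_mean + ops.exp(0.5 * z_log_var) * epsilon
```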
%% Cell type:markdown id: tags:
#### Encoder
%% Cell type:code id: tags:
``` python
inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, strides=1, padding="same", activation="relu")(inputs)
x = layers.Conv2D(64, 3, strides=2, padding="same", activation="relu")(x)
x = layers.Conv2D(64, 3, strides=2, padding="same", activation="relu")(x)
x = layers.Conv2D(64, 3, strides=1, padding="same", activation="relu")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = SamplingLayer()([z_mean, z_log_var])
encoder = keras.Model(inputs, [z_mean, z_log_var, z], name="encoder")
# encoder.summary()
```
%% Cell type:markdown id: tags:
#### Decoder
%% Cell type:code id: tags:
``` python
inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(7 * 7 * 64, activation="relu")(inputs)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, strides=1, padding="same", activation="relu")(x)
x = layers.Conv2DTranspose(64, 3, strides=2, padding="same", activation="relu")(x)
x = layers.Conv2DTranspose(32, 3, strides=2, padding="same", activation="relu")(x)
outputs = layers.Conv2DTranspose(1, 3, padding="same", activation="sigmoid")(x)
decoder = keras.Model(inputs, outputs, name="decoder")
# decoder.summary()
```
%% Cell type:markdown id: tags:
#### VAE
We will calculate the loss with a specific layer: `VariationalLossLayer`
See our : modules.layers.[VariationalLossLayer.py](./modules/layers/VariationalLossLayer.py)
%% Cell type:code id: tags:
``` python
inputs = keras.Input(shape=(28, 28, 1))
z_mean, z_log_var, z = encoder(inputs)
outputs = decoder(z)
outputs = VariationalLossLayer(loss_weights=loss_weights)([inputs, z_mean, z_log_var, outputs])
vae=keras.Model(inputs,outputs)
vae.compile(optimizer='adam', loss=None)
```
%% Cell type:markdown id: tags:
## Step 5 - Train
### 5.1 - Using two nice custom callbacks :-)
Two custom callbacks are used:
- `ImagesCallback` : which saves some images during training - See [ImagesCallback.py](./modules/callbacks/ImagesCallback.py)
- `BestModelCallback` : which saves the best model - See [BestModelCallback.py](./modules/callbacks/BestModelCallback.py)
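For intuition, a minimal sketch of what an image-saving callback can look like (illustration only, with a hypothetical class name; the actual classes are in the files linked above):
``` python
# Sketch only - see modules/callbacks/ImagesCallback.py for the real implementation
import keras

class ReconstructionLogger(keras.callbacks.Callback):
    """Store a few reconstructed images at the end of each epoch."""
    def __init__(self, x, nb_images=5):
        super().__init__()
        self.x      = x[:nb_images]
        self.images = []
    def on_epoch_end(self, epoch, logs=None):
        # self.model is the full VAE : input image -> reconstructed image
        self.images.append(self.model.predict(self.x, verbose=0))
```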
%% Cell type:code id: tags:
``` python
callback_images = ImagesCallback(x=x_data, z_dim=latent_dim, nb_images=5, from_z=True, from_random=True, run_dir=run_dir)
callbacks_list = [callback_images]
```
%% Cell type:markdown id: tags:
### 5.2 - Let's train !
With `scale=1`, this needs about 1'15 on a GPU (V100 at IDRIS) ...or 20' on a CPU
%% Cell type:code id: tags:
``` python
chrono=fidle.Chrono()
chrono.start()
history = vae.fit(x_data, epochs=epochs, batch_size=batch_size, callbacks=callbacks_list, verbose=fit_verbosity)
chrono.show()
```
%% Cell type:markdown id: tags:
## Step 6 - Training review
### 6.1 - History
%% Cell type:code id: tags:
``` python
fidle.scrawler.history(history, plot={"Loss":['loss']}, save_as='history')
```
%% Cell type:markdown id: tags:
### 6.2 - Reconstruction during training
At the end of each epoch, our callback saved some reconstructed images.
Where :
Original image -> encoder -> z -> decoder -> Reconstructed image
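The same path can be reproduced by hand with the `encoder` and `decoder` models defined above (illustrative sketch):
``` python
# Illustrative : reproduce the Original -> encoder -> z -> decoder -> Reconstructed path by hand
z_mean, z_log_var, z = encoder.predict(x_data[:5], verbose=0)
x_reconstructed      = decoder.predict(z, verbose=0)
print(x_reconstructed.shape)   # (5, 28, 28, 1)
```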
%% Cell type:code id: tags:
``` python
images_z, images_r = callback_images.get_images( range(0,epochs,2) )
fidle.utils.subtitle('Original images :')
fidle.scrawler.images(x_data[:5], None, indices='all', columns=5, x_size=2,y_size=2, save_as=None)
fidle.utils.subtitle('Encoded/decoded images')
fidle.scrawler.images(images_z, None, indices='all', columns=5, x_size=2,y_size=2, save_as='02-reconstruct')
fidle.utils.subtitle('Original images :')
fidle.scrawler.images(x_data[:5], None, indices='all', columns=5, x_size=2,y_size=2, save_as=None)
```
%% Cell type:markdown id: tags:
### 6.3 - Generation (latent -> decoder)
%% Cell type:code id: tags:
``` python
fidle.utils.subtitle('Generated images from latent space')
fidle.scrawler.images(images_r, None, indices='all', columns=5, x_size=2,y_size=2, save_as='03-generated')
```
%% Cell type:markdown id: tags:
## Annexe - Model Save and reload
Save our model
%% Cell type:code id: tags:
``` python
os.makedirs(f'{run_dir}/models', exist_ok=True)
filename = run_dir+'/models/my_model.keras'
vae.save(filename)
```
%% Cell type:markdown id: tags:
Reload it
%% Cell type:code id: tags:
``` python
vae_reloaded = keras.models.load_model( filename,
custom_objects={ 'SamplingLayer': SamplingLayer,
'VariationalLossLayer':VariationalLossLayer})
```
%% Cell type:markdown id: tags:
Play with our decoder !
%% Cell type:code id: tags:
``` python
decoder = vae.get_layer('decoder')
img = decoder( np.array([[-1,.1]]))
fidle.scrawler.images(img.detach().cpu().numpy(), x_size=2,y_size=2, save_as='04-example')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>