# ==================================================================
# ____ _ _ _ __ __ _
# | _ \ _ __ __ _ ___| |_(_) ___ __ _| | \ \ / /__ _ __| | __
# | |_) | '__/ _` |/ __| __| |/ __/ _` | | \ \ /\ / / _ \| '__| |/ /
# | __/| | | (_| | (__| |_| | (_| (_| | | \ V V / (_) | | | <
# |_| |_| \__,_|\___|\__|_|\___\__,_|_| \_/\_/ \___/|_| |_|\_\
# module pwk
# ==================================================================
# A simple module to host some common functions for practical work
# pjluc 2020
import os
import glob
import itertools
import datetime, time
import math
import numpy as np
from collections.abc import Iterable
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import confusion_matrix
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sn
from IPython.display import display, Markdown
VERSION='0.2.4'
# -------------------------------------------------------------
# init_all
# -------------------------------------------------------------
#
def init(mplstyle='fidle/talk.mplstyle'):
    global VERSION
    # ---- matplotlib
    matplotlib.style.use(mplstyle)
    # ---- Hello world
    # now = datetime.datetime.now()
    print('FIDLE 2020 - Practical Work Module')
    print('  Version            :', VERSION)
    print('  Run time           : {}'.format(time.strftime("%A %-d %B %Y, %H:%M:%S")))
    print('  Matplotlib style   :', mplstyle)
    print('  TensorFlow version :', tf.__version__)
    print('  Keras version      :', tf.keras.__version__)
# -------------------------------------------------------------
# Folder cooking
# -------------------------------------------------------------
#
def tag_now():
    return datetime.datetime.now().strftime("%Y-%m-%d_%Hh%Mm%Ss")

def mkdir(path):
    os.makedirs(path, mode=0o750, exist_ok=True)

def get_directory_size(path):
    """
    Return the directory size, but only 1 level deep
    args:
        path : directory path
    return:
        size in MB
    """
    size=0
    for f in os.listdir(path):
        if os.path.isfile(path+'/'+f):
            size+=os.path.getsize(path+'/'+f)
    return size/(1024*1024)
# -------------------------------------------------------------
# shuffle_dataset
# -------------------------------------------------------------
#
def shuffle_np_dataset(x, y):
    """
    Shuffle a dataset (x,y)
    args:
        x,y : dataset
    return:
        x,y shuffled
    """
    assert (len(x) == len(y)), "x and y must have same size"
    p = np.random.permutation(len(x))
    return x[p], y[p]

def update_progress(what,i,imax):
    """
    Display a text progress bar, as :
    My progress bar : ############# 34%
    args:
        what : Progress bar name
        i    : Current progress
        imax : Max value for i
    return:
        nothing
    """
    bar_length = min(40,imax)
    if (i%int(imax/bar_length))!=0 and i<imax:
        return
    progress  = float(i/imax)
    block     = int(round(bar_length * progress))
    endofline = '\r' if progress<1 else '\n'
    text = "{:16s} [{}] {:>5.1f}% of {}".format( what, "#"*block+"-"*(bar_length-block), progress*100, imax)
    print(text, end=endofline)
def rmax(l):
    """
    Recursive max() for a given iterable of iterables
    Should be np.array of np.array or list of list, etc.
    args:
        l : Iterable of iterables
    return:
        max value
    """
    maxi = float('-inf')
    for item in l:
        if isinstance(item, Iterable):
            t = rmax(item)
        else:
            t = item
        if t > maxi:
            maxi = t
    return maxi

def rmin(l):
    """
    Recursive min() for a given iterable of iterables
    Should be np.array of np.array or list of list, etc.
    args:
        l : Iterable of iterables
    return:
        min value
    """
    mini = float('inf')
    for item in l:
        if isinstance(item, Iterable):
            t = rmin(item)
        else:
            t = item
        if t < mini:
            mini = t
    return mini
# -------------------------------------------------------------
# show_images
# -------------------------------------------------------------
#
def plot_images(x,y, indices, columns=12, x_size=1, y_size=1, colorbar=False, y_pred=None, cm='binary'):
    """
    Show some images in a grid, with legends
    args:
        x: images - shapes must be (-1,lx,ly,1) or (-1,lx,ly,3)
        y: real classes
        indices: indices of images to show
        columns: number of columns (12)
        x_size,y_size: figure size
        colorbar: show colorbar (False)
        y_pred: predicted classes (None)
        cm: Matplotlib color map
    returns:
        nothing
    """
    rows   = math.ceil(len(indices)/columns)
    fig    = plt.figure(figsize=(columns*x_size, rows*(y_size+0.35)))
    n      = 1
    errors = 0
    if y_pred is None:
        y_pred = y
    for i in indices:
        axs=fig.add_subplot(rows, columns, n)
        n+=1
        # ---- Shape is (lx,ly)
        if len(x[i].shape)==2:
            xx=x[i]
        # ---- Shape is (lx,ly,n)
        if len(x[i].shape)==3:
            (lx,ly,lz)=x[i].shape
            if lz==1:
                xx=x[i].reshape(lx,ly)
            else:
                xx=x[i]
        img=axs.imshow(xx, cmap = cm, interpolation='lanczos')
        axs.spines['right'].set_visible(True)
        axs.spines['left'].set_visible(True)
        axs.spines['top'].set_visible(True)
        axs.spines['bottom'].set_visible(True)
        axs.set_yticks([])
        axs.set_xticks([])
        if y[i]!=y_pred[i]:
            axs.set_xlabel('{} ({})'.format(y_pred[i],y[i]))
            axs.xaxis.label.set_color('red')
            errors+=1
        else:
            axs.set_xlabel(y[i])
        if colorbar:
            fig.colorbar(img,orientation="vertical", shrink=0.65)
    plt.show()
def plot_image(x,cm='binary', figsize=(4,4)):
    """
    Draw a single image.
    Image shape can be (lx,ly), (lx,ly,1) or (lx,ly,n)
    args:
        x       : image as np array
        cm      : color map ('binary')
        figsize : fig size (4,4)
    """
    # ---- Shape is (lx,ly)
    if len(x.shape)==2:
        xx=x
    # ---- Shape is (lx,ly,n)
    if len(x.shape)==3:
        (lx,ly,lz)=x.shape
        if lz==1:
            xx=x.reshape(lx,ly)
        else:
            xx=x
    # ---- Draw it
    plt.figure(figsize=figsize)
    plt.imshow(xx, cmap = cm, interpolation='lanczos')
    plt.show()
# -------------------------------------------------------------
# show_history
# -------------------------------------------------------------
#
def plot_history(history, figsize=(8,6),
                 plot={"Accuracy":['accuracy','val_accuracy'], 'Loss':['loss', 'val_loss']}):
    """
    Show history
    args:
        history: history
        figsize: fig size
        plot: list of data to plot : {<title>:[<metrics>,...], ...}
    """
    for title,curves in plot.items():
        plt.figure(figsize=figsize)
        plt.title(title)
        plt.ylabel(title)
        plt.xlabel('Epoch')
        for c in curves:
            plt.plot(history.history[c])
        plt.legend(curves, loc='upper left')
        plt.show()
# -------------------------------------------------------------
# plot_confusion_matrix
# -------------------------------------------------------------
# Bug in Matplotlib 3.1.1
#
def plot_confusion_matrix(cm,
                          title='Confusion matrix',
                          figsize=(12,8),
                          cmap="gist_heat_r",
                          vmin=0,
                          vmax=1,
                          xticks=5,yticks=5):
    """
    Given a sklearn confusion matrix (cm), make a nice plot
    Note: bug in matplotlib 3.1.1
    Args:
        cm:        confusion matrix from sklearn.metrics.confusion_matrix
        title:     the text to display at the top of the matrix
        figsize:   figure size (12,8)
        cmap:      color map (gist_heat_r)
        vmin,vmax: min/max values (0 and 1)
    """
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy
    plt.figure(figsize=figsize)
    sn.heatmap(cm, linewidths=1, linecolor="#ffffff",square=True,
               cmap=cmap, xticklabels=xticks, yticklabels=yticks,
               vmin=vmin,vmax=vmax,annot=True)
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()
def display_confusion_matrix(y_true,y_pred,labels=None,color='green',
                             font_size='12pt', title="#### Confusion matrix is :"):
    """
    Show a confusion matrix for predictions.
    see : sklearn.metrics.confusion_matrix
    Args:
        y_true:    real classes
        y_pred:    predicted classes
        labels:    list of classes to show in the cm
        color:     color for the palette (green)
        font_size: values font size
        title:     the text to display at the top of the matrix
    """
    assert (labels is not None), "Labels must be set"
    if title is not None: display(Markdown(title))
    cm = confusion_matrix( y_true,y_pred, normalize="true", labels=labels)
    df=pd.DataFrame(cm)
    cmap = sn.light_palette(color, as_cmap=True)
    df.style.set_properties(**{'font-size': '20pt'})
    display(df.style.format('{:.2f}') \
            .background_gradient(cmap=cmap)
            .set_properties(**{'font-size': font_size}))
def plot_donut(values, labels, colors=["lightsteelblue","coral"], figsize=(6,6), title=None):
    """
    Draw a donut
    args:
        values  : list of values
        labels  : list of labels
        colors  : list of colors (["lightsteelblue","coral"])
        figsize : size of figure ( (6,6) )
    return:
        nothing
    """
    # ---- Title or not
    if title is not None: display(Markdown(title))
    # ---- Donut
    plt.figure(figsize=figsize)
    # ---- Draw a pie chart..
    plt.pie(values, labels=labels,
            colors = colors, autopct='%1.1f%%', startangle=70, pctdistance=0.85,
            textprops={'fontsize': 18},
            wedgeprops={"edgecolor":"w",'linewidth': 5, 'linestyle': 'solid', 'antialiased': True})
    # ---- ..with a white circle
    circle = plt.Circle((0,0),0.70,fc='white')
    ax = plt.gca()
    ax.add_artist(circle)
    # Equal aspect ratio ensures that pie is drawn as a circle
    plt.axis('equal')
    plt.tight_layout()
    plt.show()
# See : https://matplotlib.org/users/customizing.html
axes.titlesize : 24
axes.labelsize : 20
axes.edgecolor : dimgrey
axes.labelcolor : dimgrey
axes.linewidth : 2
axes.grid : False
axes.prop_cycle : cycler('color', ['steelblue', 'tomato', '2ca02c', 'd62728', '9467bd', '8c564b', 'e377c2', '7f7f7f', 'bcbd22', '17becf'])
lines.linewidth : 3
lines.markersize : 10
xtick.color : black
xtick.labelsize : 18
ytick.color : black
ytick.labelsize : 18
axes.spines.left : True
axes.spines.bottom : True
axes.spines.top : False
axes.spines.right : False
savefig.dpi : 300 # figure dots per inch or 'figure'
savefig.facecolor : white # figure facecolor when saving
savefig.edgecolor : white # figure edgecolor when saving
savefig.format : svg
savefig.bbox : tight
savefig.pad_inches : 0.1
savefig.transparent : True
savefig.jpeg_quality: 95
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [NP1] - A short introduction to Numpy
<!-- DESC --> NumPy is an essential tool for scientific Python.
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Understand the main principles of Numpy and its potential
Note : This notebook is strongly inspired by the UGA Python Introduction Course
See : **https://gricad-gitlab.univ-grenoble-alpes.fr/python-uga/py-training-2017**
%% Cell type:markdown id: tags:
## Step 1 - Numpy the beginning
Code using `numpy` usually starts with the import statement
%% Cell type:code id: tags:
``` python
import numpy as np
```
%% Cell type:markdown id: tags:
NumPy provides the type `np.ndarray`. Such arrays are multidimensional sequences of homogeneous elements. They can be created, for example, with the following commands:
%% Cell type:code id: tags:
``` python
# from a list
l = [10.0, 12.5, 15.0, 17.5, 20.0]
np.array(l)
```
%% Cell type:code id: tags:
``` python
# fast but the values can be anything
np.empty(4)
```
%% Cell type:code id: tags:
``` python
# slower than np.empty but the values are all 0.
np.zeros([2, 6])
```
%% Cell type:code id: tags:
``` python
# multidimensional array
a = np.ones([2, 3, 4])
print(a.shape, a.size, a.dtype)
a
```
%% Cell type:code id: tags:
``` python
# like range, but produces a 1D numpy array
np.arange(4)
```
%% Cell type:code id: tags:
``` python
# np.arange can produce arrays of floats
np.arange(4.)
```
%% Cell type:code id: tags:
``` python
# another convenient function to generate 1D arrays
np.linspace(10, 20, 5)
```
%% Cell type:markdown id: tags:
A NumPy array can be easily converted to a Python list.
%% Cell type:code id: tags:
``` python
a = np.linspace(10, 20 ,5)
list(a)
```
%% Cell type:code id: tags:
``` python
# Or even better
a.tolist()
```
%% Cell type:markdown id: tags:
## Step 2 - Access elements
Elements in a `numpy` array can be accessed using indexing and slicing in any dimension. NumPy also offers the same functionality as Fortran or Matlab.
### 2.1 - Indexes and slices
For example, we can create an array `A` and perform any kind of selection operations on it.
%% Cell type:code id: tags:
``` python
A = np.random.random([4, 5])
A
```
%% Cell type:code id: tags:
``` python
# Get the element from the second row, first column
A[1, 0]
```
%% Cell type:code id: tags:
``` python
# Get the first two rows
A[:2]
```
%% Cell type:code id: tags:
``` python
# Get the last column
A[:, -1]
```
%% Cell type:code id: tags:
``` python
# Get the first two rows and the columns with an even index
A[:2, ::2]
```
%% Cell type:markdown id: tags:
### 2.2 - Using a mask to select elements satisfying a condition:
%% Cell type:code id: tags:
``` python
cond = A > 0.5
print(cond)
print(A[cond])
```
%% Cell type:markdown id: tags:
The mask is in fact a particular case of the advanced indexing capabilities provided by NumPy. For example, it is even possible to use lists for indexing:
%% Cell type:code id: tags:
``` python
# Selecting only particular columns
print(A)
A[:, [0, 1, 4]]
```
%% Cell type:markdown id: tags:
## Step 3 - Perform array manipulations
### 3.1 - Apply arithmetic operations to whole arrays (element-wise):
%% Cell type:code id: tags:
``` python
(A+5)**2
```
%% Cell type:markdown id: tags:
### 3.2 - Apply functions element-wise:
%% Cell type:code id: tags:
``` python
np.exp(A) # With numpy arrays, use the functions from numpy !
```
%% Cell type:markdown id: tags:
### 3.3 - Setting parts of arrays
%% Cell type:code id: tags:
``` python
A[:, 0] = 0.
print(A)
```
%% Cell type:code id: tags:
``` python
# BONUS: Safe element-wise inverse with masks
cond = (A != 0)
A[cond] = 1./A[cond]
print(A)
```
%% Cell type:markdown id: tags:
## Step 4 - Attributes and methods of `np.ndarray` (see the [doc](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html#numpy.ndarray))
%% Cell type:code id: tags:
``` python
for i,v in enumerate([s for s in dir(A) if not s.startswith('__')]):
    print(f'{v:16}', end='')
    if (i+1) % 6 == 0: print('')
```
%% Cell type:code id: tags:
``` python
# Ex1: Get the mean along different dimensions
print(A)
print('Mean value   ', A.mean())
print('Column means ', A.mean(axis=0))
print('Row means    ', A.mean(axis=1))
```
%% Cell type:code id: tags:
``` python
# Ex2: Convert a 2D array into 1D, keeping all elements
print(A)
print(A.shape)
A_flat = A.flatten()
print(A_flat, A_flat.shape)
```
%% Cell type:markdown id: tags:
### 4.1 - Remark: dot product
%% Cell type:code id: tags:
``` python
b = np.linspace(0, 10, 11)
c = b @ b
# before Python 3.5:
# c = b.dot(b)
print(b)
print(c)
```
%% Cell type:markdown id: tags:
### 4.2 - For Matlab users
| ` ` | Matlab | Numpy |
| ------------- | ------ | ----- |
| element wise | `.*` | `*` |
| dot product | `*` | `@` |
%% Cell type:markdown id: tags:
`numpy` arrays can also be sorted, even when they are composed of complex data, as long as the types of the columns are explicitly stated with `dtype`.
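For instance, a minimal sketch of sorting a structured array by one of its fields (the field names `name` and `grade` below are purely illustrative):
%% Cell type:code id: tags:
``` python
# Sort a structured array on a named column
dt = np.dtype([('name', 'U10'), ('grade', 'f8')])
students = np.array([('Bob', 12.5), ('Alice', 17.0), ('Eve', 9.0)], dtype=dt)
np.sort(students, order='grade')
```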
%% Cell type:markdown id: tags:
### 4.3 - NumPy and SciPy sub-packages:
We already saw `numpy.random` to generate `numpy` arrays filled with random values. This submodule also provides functions related to distributions (Poisson, Gaussian, etc.) and permutations.
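A small illustrative sketch:
%% Cell type:code id: tags:
``` python
# Draw samples from a few distributions, and shuffle a range of integers
poisson_samples  = np.random.poisson(lam=3.0, size=5)
gaussian_samples = np.random.normal(loc=0.0, scale=1.0, size=5)
permuted         = np.random.permutation(np.arange(10))
print(poisson_samples)
print(gaussian_samples)
print(permuted)
```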
%% Cell type:markdown id: tags:
To perform linear algebra with dense matrices, we can use the submodule `numpy.linalg`. For instance, to compute the determinant of a random matrix, we use the function `det`
%% Cell type:code id: tags:
``` python
A = np.random.random([5,5])
print(A)
np.linalg.det(A)
```
%% Cell type:code id: tags:
``` python
squared_subA = A[1:3, 1:3]
print(squared_subA)
np.linalg.inv(squared_subA)
```
%% Cell type:markdown id: tags:
### 4.4 - Introduction to Pandas: Python Data Analysis Library
Pandas is an open source library providing high-performance, easy-to-use data structures and data analysis tools for Python.
[Pandas tutorial](https://pandas.pydata.org/pandas-docs/stable/10min.html)
[Grenoble Python Working Session](https://github.com/iutzeler/Pres_Pandas/)
[Pandas for SQL Users](http://sergilehkyi.com/translating-sql-to-pandas/)
[Pandas Introduction Training HPC Python@UGA](https://gricad-gitlab.univ-grenoble-alpes.fr/python-uga/training-hpc/-/blob/master/ipynb/11_pandas.ipynb)
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [ACTF1] - Activation functions
<!-- DESC --> Some activation functions, with their derivatives.
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- View the main activation functions
Activation functions in Keras:
https://www.tensorflow.org/api_docs/python/tf/keras/activations
## What we're going to do :
- Just visualize the main activation functions
%% Cell type:code id: tags:
``` python
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import math
from math import erfc, sqrt, exp
from math import pi as PI
from math import e as E
import sys
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('ACTF1')
```
%% Cell type:code id: tags:
``` python
SELU_A = -sqrt(2/PI)/(erfc(1/sqrt(2))*exp(1/2)-1)
SELU_L = (1-erfc(1/sqrt(2))*sqrt(E))*sqrt(2*PI) / (2*erfc(sqrt(2))*E*E+PI*erfc(1/sqrt(2))**2*E-2*(2+PI)*erfc(1/sqrt(2))*sqrt(E)+PI+2)**0.5

def heaviside(z):
    return np.where(z<0,0,1)

def sign(z):
    return np.where(z<0,-1,1)
    # return np.sign(z)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def tanh(z):
    return np.tanh(z)

def relu(z):
    return np.maximum(0, z)

def leaky_relu(z,a=0.05):
    return np.maximum(a*z, z)

def elu(z,a=1):
    # y=z.copy()
    y = a*(np.exp(z)-1)
    y[z>0] = z[z>0]
    return y

def selu(z):
    return SELU_L*elu(z,a=SELU_A)

def derivative(f, z, eps=0.000001):
    # Numerical derivative by central differences
    return (f(z + eps) - f(z - eps))/(2 * eps)
```
%% Cell type:code id: tags:
``` python
pw=5
ph=5
z = np.linspace(-5, 5, 200)
# ------ Heaviside
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(0, 0, "rx", markersize=10)
ax.plot(z, heaviside(z), linestyle='-', label="Heaviside")
ax.plot(z, derivative(heaviside, z), linewidth=3, alpha=0.6, label="dHeaviside/dx")
# ax.plot(z, sign(z), label="Heaviside")
ax.set_title("Heaviside")
fidle.scrawler.save_fig('Heaviside')
plt.show()
# ----- Logit/Sigmoid
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(z, sigmoid(z), label="Sigmoid")
ax.plot(z, derivative(sigmoid, z), linewidth=3, alpha=0.6, label="dSigmoid/dx")
ax.set_title("Logit")
fidle.scrawler.save_fig('Logit')
plt.show()
# ----- Tanh
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(z, tanh(z), label="Tanh")
ax.plot(z, derivative(tanh, z), linewidth=3, alpha=0.6, label="dTanh/dx")
ax.set_title("Tanh")
fidle.scrawler.save_fig('Tanh')
plt.show()
# ----- Relu
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(z, relu(z), label="ReLU")
ax.plot(z, derivative(relu, z), linewidth=3, alpha=0.6, label="dReLU/dx")
ax.set_title("ReLU")
fidle.scrawler.save_fig('ReLU')
plt.show()
# ----- Leaky Relu
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(z, leaky_relu(z), label="Leaky ReLU")
ax.plot(z, derivative( leaky_relu, z), linewidth=3, alpha=0.6, label="dLeakyReLU/dx")
ax.set_title("Leaky ReLU (α=0.05)")
fidle.scrawler.save_fig('LeakyReLU')
plt.show()
# ----- Elu
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(z, elu(z), label="ELU")
ax.plot(z, derivative( elu, z), linewidth=3, alpha=0.6, label="dExpReLU/dx")
ax.set_title("ELU (α=1)")
fidle.scrawler.save_fig('ELU')
plt.show()
# ----- Selu
#
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(pw,ph)
ax.set_xlim(-5, 5)
ax.set_ylim(-2, 2)
ax.axhline(y=0, linewidth=1, linestyle='--', color='lightgray')
ax.axvline(x=0, linewidth=1, linestyle='--', color='lightgray')
ax.plot(z, selu(z), label="SeLU")
ax.plot(z, derivative( selu, z), linewidth=3, alpha=0.6, label="dSeLU/dx")
ax.set_title("ELU (SELU)")
fidle.scrawler.save_fig('SeLU')
plt.show()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [PANDAS1] - A few examples with Pandas
<!-- DESC --> Pandas is another essential tool for scientific Python.
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Understand how to slice a dataset
%% Cell type:markdown id: tags:
## Step 1 - A little cooking with datasets
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
```
%% Cell type:code id: tags:
``` python
# Get some data
a = np.arange(50).reshape(10,5)
print('Starting data: \n',a)
```
%% Cell type:code id: tags:
``` python
# Create a DataFrame
df_all = pd.DataFrame(a, columns=['A','B','C','D','E'])
print('\nDataFrame :')
display(df_all)
```
%% Cell type:code id: tags:
``` python
# Shuffle data
df_all = df_all.sample(frac=1, axis=0)
print('\nDataFrame randomly shuffled :')
display(df_all)
```
%% Cell type:code id: tags:
``` python
# Get a train part
df_train = df_all.sample(frac=0.8, axis=0)
print('\nTrain set (80%) :')
display(df_train)
```
%% Cell type:code id: tags:
``` python
# Get test set as all - train
df_test = df_all.drop(df_train.index)
print('\nTest set (all - train) :')
display(df_test)
```
%% Cell type:code id: tags:
``` python
x_train = df_train.drop('E', axis=1)
y_train = df_train['E']
x_test = df_test.drop('E', axis=1)
y_test = df_test['E']
display(x_train)
display(y_train)
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id:51be1de8 tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [PYTORCH1] - Practical Lab : PyTorch
<!-- DESC --> PyTorch is one of the main frameworks used in Deep Learning
<!-- AUTHOR : Kamel Guerda (CNRS/IDRIS) -->
## Objectives :
- Understand PyTorch
%% Cell type:markdown id:1959d3d5-388e-4c43-8318-342f08e6b024 tags:
## **Introduction**
%% Cell type:markdown id:a6da1305-551a-4549-abed-641415823a33 tags:
**PyTorch** is an open-source machine learning library developed by Facebook's AI Research lab. It offers an imperative and dynamic computational model, making it particularly easy and intuitive for researchers. Its primary feature is the tensor, a multi-dimensional array similar to NumPy's ndarray, but with GPU acceleration.
%% Cell type:markdown id:54c79dfb-a061-4b72-afe3-c97c28071e5c tags:
### **Installation and usage**
%% Cell type:markdown id:20852981-c289-4c4e-8099-2c5efef58e3b tags:
Whether you're working on the supercomputer Jean Zay or your own machine, getting your environment ready is the first step. Here's how to proceed:
%% Cell type:markdown id:a88f32bd-37f6-4e99-97e0-62283a146a1f tags:
#### **On Jean Zay**
%% Cell type:markdown id:8421a9f0-130d-40ef-8a7a-066bf9147066 tags:
For those accessing the Jean Zay supercomputer (you should already be at step 3):
1. **Access JupyterHub**: Go to [https://jupyterhub.idris.fr](https://jupyterhub.idris.fr). The login credentials are the same as those used to access the Jean Zay machine. Ensure your IP address is whitelisted (add a new IP via the account management form if needed).
2. **Create a JupyterLab Instance**: Choose to create the instance either on a frontend node (e.g., for internet access) or on a compute node by reserving resources via Slurm. Select the appropriate options such as workspace, allocated resources, billing, etc.
3. **Choose the Kernel**: IDRIS provides kernels based on modules installed on Jean Zay. This includes various versions of Python, Tensorflow, and PyTorch. Create a new notebook with the desired kernel through the launcher or change the kernel on an existing notebook by clicking the kernel name at the top right of the screen.
4. For advanced features like Tensorboard, MLFlow, custom kernel creation, etc., refer to the [JupyterHub technical documentation](https://jupyterhub.idris.fr/services/documentation/).
%% Cell type:markdown id:a168594c-cf18-4ed8-babf-242b56b3e0b7 tags:
> **Task:** Verify your kernel (top right corner)
> - In JupyterLab, at the top right of your notebook, you should see the name of your current kernel.
> - Ensure it matches "PyTorch 2.0" or a similar name indicating the PyTorch version.
> - If it doesn't, click on the kernel name and select the appropriate kernel from the list.
%% Cell type:markdown id:0aaadeee-5115-48d0-aa57-20a0a63d5054 tags:
#### **Elsewhere**
%% Cell type:markdown id:5d34951e-1b7b-4776-9449-eff57a9385f4 tags:
For users on other platforms:
1. Install PyTorch by following the official [installation guide](https://pytorch.org/get-started/locally/).
2. If you have a GPU, ensure you've installed the necessary CUDA toolkit and cuDNN libraries.
3. Launch your preferred Python environment, whether it's Jupyter notebook, an IDE like PyCharm, or just the terminal.
Once your setup is complete, you're ready to dive in. Let's explore the fascinating world of deep learning!
%% Cell type:markdown id:7552d5ac-eb8c-48e0-9e61-3b056d560f7b tags:
### **Version**
%% Cell type:code id:272e492f-35c5-4293-b504-8e8632da1b73 tags:
``` python
# Importing PyTorch
import torch
# TODO: Print the version of PyTorch being used
```
%% Cell type:markdown id:9fdbe225-4e06-4ad0-abca-4325457dc0e1 tags:
<details>
<summary>Hint (click to reveal)</summary>
To print the version of PyTorch you're using, you can access the <code>__version__</code> attribute of the <code>torch</code> module.
```python
print(torch.__version__)
```
</details>
%% Cell type:markdown id:72752068-02fe-4e44-8c27-40e8f66680c9 tags:
**Why PyTorch 2.0 is a Game-Changer**
PyTorch 2.0 represents a major step in the evolution of this popular deep learning library. As part of the transition to the 2-series, let's highlight some reasons why this version is pivotal:
1. **Performance**: With PyTorch 2.0, performance has been supercharged at the compiler level, offering faster execution and support for Dynamic Shapes and Distributed systems.
2. **torch.compile**: This introduces a more Pythonic approach, moving some parts of PyTorch from C++ back to Python. Notably, across a test set of 163 open-source models, the use of `torch.compile` resulted in a 43% speed increase during training on an NVIDIA A100 GPU.
3. **Innovative Technologies**: Technologies like TorchDynamo and TorchInductor, both written in Python, make PyTorch more flexible and developer-friendly.
4. **Staying Pythonic**: PyTorch 2.0 emphasizes Python-centric development, reducing barriers for developers and vendors.
As we progress in this lab, we'll dive deeper into some of these features, giving you hands-on experience with the power and flexibility of PyTorch 2.0.
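As a quick taste, here is a minimal sketch of wrapping a plain function with `torch.compile` (the function `f` below is purely illustrative; this requires PyTorch >= 2.0):
%% Cell type:code id: tags:
``` python
import torch

# A small function to compile (illustrative only)
def f(x):
    return torch.sin(x) ** 2 + torch.cos(x) ** 2

compiled_f = torch.compile(f)                 # build an optimized version of f
x = torch.randn(1000)
print(torch.allclose(f(x), compiled_f(x)))    # same result, potentially faster execution
```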
%% Cell type:markdown id:bc215c02-1f16-48be-88f9-5080fd2be9ed tags:
## **PyTorch Fundamentals**
%% Cell type:markdown id:bcd7f0fc-a714-495e-9307-e48964abd85b tags:
### **Tensors**
%% Cell type:markdown id:6e185bf6-3d3c-4a43-b425-e6aa3da5d5dd tags:
A **tensor** is a generalization of vectors and matrices and is easily understood as a multi-dimensional array. In the context of PyTorch:
- A 0-dimensional tensor is a scalar (a single number).
- A 1-dimensional tensor is a vector.
- A 2-dimensional tensor is a matrix.
- ... and so on for higher dimensions.
Tensors are fundamental to PyTorch not just as data containers but also for their compatibility with GPU acceleration, making operations on them extremely fast. This acceleration is vital for training large neural networks.
Let's start our journey with tensors by examining how PyTorch handles scalars.
%% Cell type:markdown id:fa90e399-3955-4417-a4a3-c0c812ebb1d9 tags:
#### **Scalars in PyTorch**
A scalar, being a 0-dimensional tensor, is simply a single number. While it might seem trivial, understanding scalars in PyTorch lays the foundation for grasping more complex tensor structures. Familiarize yourself with the `torch.tensor()` function from the [official documentation](https://pytorch.org/docs/stable/generated/torch.tensor.html) before proceeding.
> **Task**: Create a scalar tensor in PyTorch and examine its properties.
%% Cell type:code id:b6db1841-0fab-4df0-b699-058d5a477ca6 tags:
``` python
# TODO: Create a scalar tensor with the value 7.5
scalar_tensor = # Your code here
# Print the scalar tensor
print("Scalar Tensor:", scalar_tensor)
# TODO: Print its dimension, shape, and type
```
%% Cell type:markdown id:c9bc265c-9a7f-4588-8586-562b390d63d9 tags:
<details>
<summary>Hint (click to reveal)</summary>
To create a scalar tensor, use the <code>torch.tensor()</code> function. To retrieve its dimension, shape, and type, you can use the <code>.dim()</code>, <code>.shape</code>, and <code>.dtype</code> attributes respectively.
Here's how you can achieve that:
```python
scalar_tensor = torch.tensor(7.5)
print("Scalar Tensor:", scalar_tensor)
print("Dimension:", scalar_tensor.dim())
print("Shape:", scalar_tensor.shape)
print("Type:", scalar_tensor.dtype)
```
</details>
%% Cell type:markdown id:fc240c26-5866-4080-bbb9-d5cde1500300 tags:
#### **Vectors in PyTorch**
A vector in PyTorch is a 1-dimensional tensor. It's essentially a list of numbers that can represent anything from a sequence of data points to the weights of a neural network layer.
In this section, we'll see how to create and manipulate vectors using PyTorch. We'll also look at some basic operations you can perform on them.
> **Task**: Create a 1-dimensional tensor (vector) with values `[1.5, 2.3, 3.1, 4.8, 5.2]` and print its dimension, shape, and type.
Start by referring to the `torch.tensor()` function in the [official documentation](https://pytorch.org/docs/stable/generated/torch.tensor.html) to understand how to create tensors of varying dimensions.
%% Cell type:code id:e9503b49-38d1-45d9-910f-761da82cfbd0 tags:
``` python
# TODO: Create a 1-dimensional tensor (vector) with values [1.5, 2.3, 3.1, 4.8, 5.2]
vector_tensor = # Your code here
# Print the vector tensor
print("Vector Tensor:", vector_tensor)
# TODO: Print its dimension, shape, and type
```
%% Cell type:markdown id:13252d1f-004f-42e0-aec9-56322b43ab72 tags:
<details>
<summary>Hint (click to reveal)</summary>
Creating a 1-dimensional tensor is similar to creating a scalar. Instead of a single number, you pass a list of numbers to the <code>torch.tensor()</code> function. The <code>.dim()</code>, <code>.shape</code>, and <code>.dtype</code> attributes will help you retrieve its properties.
```python
vector_tensor = torch.tensor([1.5, 2.3, 3.1, 4.8, 5.2])
print("Vector Tensor:", vector_tensor)
print("Dimension:", vector_tensor.dim())
print("Shape:", vector_tensor.shape)
print("Type:", vector_tensor.dtype)
```
</details>
%% Cell type:markdown id:7bfc47a8-e99d-4683-ac36-287f35a76fd0 tags:
#### **Vector Operations**
Vectors are not just static entities; we often perform various operations on them, especially in the context of neural networks. This includes addition, subtraction, scalar multiplication, dot products, etc.
> **Task**: Using the previously defined `vector_tensor`, perform the following operations:
1. Add 5 to all the elements of the vector.
2. Multiply all the elements of the vector by 2.
3. Compute the dot product of the vector with itself.
%% Cell type:code id:86182e1c-5491-4743-a7c8-10b9effd8194 tags:
``` python
# TODO: Add 5 to all elements
vector_added = # Your code here
# TODO: Multiply all elements by 2
vector_multiplied = # Your code here
# TODO: Compute the dot product with itself
dot_product = # Your code here
# Print the results
print("Vector after addition:", vector_added)
print("Vector after multiplication:", vector_multiplied)
print("Dot Product:", dot_product)
```
%% Cell type:markdown id:75773a02-3ab4-4325-99fb-7a742e997f21 tags:
<details>
<summary>Hint (click to reveal)</summary>
PyTorch tensors support regular arithmetic operations. For the dot product, you can use the <code>torch.dot()</code> function.
```python
vector_added = vector_tensor + 5
vector_multiplied = vector_tensor * 2
dot_product = torch.dot(vector_tensor, vector_tensor)
print("Vector after addition:", vector_added)
print("Vector after multiplication:", vector_multiplied)
print("Dot Product:", dot_product)
```
</details>
%% Cell type:markdown id:2b4766ba-ef9a-4f24-ba43-7358097a7b61 tags:
#### **Matrices in PyTorch**
A matrix in PyTorch is represented as a 2D tensor. Just as vectors are generalizations of scalars, matrices are generalizations of vectors, providing an additional dimension. Matrices are crucial for a range of operations in deep learning, including representing datasets, transformations, and more.
%% Cell type:markdown id:2ec7544d-ef87-4773-88d8-cee731d1c43c tags:
##### **Creating Matrices**
Before diving into manual matrix creation, it's beneficial to know some utility functions PyTorch provides:
- `torch.rand()`: Generates a matrix with random values between 0 and 1.
- `torch.eye()`: Creates an identity matrix.
- `torch.zeros()`: Generates a matrix filled with zeros.
- `torch.ones()`: Generates a matrix filled with ones.
You can explore more about these functions in the [official documentation](https://pytorch.org/docs/stable/tensors.html).
> **Task**: Using the above functions, create the following matrices:
> 1. A 3x3 matrix with random values.
> 2. A 5x5 identity matrix.
> 3. A 2x4 matrix filled with zeros.
> 4. A 4x2 matrix filled with ones.
%% Cell type:code id:5014b564-6bf5-4f00-a513-578ca72d94a8 tags:
``` python
# Your code for creating the matrices goes here
```
%% Cell type:markdown id:86b2708c-45c6-4b2c-b526-41491fcafa08 tags:
<details>
<summary>Hint (click to reveal)</summary>
To create these matrices, make use of the following functions:
1. `torch.rand(size)`: Use this function and specify the size as `(3, 3)` to create a 3x3 matrix with random values.
2. `torch.eye(n, m)`: Use this to generate an identity matrix. For a square matrix like 5x5, n and m would both be 5.
3. `torch.zeros(m, n)`: For a 2x4 matrix filled with zeros, specify m=2 and n=4.
4. `torch.ones(m, n)`: Similar to the `zeros` function but fills the matrix with ones.
```python
# 1. 3x3 matrix with random values
random_matrix = torch.rand(3, 3)
print(random_matrix)
# 2. 5x5 identity matrix
identity_matrix = torch.eye(5, 5)
print(identity_matrix)
# 3. 2x4 matrix filled with zeros
zero_matrix = torch.zeros(2, 4)
print(zero_matrix)
# 4. 4x2 matrix filled with ones
one_matrix = torch.ones(4, 2)
print(one_matrix)
```
</details>
%% Cell type:markdown id:60ff5e51-699e-46a1-8cc7-1d5fc9a4d078 tags:
#### **Matrix Operations in PyTorch**
Just like vectors, matrices can undergo a variety of operations. Some of the basic ones include matrix addition, subtraction, and multiplication. More advanced operations include matrix inversion, transposition, and determinant calculation.
%% Cell type:markdown id:c6bdb9d9-b299-4d63-b92f-7c4b8c32a1b7 tags:
##### **Basic Matrix Operations**
> **Task**: Perform the following operations on matrices:
> 1. Create two 3x3 matrices with random values.
> 2. Add the two matrices.
> 3. Subtract the second matrix from the first one.
> 4. Multiply the two matrices element-wise.
Remember: for true matrix multiplication (the dot product of rows and columns) you'd use `torch.mm` or the `@` operator, whereas `*` performs element-wise multiplication, as illustrated in the short example below.
Here's the [official documentation](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.matmul) on matrix operations for your reference.
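A tiny sketch of the difference (the 2x2 values below are made up for demonstration):
%% Cell type:code id: tags:
``` python
# Element-wise product vs. matrix multiplication (illustrative values)
a = torch.tensor([[1., 2.], [3., 4.]])
b = torch.tensor([[10., 20.], [30., 40.]])
print(a * b)   # element-wise:   [[10., 40.], [90., 160.]]
print(a @ b)   # matrix product: [[70., 100.], [150., 220.]]
```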
%% Cell type:code id:6be8c647-c455-4d3b-8a21-c4b7102ffa75 tags:
``` python
# Your code for creating the matrices and performing the operations goes here
```
%% Cell type:markdown id:0020b26b-b2bb-4efa-9bf3-3f037acd050e tags:
<details>
<summary>Hint (click to reveal)</summary>
Here's how you can perform the given matrix operations:
```python
# 1. Create two 3x3 matrices with random values
matrix1 = torch.rand(3, 3)
matrix2 = torch.rand(3, 3)
print("Matrix 1:\n", matrix1)
print("\nMatrix 2:\n", matrix2)
# 2. Add the two matrices
sum_matrix = matrix1 + matrix2
print("\nSum of matrices:\n", sum_matrix)
# 3. Subtract the second matrix from the first one
difference_matrix = matrix1 - matrix2
print("\nDifference of matrices:\n", difference_matrix)
# 4. Multiply the two matrices element-wise
product_matrix = matrix1 * matrix2
print("\nElement-wise product of matrices:\n", product_matrix)
```
</details>
%% Cell type:markdown id:07f57464-76e2-4670-8332-3fcec2e162bd tags:
#### **Higher-Dimensional Tensors in PyTorch**
While scalars, vectors, and matrices cover 0D, 1D, and 2D tensors respectively, in deep learning, especially in tasks like image processing, you often encounter tensors with more than two dimensions.
For instance, a colored image is often represented as a 3D tensor: height x width x channels (e.g., RGB channels). A batch of such images would then be a 4D tensor: batch_size x height x width x channels.
Let's get our hands dirty with some higher-dimensional tensors!
%% Cell type:markdown id:3dd1fea7-d290-49fe-ac1f-5a8387e3d386 tags:
##### **Creating a Higher-Dimensional Tensor**
> **Task**: Create a tensor representing a batch of 2 images of size 4x4 with 3 channels (like RGB), filled with random values. As noted above, such a batch is a 4D tensor.
Use the `torch.rand` function, and remember to specify the dimensions correctly.
Here's the [official documentation](https://pytorch.org/docs/stable/tensors.html#creation-ops) for tensor creation.
%% Cell type:code id:e7c8ac6e-f870-4b5d-ac2c-05be1d0cc9f1 tags:
``` python
# Your code for creating the 3D tensor goes here
```
%% Cell type:markdown id:efe61750-a91f-428a-b4e2-7df0cc2a782b tags:
<details>
<summary>Hint (click to reveal)</summary>
Creating a tensor with the given specifications can be achieved using the `torch.rand` function. Here's how:
```python
# Create a tensor representing a batch of 2 images of size 4x4 with 3 channels
image_tensor = torch.rand(2, 4, 4, 3)
print(image_tensor)
```
</details>
%% Cell type:markdown id:8cfbcaa0-a0f6-4869-ba94-65d4439a60ca tags:
#### **Reshaping Tensors**
In deep learning, we often need to reshape our tensors. For instance, an image represented as a 3D tensor might need to be reshaped into a 1D tensor before passing it through a fully connected layer. PyTorch provides methods to make this easy.
The most commonly used method for reshaping tensors in PyTorch is the `view()` method. Another method that offers more flexibility (especially when you're unsure about the size of one dimension) is `reshape()`.
>[Task]: Using the official documentation, find out how to use the [`view()`](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view) and [`reshape()`](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.reshape) methods. Create a 2x3 tensor using `torch.tensor()` and then reshape it into a 3x2 tensor.
%% Cell type:code id:e6758ba7-aa35-42f0-87c1-86b88de64238 tags:
``` python
# Create a 2x3 tensor
# Reshape it into a 3x2 tensor
```
%% Cell type:markdown id:fea31255-c2fe-47b2-b03b-c2b35953e05a tags:
<details>
<summary>Hint (click to reveal)</summary>
To reshape a tensor using <code>view()</code> method:
```python
tensor = torch.tensor([[1, 2, 3], [4, 5, 6]])
reshaped_tensor = tensor.view(3, 2)
```
<br>
Alternatively, using the <code>reshape()</code> method:
```python
reshaped_tensor = tensor.reshape(3, 2)
```
</details>
%% Cell type:markdown id:c580dbca-b75a-4b97-a24a-6a19c7cdf8d1 tags:
#### **Broadcasting**
Broadcasting is a powerful feature in PyTorch that allows you to perform operations between tensors of different shapes. When possible, PyTorch will automatically reshape the tensors in a way that makes the operation valid. This can significantly reduce manual reshaping and is efficient in memory usage.
However, it's essential to understand the rules and nuances of broadcasting to use it effectively and avoid unexpected behaviors.
>[Task]: Given a tensor `A` of shape (4, 1) and another tensor `B` of shape (1, 4), use PyTorch operations to produce a result tensor of shape (4, 4). Check the [official documentation on broadcasting](https://pytorch.org/docs/stable/notes/broadcasting.html) for guidance.
%% Cell type:code id:44566fb7-87ed-41ef-a86e-db32a1cf2179 tags:
``` python
# Define tensor A of shape (4, 1) and tensor B of shape (1, 4)
# Perform an operation to get a result tensor of shape (4, 4)
```
%% Cell type:markdown id:2602f2c4-f507-4a9a-8e8d-dee5e95efc61 tags:
<details>
<summary>Hint (click to reveal)</summary>
You can simply use addition, subtraction, multiplication, or any other element-wise operation. When you do this operation, PyTorch will automatically broadcast the tensors to a compatible shape. For example:
```python
A = torch.tensor([[1], [2], [3], [4]])
B = torch.tensor([[1, 2, 3, 4]])
result = A * B
print(result)
```
</details>
%% Cell type:markdown id:ba2cc439-8ecc-4d92-b78f-39ef762678f8 tags:
### **GPU Support with CUDA**
%% Cell type:markdown id:575536c5-87a7-4781-8557-558627f14c0a tags:
PyTorch seamlessly supports operations on Graphics Processing Units (GPUs) through CUDA, an API developed by NVIDIA for their GPUs. If you have a compatible NVIDIA GPU on your machine, PyTorch can utilize it to speed up tensor operations which can be orders of magnitude faster than on a CPU.
To verify whether your PyTorch installation can use CUDA, you can call the function `torch.cuda.is_available()`. It returns `True` if CUDA is available and PyTorch can use GPUs, and `False` otherwise.
>[Task]: Print whether CUDA support is available on your system. The [CUDA documentation](https://pytorch.org/docs/stable/cuda.html) might be useful for this task.
%% Cell type:code id:38e84bb7-5026-4262-8b78-b368c55a1450 tags:
``` python
# Check and print if CUDA is available
cuda_available = None # Replace None with the appropriate code
print("CUDA available:", cuda_availablez
```
%% Cell type:markdown id:646b5660-5131-4ce0-9592-0fd14608c6df tags:
<details>
<summary>Hint (click to reveal)</summary>
To check if CUDA is available, you can utilize the torch.cuda.is_available() function.
```python
cuda_available = torch.cuda.is_available()
print("CUDA available:", cuda_available)
```
</details>
%% Cell type:markdown id:86c8d7ed-0931-4874-bb27-e796ae1a1d7a tags:
When developing deep learning models in PyTorch, it's a good habit to write device-agnostic code. This means your code can automatically use a GPU if available, or fall back to using the CPU if not. The `torch.device` object allows you to specify the device (either CPU or GPU) where you'd like your tensors to be allocated.
To dynamically determine the device, a common pattern is to check `torch.cuda.is_available()`, and set the device accordingly. This is particularly useful when you want your code to be flexible, regardless of the underlying hardware.
>[Task]: Define a `device` variable that is set to 'cuda:0' if CUDA is available and 'cpu' otherwise. Create a tensor on this device. The [documentation about torch.device](https://pytorch.org/docs/stable/tensor_attributes.html#torch-device) might be handy.
%% Cell type:code id:91e05e75-03ad-44cb-9842-89e2017ee709 tags:
``` python
# Define the device
device = None # Replace None with the appropriate code
# Create a tensor on the specified device
tensor_on_device = torch.tensor([1, 2, 3, 4, 5], device=device)
```
%% Cell type:markdown id:3b80406b-b1cc-4831-a6ba-8e6385703755 tags:
<details>
<summary>Hint (click to reveal)</summary>
To define the device variable dynamically:
```python
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
```
<br>
After setting the device, you can create tensors on it directly using the device argument.
</details>
%% Cell type:markdown id:574a2192-cc09-4d2c-8f01-97b051b7ffc8 tags:
### **Automatic Differentiation with Autograd**
%% Cell type:markdown id:7f5406f6-e295-4f70-a815-9eef18352390 tags:
PyTorch's `autograd` module provides the tools for automatically computing the gradients for tensors. This feature is a cornerstone for neural network training, as gradients are essential for optimization algorithms like gradient descent.
When we create a tensor, `requires_grad` is set to `False` by default, meaning it won't track operations. However, if we set `requires_grad=True`, PyTorch will start to track all operations on the tensor.
Let's start with a simple example:
>**Task:** Create a tensor that holds a single value, let's say 2, and set `requires_grad=True`. Then, define a simple operation like squaring the tensor. Finally, inspect the resulting tensor. The [documentation for requires_grad](https://pytorch.org/docs/stable/autograd.html#torch.Tensor.requires_grad) might be handy.
%% Cell type:code id:fe63ab93-55be-434d-822f-8fd9cd727941 tags:
``` python
# TODO: Create a tensor, perform a simple operation, and print its data and grad_fn separately.
```
%% Cell type:markdown id:fa7ee20c-c2d6-4dcf-bb37-9eda580b5dc5 tags:
<details>
<summary>Hint (click to reveal)</summary>
To create a tensor with requires_grad=True and square it:
```python
# TODO: Create a tensor, perform a simple operation, and print its data and grad_fn separately.
x = torch.tensor([2.0], requires_grad=True)
y = x ** 2
print("Data:", y.data)
print("grad_fn:", y.grad_fn)
```
</details>
%% Cell type:markdown id:c14dde16-a6be-4151-94cb-96ae98f0648a tags:
Once an operation has been executed on a tensor, a new attribute `grad_fn` is created. This attribute references the function that created the tensor. In our example, since we squared the tensor, `grad_fn` will be of type `PowBackward0`.
This `grad_fn` attribute provides a link to the computational history of the tensor, allowing PyTorch to backpropagate errors and compute gradients when training neural networks.
%% Cell type:markdown id:0965e79e-558a-45a9-8ab2-614c503e59c0 tags:
#### **Computing Gradients**
%% Cell type:markdown id:36fb6c5b-9b39-4a2f-a767-61032b1b4ffc tags:
Now, let's compute the gradients of `y` with respect to `x`. To do this, we'll call the `backward()` method on the tensor `y`.
>[Task]: Compute the gradients of `y` by calling the `backward()` method on it. Afterwards, print the gradients of `x`. The [documentation for backward()](https://pytorch.org/docs/stable/autograd.html#torch.autograd.backward) may be useful.
%% Cell type:code id:83685760-bde9-4327-88f7-cfe02bdb3309 tags:
``` python
# TODO: Compute the gradient and print it.
```
%% Cell type:markdown id:9b1d104b-efef-4fff-869d-8dde1131868e tags:
<details>
<summary>Hint (click to reveal)</summary>
To compute the gradient:
```python
y.backward()
print(x.grad)
```
</details>
%% Cell type:markdown id:d7f5aecb-8623-481f-a5cf-f8b6dd0c9a37 tags:
#### **Gradient Accumulation**
%% Cell type:markdown id:1a4df0a1-12a0-4129-a258-915fa8440193 tags:
In PyTorch, the gradients of tensors are accumulated into the `.grad` attribute each time you call `.backward()`. This means that if you call `.backward()` multiple times, the gradients will add up.
However, by default, calling `.backward()` consumes the computational graph to save memory. If you intend to call `.backward()` multiple times on the same graph, you need to specify `retain_graph=True` during all but the last call.
>[Task]: Create a tensor, perform an operation on it, and then call `backward()` twice. Use `retain_graph=True` in the first call to retain the computational graph. Observe the `.grad` attribute after each call.
%% Cell type:code id:50a04095-9d7e-48ba-90ed-06718cd379f0 tags:
``` python
# Create a tensor
w = torch.tensor([1.0], requires_grad=True)
# Operation
result = w * 2
# TODO: Call backward twice (using retain_graph=True for the first call) and print the grad after each call
# ...
```
%% Cell type:markdown id:d699e58d-d479-466a-b592-cbf68d185c3b tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
result.backward(retain_graph=True)
print(w.grad) # This should print 2
result.backward()
print(w.grad) # This should print 4, as gradients get accumulated
```
</details>
%% Cell type:markdown id:88d30f87-2469-4289-ad8a-51a25a2e8b82 tags:
#### **Zeroing Gradients**
%% Cell type:markdown id:2ea93580-9a35-4f5d-8f29-0a324d28d28a tags:
In neural network training, we typically want to update our weights with the gradients after each forward and backward pass. This means that we don't want the gradients to accumulate across multiple passes. Hence, it's common to zero out the gradients at the start of a new iteration.
>[Task]: Using the tensor from the previous cell, zero out its gradients and verify that it has been set to zero.
%% Cell type:code id:9cb03a91-d1df-4bbf-a0d2-b5580c643e12 tags:
``` python
# TODO: Zero out the gradients of w and print
```
%% Cell type:markdown id:4a89ff66-b1ef-413a-a41c-847e8c832e4b tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
w.grad.zero_()
print(w.grad)
```
</details>
%% Cell type:markdown id:85f75515-3d89-4249-b00a-03c13cca92d4 tags:
#### **Non-Scalar Backward**
%% Cell type:markdown id:86a54a2c-e8c1-4278-a3fe-ed60564ebd07 tags:
When dealing with non-scalar tensors, `backward()` requires an additional argument: the gradient of some scalar quantity (usually a loss) with respect to the tensor.
>[Task]: Create a tensor of shape (2, 2) with `requires_grad=True`. Compute a non-scalar result by multiplying the tensor with itself. Then, compute backward with a gradient argument. You can consult the [backward documentation](https://pytorch.org/docs/stable/autograd.html#torch.autograd.backward) for reference.
%% Cell type:code id:cc0e4271-c356-4a4e-9a3a-5df1403a4211 tags:
``` python
# TODO: Create a tensor, perform an operation, and compute backward with a gradient argument
```
%% Cell type:markdown id:e7ee72f3-f51c-4849-b41d-136028029185 tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
v = torch.tensor([[2.0, 3.0], [4.0, 5.0]], requires_grad=True)
result = v * v
grads = torch.tensor([[1.0, 1.0], [1.0, 1.0]])
result.backward(grads)
```
</details>
%% Cell type:markdown id:2e403021-4854-4e97-9898-82ed355293e7 tags:
#### **Stopping Gradient Tracking**
%% Cell type:markdown id:ba644253-8523-480d-8318-a87047671a21 tags:
There are scenarios where we don't want to track the gradients for certain operations. This can be achieved in two main ways:
1. **Using `torch.no_grad()`**: This context manager ensures that the enclosed operations are excluded from gradient tracking.
2. **Using `.detach()`**: Creates a tensor that shares the same storage but does not require gradients.
>[Task]: Create a tensor with `requires_grad=True`. Then, demonstrate both methods above to prevent gradient computation.
%% Cell type:code id:1feb2f9b-0c5f-4e9d-b042-e74052bc83a9 tags:
``` python
# TODO: Demonstrate operations without gradient tracking
```
%% Cell type:markdown id:a5eff82b-bfbd-4be7-afa3-dc00f5341568 tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
# Using torch.no_grad()
with torch.no_grad():
result_no_grad = v * v
print(result_no_grad.requires_grad)
# Using .detach()
detached_tensor = v.detach()
result_detach = detached_tensor * detached_tensor
print(result_detach.requires_grad)
```
</details>
%% Cell type:markdown id:efe66a5d-ac63-4623-8182-3b5aff58abbe tags:
## **Building a Simple Neural Network with PyTorch**
%% Cell type:markdown id:aa4b7630-fc1e-4f7b-b86b-3c0d233cdc49 tags:
Neural networks are the cornerstone of deep learning. They are organized as a series of interconnected nodes or "neurons" that are structured into layers: an input layer, several hidden layers, and an output layer. Data flows through this network, undergoing transformations at each node, until it emerges at the output.
With PyTorch's `torch.nn` module, constructing these neural networks becomes straightforward. Let's dive into its main components:
%% Cell type:markdown id:8e98f379-5580-477c-8b7b-c641f5edf710 tags:
### **nn.Module: The Base Class for Neural Networks**
%% Cell type:markdown id:15d72ea2-c846-44f5-85d5-bd1990c154bc tags:
Every neural network in PyTorch is derived from the `nn.Module` class. This class offers:
- Organization and management of the layers.
- Capabilities for GPU acceleration.
- Implementation of the forward pass.
When we inherit from `nn.Module`, our custom neural network class benefits from these functionalities.
For more details, you can refer to the official [documentation](https://pytorch.org/docs/stable/generated/torch.nn.Module.html).
>**Task:** Familiarize yourself with the structure of a simple neural network provided below. Later, you'll be enriching it.
%% Cell type:code id:425abefe-54b9-4944-bc6e-cc78de892c66 tags:
``` python
import torch.nn as nn
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        # Define layers here

    def forward(self, x):
        # Call the layers in the correct order here
        return x
```
%% Cell type:markdown id:892e3b55-097b-436e-bbf8-a380fd7d9e35 tags:
### **Linear Layers: Making Connections**
%% Cell type:markdown id:564c17bb-543f-42f6-8c5d-b855ccaf71e6 tags:
In PyTorch, a linear layer performs an affine transformation. It has both weights and biases which get updated during training. The transformation it performs can be described as:
$y = xA^T + b$
Where:
- $x$ is the input
- $A$ represents the weights
- $b$ is the bias
The `nn.Linear` class in PyTorch creates such a layer.
[Documentation Link for nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html)
> **Task:** Add an input layer and an output layer to the `SimpleNet` class.
>
> - The input layer should transform from `input_size` to `hidden_size`.
> - The output layer should transform from `hidden_size` to `output_size`.
> - After defining the layers in the `__init__` method, call them in the `forward` method to perform the transformations.
%% Cell type:code id:daa8829a-05e9-474e-b6e6-c7f749e22295 tags:
``` python
# Modify the below code by adding input and output linear layers in the appropriate places
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        # Define layers here

    def forward(self, x):
        # Call the layers in the correct order here
        return x
```
%% Cell type:markdown id:c5038840-2713-4492-b7ab-c70469a2e96e tags:
<details>
<summary>Hint (click to reveal)</summary>
To define the input and output linear layers, use the `nn.Linear` class in the `__init__` method:
Then, in the `forward` method, pass the input through the defined layers.
```python
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        self.input_layer  = nn.Linear(input_size, hidden_size)
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return x
```
</details>
%% Cell type:markdown id:c2bb82c9-8949-4472-84fe-def36c514150 tags:
### **Activation Functions: Introducing Non-Linearity**
%% Cell type:markdown id:d989e2d8-5530-45f3-8664-e0d1b9eb627a tags:
Activation functions are critical components in neural networks, introducing non-linearity between layers. This non-linearity allows networks to learn from the error and make adjustments, which is essential for learning complex patterns.
In PyTorch, many activation functions are available as part of the `torch.nn` module, such as ReLU, Sigmoid, and Tanh.
For our `SimpleNet` model, we'll use the ReLU (Rectified Linear Unit) activation function after the input layer. The ReLU function is defined as $f(x) = \max(0, x)$.
Learn more about [ReLU and other activation functions in the official documentation](https://pytorch.org/docs/stable/nn.html#non-linear-activations-weighted-sum-nonlinearity).
> **Task**: Update your `SimpleNet` class to include the ReLU activation function after the input layer. For this, you'll need to both define the activation function in `__init__` and apply it in the `forward` method.
%% Cell type:code id:9e426301-5a55-46a2-8305-241b8f1ca4bf tags:
``` python
# Copy the previous SimpleNet definition and modify the code to include the ReLU activation function.
```
%% Cell type:markdown id:212ef244-f7bf-49a2-b4c9-b1b90af315de tags:
<details>
<summary>Hint (click to reveal)</summary>
To include the ReLU activation in your neural network:
1. Define the ReLU activation function in the `__init__` method.
2. Apply the activation function in the `forward` method after passing through the `input_layer`.
```python
class SimpleNet(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(SimpleNet, self).__init__()
self.input_layer = nn.Linear(input_size, hidden_size)
self.relu = nn.ReLU() # Defining the ReLU activation function
self.output_layer = nn.Linear(hidden_size, output_size)
def forward(self, x):
x = self.input_layer(x)
x = self.relu(x) # Applying the ReLU activation function
x = self.output_layer(x)
return x
```
</details>
%% Cell type:markdown id:640ef2f4-6816-4c5e-955c-c14c33349512 tags:
#### **Adjusting the Network: Adding Dropout**
%% Cell type:markdown id:e5596abf-b262-461d-ad5f-6a3488a79a42 tags:
[Dropout](https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html) is a regularization technique that can improve generalization in neural networks. It works by randomly setting a fraction of input units to 0 at each update during training time.
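Here is a minimal sketch of that behaviour (illustration only; the tensor and probability are arbitrary). Note that dropout is only active in training mode, which is covered in more detail later:
```python
import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)
x = torch.ones(8)

drop.train()      # training mode: dropout is active
print(drop(x))    # about half the entries are zeroed, the survivors are scaled by 1/(1-p) = 2

drop.eval()       # evaluation mode: dropout is a no-op
print(drop(x))    # all ones
```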
> **Task**: Modify the `SimpleNet` class to include a dropout layer with a dropout probability of 0.5 between the input layer and the output layer. Don't forget to call this layer in the forward method.
>
> Remember, after modifying the class structure, you'll need to re-instantiate your model object.
%% Cell type:code id:1c68ffd4-1de6-4d77-a15f-705b24c924af tags:
``` python
# Add a dropout layer to your previous code
```
%% Cell type:markdown id:d78c2dab-95c1-441c-b661-80bfba9a2dfd tags:
<details>
<summary>Hint (click to reveal)</summary>
Here's how you can modify the SimpleNet class to include dropout:
```python
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        self.input_layer = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)   # randomly zeroes activations with probability 0.5 during training
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.relu(x)
        x = self.dropout(x)
        return self.output_layer(x)
```
Don't forget to create a new instance of your model afterwards, e.g. `model = SimpleNet(input_size, hidden_size, output_size)` (and move it to your device with `.to(device)` once a device has been defined).
</details>
%% Cell type:markdown id:ce1cb22c-8288-4c69-9dcb-56896de49794 tags:
### **Utilizing the Neural Network**
%% Cell type:markdown id:255c3bf2-419d-4d14-82d6-7959e9280670 tags:
Once our neural network is defined, it's time to put it to use. This section will cover:
1. Instantiating the network
2. Transferring the network to GPU (if available)
3. Making predictions using the network (forward pass)
4. Understanding training and evaluation modes
5. Performing a backward pass to compute gradients
%% Cell type:markdown id:9f28cee5-c7a0-48c5-8341-6da6fae516c5 tags:
#### **1. Instantiating the Network**
%% Cell type:markdown id:0760bef6-d77a-4b7b-b5c7-18b208d93b98 tags:
To use our `SimpleNet`, we first need to create an instance of it. While creating an instance, the network's weights are also initialized.
> **Task**: Instantiate the `SimpleNet` class. Use `input_size=5`, `hidden_size=3`, and `output_size=1` as parameters.
%% Cell type:code id:ae9bfc87-5b09-476c-b32b-92c09f992fe3 tags:
``` python
# Your code here: Instantiate the model
```
%% Cell type:markdown id:f951e5d2-e0b4-451d-9a9b-44256f8a224c tags:
<details>
<summary>Hint (click to reveal)</summary>
To instantiate the SimpleNet class:
```python
model = SimpleNet(input_size=5, hidden_size=3, output_size=1)
print(model)
```
</details>
%% Cell type:markdown id:35567e41-6de6-429b-be4b-a14598313aca tags:
#### **2. Transferring the Network to GPU**
%% Cell type:markdown id:b3f3b3c3-4d7a-46db-9634-1e14b277c808 tags:
PyTorch makes it very straightforward to transfer our model to a GPU if one is available. This is done using the `.to()` method.
> **Task**: Check if GPU (CUDA) is available. If it is, transfer the model to the GPU.
%% Cell type:code id:91cb61a0-d890-4697-88d9-7749ea2bf144 tags:
``` python
# Check for GPU availability and transfer the model to GPU if available.
```
%% Cell type:markdown id:8a405f2d-3d8d-4e4c-90d1-54a05ff08b90 tags:
<details>
<summary>Hint (click to reveal)</summary>
To transfer the model to the GPU if it's available:
```python
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
```
</details>
%% Cell type:markdown id:175ab7cc-cddf-4460-ab01-f0193c2908d7 tags:
#### **3. Making Predictions using the Network (Forward Pass)**
%% Cell type:markdown id:e3724444-e0a6-48b0-8872-0b53b000a3bd tags:
With our model instantiated and potentially on a GPU, we can use it to make predictions. This involves passing some input data through the model, which is commonly referred to as a forward pass.
> **Task**: Create a tensor of size [1, 5] (representing one sample with five features) with random values. Transfer this tensor to the same device as your model (GPU or CPU). Then, pass this tensor through your model to get the prediction.
%% Cell type:code id:00e818ee-72e0-4960-a87e-a27b771d58eb tags:
``` python
# Create a tensor, transfer it to the right device, and perform a forward pass.
```
%% Cell type:markdown id:8bc38fde-0c14-45a6-b237-76ec7beab7f0 tags:
<details>
<summary>Hint (click to reveal)</summary>
To make predictions using your model:
```python
# Create a tensor with random values
input_tensor = torch.randn(1, 5).to(device)
# Pass the tensor through the model
output = model(input_tensor)
print(output)
```
</details>
%% Cell type:markdown id:fad9f46f-b591-4a2f-b2bf-3b4cf54cf961 tags:
#### **4. Understanding Training and Evaluation Modes**
%% Cell type:markdown id:2f197278-8d74-4a69-8da9-caf3f952e7bc tags:
Every PyTorch model has two modes:
- `train` mode: In this mode, certain layers like dropout or batch normalization behave differently than during evaluation. For instance, dropout will randomly set a fraction of input units to 0 at each update during training.
- `eval` mode: Here, the model behaves in a deterministic manner. Dropout layers don't drop activations, and batch normalization uses the running statistics accumulated during training instead of the current mini-batch's statistics.
Setting the model to the correct mode is crucial. Let's demonstrate this.
> **Task**: Set your model to `train` mode, then perform a forward pass using the same input tensor multiple times and observe the outputs. Then, set your model to `eval` mode and repeat. Notice any differences?
%% Cell type:code id:4c2d921d-d409-4ae6-8ee4-8376fc9a209d tags:
``` python
# Perform the forward passes multiple times with the same input in both modes and observe the outputs.
```
%% Cell type:markdown id:0dbd65fa-b86b-4516-9fb1-aceae0c9d8a3 tags:
<details>
<summary>Hint (click to reveal)</summary>
Here's how you can demonstrate the difference:
```python
# Set to train mode
model.train()
# Forward pass multiple times
print("Train mode:")
for i in range(5):
print(model(input_tensor))
# Set to eval mode
model.eval()
print("Eval mode:")
# Forward pass multiple times
for i in range(5):
print(model(input_tensor))
```
If there were layers like dropout in your model, you'd notice that the outputs in training mode might differ on each pass, while in evaluation mode, they remain consistent.
</details>
%% Cell type:markdown id:e8c55be3-71f7-45e7-91d1-c556e8108fef tags:
## **The Training Procedure in PyTorch**
%% Cell type:markdown id:eac54af7-c8db-4a19-861b-2eecf68fb44e tags:
Training a neural network involves several key components: defining a loss function to measure errors, selecting an optimization method to adjust the model's weights, and iterating over the dataset multiple times. In this section, we will break down these components step by step, starting with the basics and moving towards more complex tasks.
%% Cell type:markdown id:3e9231a9-105c-4aed-bfa5-846ddc07245f tags:
### **Datasets and DataLoaders: Handling and Batching Data**
%% Cell type:markdown id:8dbc3fcf-5a29-4fd8-9e82-3eaae4c8dc90 tags:
In PyTorch, the `torch.utils.data.Dataset` class is used to represent a dataset. This abstract class requires the implementation of two primary methods: `__len__` (to return the number of items) and `__getitem__` (to return the item at a given index). However, PyTorch provides a utility class, `TensorDataset`, that wraps tensors in the dataset format, making it easier to use with the `DataLoader`.
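To make the protocol concrete, here is a minimal sketch of a hand-written `Dataset` (the class name and toy data are made up for illustration; in this notebook we will simply use `TensorDataset`):
```python
from torch.utils.data import Dataset

class SquaresDataset(Dataset):
    """Toy dataset returning (x, x**2) pairs."""
    def __init__(self, n):
        self.n = n

    def __len__(self):
        return self.n            # number of items in the dataset

    def __getitem__(self, idx):
        x = float(idx)
        return x, x * x          # the (input, target) pair at index idx

ds = SquaresDataset(5)
print(len(ds))    # 5
print(ds[2])      # (2.0, 4.0)
```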
The `torch.utils.data.DataLoader` class is a more powerful tool, responsible for:
- Batching the data
- Shuffling the data
- Loading the data in parallel using multiprocessing workers
Let's wrap some data in a Dataset and use a DataLoader to handle batching and shuffling.
> **Task**: Convert the input and target tensors into a dataset and dataloader. For this exercise, set the batch size to 32.
Below, we define synthetic data that follows a simple, learnable relationship.
This way, we're essentially modeling the relationship $y=mx+c+noise$ where:
- $y$ is the target or output.
- $m$ is the slope of the line.
- $c$ is the y-intercept.
- $x$ is the input.
- $noise$ is a small random value added to each point to make the data more realistic.
%% Cell type:code id:f8335e62-e0c0-4381-9c20-1ca8ed78516c tags:
``` python
num_samples = 1000
# Define the relationship
m = 2.0
c = 1.0
noise_factor = 0.05
# Generate input tensor
input_tensor = torch.linspace(-10, 10, num_samples).view(-1, 1)
# Generate target tensor based on the relationship
target_tensor = m * input_tensor + c + noise_factor * torch.randn(num_samples, 1)
import matplotlib.pyplot as plt
plt.figure(figsize=(10,6))
plt.scatter(input_tensor.numpy(), target_tensor.numpy(), color='blue', marker='o')
plt.title("Synthetic Data Visualization")
plt.xlabel("Input")
plt.ylabel("Target")
plt.grid(True)
plt.show()
```
%% Cell type:code id:9535ad7e-6534-491b-b38d-b61cdd60b39d tags:
``` python
# Convert our data into a dataset
# ...
# Create a data loader for mini-batch training
# ...
```
%% Cell type:markdown id:da99866e-ebd0-403d-8159-8a36d601bf09 tags:
<details>
<summary>Hint (click to reveal)</summary>
Use the TensorDataset class from torch.utils.data to wrap your tensors in a dataset format. After defining your dataset, you can use the DataLoader class to create an iterator that will return batches of data.
```python
from torch.utils.data import DataLoader, TensorDataset
# Convert our data into a dataset
dataset = TensorDataset(input_tensor, target_tensor)
# Create a data loader for mini-batch training
batch_size = 32
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
```
</details>
%% Cell type:markdown id:ea5aee0c-6c8a-485f-b099-9844a28bafa3 tags:
> **Task**: Explore the `dataset` and `data_loader`:
> 1. Print the total number of samples in the dataset and DataLoader.
> 2. Iterate one time over both and print the shape of items you retrieve.
%% Cell type:code id:244a8198-60c5-4154-93ab-3d96fbf3488a tags:
``` python
# Total number of samples
# ...
# Dataset elements
# ...
# DataLoader elements
# ...
```
%% Cell type:markdown id:882438f7-3cc7-4a20-a223-41ede7856ef4 tags:
<details>
<summary>Hint (click to reveal)</summary>
When you iterate over the dataset, each item is a tuple `(input, target)`, so you retrieve two tensors, each of shape `[1]`.
On the other hand, when you iterate over the data_loader, each item is a mini-batch of data. Thus, the first dimension of each batch should correspond to the batch size you've set (i.e., 32 in our case), except possibly for the last batch if the dataset size isn't a perfect multiple of the batch size.
```python
# Total number of samples
print(f"Total samples in dataset: {len(dataset)}")
print(f"Total batches in DataLoader: {len(data_loader)}")
# Dataset elements
(index, (data, target)) = next(enumerate(dataset))
print(f"Sample {index}: Data shape {data.shape}, Target shape {target.shape}")
# DataLoader elements
(index, (batch_data, batch_target)) = next(enumerate(data_loader))
print(f"Batch {index}: Data shape {batch_data.shape}, Target shape {batch_target.shape}")
```
</details>
%% Cell type:markdown id:8dc08bb3-e5b2-4a7d-be10-6adc496a812d tags:
### **Splitting the Dataset: Training, Validation, and Testing Sets**
%% Cell type:markdown id:659a4899-cb14-4a47-b990-ea1a77592102 tags:
When training neural networks, it's common to split the dataset into at least two sets:
1. **Training Set**: This set is used to train the model, i.e., adjust the weights using gradient descent.
2. **Validation Set** (optional, but often used): This set is used to evaluate the model during training, allowing for hyperparameter tuning without overfitting.
3. **Test Set**: This set is used to evaluate the model's performance after training, providing an unbiased assessment of its performance on new, unseen data.
In PyTorch, we can use the `random_split` function from `torch.utils.data` to easily split datasets.
First, let's define the lengths for each split:
%% Cell type:code id:32202871-2911-44e6-8ad6-6d848cb3ede0 tags:
``` python
total_samples = len(dataset)
train_size = int(0.8 * total_samples)
val_size = total_samples - train_size
```
%% Cell type:markdown id:a1f7a839-8ee0-460f-bef0-87ca30f7409e tags:
> **Task**: Using the random_split function, split the dataset into a training set and a validation set using the sizes provided above.
[Here's the documentation for random_split](https://pytorch.org/docs/stable/data.html#torch.utils.data.random_split).
> **Task**: Create the train_loader and val_loader
%% Cell type:code id:50a80fc9-ef6e-4118-ad6a-3dea9d16e94f tags:
``` python
# Splitting the dataset
```
%% Cell type:markdown id:b01bb0d7-17c0-4edd-a2b6-17e4ca74b2aa tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
# Splitting the dataset
from torch.utils.data import random_split
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
```
</details>
%% Cell type:markdown id:e2729431-701c-4451-931c-2ae0ed58dbb5 tags:
> **Task**: Now, using the provided training and validation datasets, print out the number of samples in each set. Also, fetch one sample from each set and print its shape.
%% Cell type:code id:770c42f6-7a52-4856-a4fe-23a60666389a tags:
``` python
# Your code here
```
%% Cell type:markdown id:583948e8-898a-4336-92c6-aaddef6adbcf tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
# Print number of samples in each set
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of validation samples: {len(val_dataset)}")
# Fetching one sample from each set and printing its shape
train_sample, train_target = train_dataset[0]
print(f"Training sample shape: {train_sample.shape}, Target shape: {train_target.shape}")
val_sample, val_target = val_dataset[0]
print(f"Validation sample shape: {val_sample.shape}, Target shape: {val_target.shape}")
```
</details>
%% Cell type:markdown id:0fdec6d6-9b32-457d-b8e6-d94d8e020e4f tags:
### **Loss Functions: Measuring Model Errors**
%% Cell type:markdown id:899ce66c-e878-4f6a-b37c-34cdeae438a1 tags:
Every training process needs a metric to determine how well the model's predictions align with the actual data. This metric is called the loss function (or cost function).
Different problems require different loss functions, and PyTorch provides a variety of [loss functions](https://pytorch.org/docs/stable/nn.html#loss-functions) suited for different tasks. For instance:
- **Mean Squared Error (MSE)**: Commonly used for regression tasks.
- **Cross-Entropy Loss**: Suited for classification tasks.
For a simple regression task, a common choice is the Mean Squared Error (MSE) loss.
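As a quick numeric sketch (the values are arbitrary), `nn.MSELoss` averages the squared differences between predictions and targets:
```python
import torch
import torch.nn as nn

criterion = nn.MSELoss()
pred   = torch.tensor([2.5,  0.0, 2.0])
target = torch.tensor([3.0, -0.5, 2.0])

loss = criterion(pred, target)
print(loss)   # (0.5**2 + 0.5**2 + 0.0**2) / 3 = 0.1667 (approximately)
```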
> **Task**: Familiarize yourself with the [MSE loss documentation](https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html). You will soon use it in the training loop.
> **Task**: Instantiate the Mean Squared Error (MSE) loss provided by PyTorch for our current neural network.
%% Cell type:code id:692e83d7-7382-4ab2-9caf-daa3a77bfd4d tags:
``` python
# Define the loss function.
```
%% Cell type:markdown id:7fe8dcb5-8a43-4561-88a0-a4a2a2d1bf53 tags:
<details>
<summary>Hint (click to reveal)</summary>
To define the MSE loss in PyTorch, you can use:
```python
criterion = nn.MSELoss()
```
</details>
%% Cell type:markdown id:e957d999-0a56-4320-808a-05d1af6b81c7 tags:
### **Optimizers: Adjusting Weights**
%% Cell type:markdown id:d3d4a09d-8838-4fd3-9e16-bfdc5018abde tags:
Optimizers adjust the weights of the network based on the gradients computed during backpropagation. Different optimizers update weights in different ways. For example, the popular **Stochastic Gradient Descent (SGD)** optimizer simply updates weights in the direction of the negative gradients, while **Adam** and **RMSprop** are more advanced optimizers that add mechanisms such as momentum and per-parameter adaptive learning rates.
PyTorch offers a wide range of [optimizers](https://pytorch.org/docs/stable/optim.html).
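To see what "updating weights in the direction of negative gradients" means, here is a minimal sketch of a single SGD step on one parameter (the toy loss and values are made up for illustration):
```python
import torch

w = torch.tensor(1.0, requires_grad=True)
opt = torch.optim.SGD([w], lr=0.1)

loss = (2.0 * w - 3.0) ** 2   # toy loss, minimal at w = 1.5
loss.backward()               # dloss/dw = 4 * (2w - 3) = -4 at w = 1.0
opt.step()                    # w <- w - lr * grad = 1.0 - 0.1 * (-4) = 1.4
print(w.item())               # 1.4
opt.zero_grad()               # reset gradients before the next step
```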
> **Task**: Review the [SGD optimizer documentation](https://pytorch.org/docs/stable/optim.html#torch.optim.SGD). It will be pivotal in the training loop you'll construct.
> **Task**: For this exercise, let's use the SGD optimizer. Instantiate it, setting our neural network parameters as the ones to be optimized and choosing a learning rate of 0.01.
%% Cell type:code id:39c8dfa8-7ea0-44e4-9429-118a6333bfe1 tags:
``` python
# Define the optimizer.
```
%% Cell type:markdown id:05e37f67-519a-4c49-97b3-2fafb7176de1 tags:
<details>
<summary>Hint (click to reveal)</summary>
To define the SGD optimizer in PyTorch, you can use:
```python
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
```
Note: the task above suggests a learning rate of 0.01, but because the inputs are unnormalized (x ranges from -10 to 10), such a large step size can make the loss diverge; a much smaller value such as 0.0001 usually converges more reliably.
</details>
%% Cell type:markdown id:13b2fb3e-5391-4e66-ba83-55e66935d2aa tags:
### **Setting Up the Basic Training Loop Function**
%% Cell type:markdown id:7a364925-b4d9-4ffd-b3f8-be30a5bb1613 tags:
Having a training loop within a function allows us to reuse the same code structure for different models, datasets, or other training parameters without redundancy. This modular approach also promotes code clarity and maintainability.
Let's define the training loop function which takes the model, data (inputs and targets), loss function, optimizer, and the number of epochs as parameters. The function should return the history of the loss after each epoch.
A typical training loop consists of:
1. Sending the input through the model (forward pass).
2. Calculating the loss.
3. Propagating the loss backward through the model to compute gradients (backward pass).
4. Updating the weights using the optimizer.
5. Repeating the steps for several epochs.
Training with the entire dataset as one batch can be memory-intensive and sometimes not as effective. Hence, in practice, we usually divide our dataset into smaller chunks or mini-batches and update our weights after each mini-batch.
> **Task**: Create a function named `train_model` that encapsulates the training loop for the `SimpleNet` model. The function should follow the signature the next code cell:
%% Cell type:code id:734864fe-46b6-4435-b58d-19b085ebd3f9 tags:
``` python
def train_model(model, dataloader, loss_function, optimizer, epochs):
# Your code here
pass
```
%% Cell type:markdown id:a6fee8dc-59da-4d48-918e-d6e093e997e5 tags:
<details>
<summary>Hint (click to reveal)</summary>
Here's how the train_model function might look:
```python
def train_model(model, dataloader, loss_function, optimizer, epochs):
# Store the loss values at each epoch
loss_history = []
for epoch in range(epochs):
for inputs, targets in dataloader:
# Ensure that data is on the right device
inputs, targets = inputs.to(device), targets.to(device)
# Reset the gradients to zero
optimizer.zero_grad()
# Execute a forward pass
outputs = model(inputs)
# Calculate the loss
loss = loss_function(outputs, targets)
# Conduct a backward pass
loss.backward()
# Update the weights
optimizer.step()
# Append the loss to the history
loss_history.append(loss.item())
print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss_history[-1]:.4f}")
return loss_history
```
</details>
%% Cell type:markdown id:c4e4b485-ffa6-487d-8dbc-b0b0590a796a tags:
### **Training the Neural Network**
%% Cell type:markdown id:15ba6b07-728f-4444-a3a9-af8cfeb884e1 tags:
With all the components defined in the previous sections, it's now time to integrate everything and set the training process in motion.
> **Task**: Combine all the previously defined elements to initiate the training procedure for your neural network model.
> 1. Don't forget to move your model and your data to the same device (GPU or CPU).
> 2. Train the model using the `train_loader` (keep the `val_loader` for evaluating the model afterwards).
%% Cell type:code id:90d043f7-213d-42a7-a14b-e6b716003b70 tags:
``` python
# Your code here to initiate the training process
```
%% Cell type:markdown id:398aaeec-5d6d-4ef6-bd24-27d51b32c148 tags:
<details>
<summary>Hint (click to reveal)</summary>
To train the model, you need to integrate all the previously defined components:
```python
# Moving the model to the device
model = SimpleNet(input_size=1, hidden_size=10, output_size=1).to(device)
# Re-create the optimizer so that it optimizes the parameters of this new model instance
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
# Training the model using the train_loader
loss_history = train_model(model, train_loader, criterion, optimizer, epochs=50)
```
Make sure you have defined `criterion` (the loss function) and the `train_model` function in the previous sections.
</details>
%% Cell type:code id:c7cf3df1-9fe2-4eee-a5bf-386f77b257f1 tags:
``` python
import matplotlib.pyplot as plt
# Plotting the loss curve
plt.figure(figsize=(10,6))
plt.plot(loss_history, label='Training Loss')
plt.title("Loss Curve")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.grid(True)
plt.show()
```
%% Cell type:markdown id:2b7f9d87-c172-427c-a2f4-1090b1120148 tags:
## **Conclusion: Moving Beyond the Basics**
%% Cell type:markdown id:6074877c-c149-4af9-8503-153455edd42a tags:
You've now built and trained a simple neural network using PyTorch, and you might be wondering: why aren't my results as good as I expected?
While you've certainly made strides, the journey of mastering deep learning and neural networks is filled with nuance, challenges, and constant learning. Here are some reasons why your results might not be optimal and what you'll discover in your next steps:
1. **Hyperparameter Tuning**: So far, we've set values like the learning rate and batch size somewhat arbitrarily. These values are critical and often require careful tuning specific to each problem.
2. **Learning Rate Scheduling**: A fixed learning rate might not always be the best strategy. Reducing the learning rate during training, known as learning rate annealing or scheduling, often leads to better convergence.
3. **Model Architecture**: The neural network we built is basic. There's an entire world of architectures out there, designed for specific types of data and tasks. The right architecture can make a significant difference.
4. **Regularization**: To prevent overfitting, techniques like dropout, weight decay, and early stopping can be applied. We haven't touched upon these, but they're crucial for ensuring your model generalizes well to unseen data.
5. **Data Quality and Quantity**: While we used synthetic data for simplicity, real-world data is messy. Cleaning and preprocessing data, augmenting it, and ensuring it's representative can have a significant impact on performance.
6. **Optimization Techniques**: There are advanced optimization algorithms and techniques that can speed up training and lead to better convergence. Techniques like momentum, adaptive learning rates (e.g., Adam, RMSprop) can play a crucial role.
7. **Evaluation Metrics**: We've looked at loss values, but in real-world scenarios, understanding and selecting the right evaluation metrics for the task (accuracy, F1-score, AUC-ROC, etc.) is vital.
8. **Training Dynamics**: Understanding how models train, visualizing the activations, weights, and gradients, and knowing when and why a model is struggling can offer insights into how to improve performance.
Remember, while the mechanics of building and training a neural network are essential, the art of deep learning lies in understanding the nuances and iterating based on insights and knowledge. The next steps in your learning, focusing on methodology, will provide the tools and knowledge to navigate these complexities and achieve better results.
Keep learning, experimenting, and iterating! The world of deep learning is vast, and there's always something new to discover.
%% Cell type:markdown id:ca6048e4-f3cf-40eb-bd50-c95f281f0554 tags:
## **Extra for the Fast Movers: Diving Deeper**
%% Cell type:markdown id:46a25dfd-1cc9-444d-98d6-966e7cc9da07 tags:
To further enhance your understanding and capability with PyTorch, this section introduces additional topics that cater to more advanced use-cases. These tools and techniques can be essential when dealing with larger and more complex projects, providing valuable insights into optimization and performance.
%% Cell type:markdown id:30edeed8-321b-4b1f-ace6-0decd8a167e5 tags:
### **Profiling with PyTorch Profiler in TensorBoard**
%% Cell type:markdown id:256bd4a2-aa6f-4a50-9c5d-854ca25293de tags:
PyTorch, starting from version 1.9.0, incorporates the PyTorch Profiler as a TensorBoard plugin. This integration allows users to profile their PyTorch code and visualize the results directly within TensorBoard.
Below, we will instrument our PyTorch code for TensorBoard profiling.
Use this [documentation](http://www.idris.fr/jean-zay/pre-post/profiler_pt.html) to achieve the next tasks.
> **Task:** Before instrumenting your PyTorch code, you'll need to import the necessary modules for profiling.
> **Task:** Modify the training loop to invoke the profiler.
%% Cell type:code id:86b471a6-7de6-40f0-af58-c41e8e8acbae tags:
``` python
# Your imports here
# Your code here
def train_model_with_profiling(model, train_loader, criterion, optimizer, epochs, profiler_dir='./profiler'):
# Your code here
pass
```
%% Cell type:markdown id:f389816a-fa2a-4668-9f0b-07d2a5abf5e1 tags:
<details>
<summary>Hint (click to reveal)</summary>
```python
from torch.profiler import profile, tensorboard_trace_handler, ProfilerActivity, schedule
def train_model_with_profiling(model, dataloader, loss_function, optimizer, epochs, profiler_dir='./profiler'):
# Store the loss values at each epoch
loss_history = []
with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
schedule=schedule(wait=1, warmup=1, active=12, repeat=1),
on_trace_ready=tensorboard_trace_handler(profiler_dir)) as prof:
for epoch in range(epochs):
for inputs, targets in dataloader:
# Ensure that data is on the right device
inputs, targets = inputs.to(device), targets.to(device)
# Reset the gradients to zero
optimizer.zero_grad()
# Execute a forward pass
outputs = model(inputs)
# Calculate the loss
loss = loss_function(outputs, targets)
# Conduct a backward pass
loss.backward()
# Update the weights
optimizer.step()
# Append the loss to the history
loss_history.append(loss.item())
# Notify profiler of step boundary
prof.step()
print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss_history[-1]:.4f}")
return loss_history
```
Make sure you have defined the loss_function, optimizer, and epochs in the previous sections.
</details>
%% Cell type:code id:cb82f0a9-522f-4746-87f9-ba7b7952d863 tags:
``` python
# Training the model using the train_loader
loss_history = train_model_with_profiling(model, train_loader, criterion, optimizer, 10, profiler_dir='./profiler')
```
%% Cell type:markdown id:313e4f40-521a-4beb-a278-c1ca9502b499 tags:
> **Task:** Visualize the profiling results. You will need to open a TensorBoard interface using the blue button in the top-left corner.
>
> **Make sure to specify the logdir with `--logdir=/path/to/profiler_folder`.**
%% Cell type:markdown id:06f86768-3b78-4874-b083-64bc365080fb tags:
### **Learning Rate Scheduling**
%% Cell type:markdown id:44721444-ba4a-44d0-9b65-16890dd4f097 tags:
One of the key hyperparameters to tune during neural network training is the learning rate. While it's possible to set a static learning rate for the entire training process, in practice, dynamically adjusting the learning rate often leads to better convergence and overall performance. This dynamic adjustment is often referred to as learning rate scheduling or annealing.
**Concept of Learning Rate Scheduling**
The learning rate determines the step size at each iteration while moving towards a minimum of the loss function. If it's too large, the optimization might overshoot the minimum. Conversely, if it's too small, the training might get stuck, or convergence could be very slow.
A learning rate scheduler changes the learning rate during training based on the provided scheduling policy. By adjusting the learning rate during training, you can achieve faster convergence and better final results.
**Using Learning Rate Schedulers in PyTorch**
PyTorch provides a variety of learning rate schedulers through the `torch.optim.lr_scheduler` module. Some of the popular ones are:
- `StepLR`: Decays the learning rate of each parameter group by `gamma` every `step_size` epochs.
- `ExponentialLR`: Decays the learning rate of each parameter group by `gamma` every epoch.
- `ReduceLROnPlateau`: Reduces the learning rate when a metric has stopped improving.
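Before modifying your own loop, here is a minimal sketch of how a scheduler wraps an optimizer and decays the learning rate each time `step()` is called (the scheduler type and hyperparameters below are arbitrary):
```python
import torch
from torch.optim.lr_scheduler import ExponentialLR

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.1)
scheduler = ExponentialLR(optimizer, gamma=0.5)

for epoch in range(3):
    optimizer.step()                        # stand-in for the real training steps of the epoch
    scheduler.step()                        # decay the learning rate once per epoch
    print(epoch, scheduler.get_last_lr())   # [0.05], then [0.025], then [0.0125]
```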
> **Task:** Take a look at the [documentation](https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate) or click on the hint in the following cell, then integrate an LR scheduler into the code you wrote before.
%% Cell type:markdown id:0c79a170-35d0-438f-b01b-a3f236f8b724 tags:
<details>
<summary>Hint (click to reveal)</summary>
Below, you have a typical training loop with a learning rate scheduler.
```python
from torch.optim.lr_scheduler import StepLR

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

for epoch in range(epochs):
    for input, target in data:
        optimizer.zero_grad()
        output = model(input)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
    # Step the learning rate scheduler once per epoch
    scheduler.step()
```
</details>
%% Cell type:markdown id:33f99f6e-3120-495a-a25b-8b9f3d14deb2 tags:
### **Automatic Mixed Precision**
%% Cell type:markdown id:217a7249-6655-4587-92b8-72dea7de8c9d tags:
Training deep neural networks can be both time-consuming and resource-intensive. One way to address this problem is by leveraging mixed precision training. In essence, mixed precision training uses both 16-bit and 32-bit floating-point types to represent numbers in the model, which can speed up training without sacrificing the accuracy of the final model.
**Overview of AMP (Automatic Mixed Precision)**
AMP (Automatic Mixed Precision) is a set of utilities provided by PyTorch to enable mixed precision training more effortlessly. The main advantages of AMP are:
- Faster Training: By using reduced precision, the model requires less memory bandwidth, resulting in faster data transfers and faster matrix multiplication.
- Reduced GPU Memory Usage: This enables training of larger models or utilization of larger batch sizes.
PyTorch has integrated the AMP utilities starting from version 1.6.
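As a small sketch of what `autocast` does under the hood (this assumes a CUDA GPU is available; the tensor sizes are arbitrary), operations such as matrix multiplication are executed in half precision inside the context:
```python
import torch

if torch.cuda.is_available():
    a = torch.randn(8, 8, device="cuda")
    b = torch.randn(8, 8, device="cuda")
    with torch.cuda.amp.autocast():
        c = a @ b                # matmul runs in float16 under autocast
    print(c.dtype)               # torch.float16
    print((a @ b).dtype)         # torch.float32 outside the autocast context
```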
> **Task**: Setup AMP in the training function by checking the [documentation](http://www.idris.fr/eng/ia/mixed-precision-eng.html). You will need to do the necessary imports, initialize the GradScaler, modify the training loop by including "with autocast():" around the forward and loss computation.
%% Cell type:code id:ad131b4b-02ba-472d-af78-a048868e3efc tags:
``` python
# Your code here
```
%% Cell type:markdown id:de38cb30-7b24-48cb-b804-ed296e38e3fb tags:
<details>
<summary>Hint (click to reveal)</summary>
Below, you have a typical training loop with autocast.
```python
from torch.cuda.amp import autocast, GradScaler
scaler = GradScaler()
for epoch in range(epochs):
for input, target in data:
optimizer.zero_grad()
with autocast():
output = model(input)
loss = loss_fn(output, target)
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
```
</details>
%% Cell type:markdown id:a3f7818a-fea1-4a12-b52a-cd83e0ae2ffe tags:
### **Pytorch Compiler**
%% Cell type:markdown id:dbb5f69b-009e-40b3-94f0-5a420afbd003 tags:
**For this section, you will need to use Pytorch with a version superior to 2.0.**
PyTorch, a widely adopted deep learning framework, has consistently evolved to offer users better performance and ease of use. One such advancement is the introduction of the PyTorch Compiler. This cutting-edge feature accelerates PyTorch code execution by JIT-compiling it into optimized kernels. What's even more impressive is its ability to enhance performance with minimal modifications to the original codebase.
Historically, PyTorch has introduced compiler solutions like TorchScript and FX Tracing. However, the introduction of torch.compile with PyTorch 2.0 has taken performance optimization to a new level. It provides a seamless experience, enabling you to transform typical PyTorch functions and even torch.nn.Module instances into their faster, compiled counterparts.
For those eager to dive deep into its workings and benefits, detailed documentation and tutorials have been made available:
- [torch.compile Tutorial](https://pytorch.org/tutorials/intermediate/torch_compile_tutorial.html)
- [PyTorch 2.0 Release Notes](https://pytorch.org/get-started/pytorch-2.0/)
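For example, a minimal sketch of compiling the model trained earlier and checking that the compiled version agrees with the eager one (this assumes PyTorch >= 2.0 and that `model` and `device` are defined as in the previous sections):
```python
model.eval()                               # make the forward pass deterministic (disables dropout)
compiled_model = torch.compile(model)      # same weights, optimized execution

x = torch.randn(8, 1, device=device)       # dummy batch matching input_size=1
with torch.no_grad():
    print(torch.allclose(model(x), compiled_model(x), atol=1e-5))   # outputs should match closely
```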
> **Task:** Your task is to make your existing PyTorch model take advantage of the performance benefits offered by torch.compile. This will not only make your model run faster but also give you hands-on experience with one of the latest features in PyTorch.
%% Cell type:markdown id:8d5236bc-08e4-4142-8c9c-fd7007474ff2 tags:
<details>
<summary>Hint (click to reveal)</summary>
1. **Ensure Dependencies**:
- Ensure that you have the required dependencies, especially PyTorch version 2.0 or higher.
2. **Check for GPU Compatibility**:
- For optimal performance, it's recommended to use a modern NVIDIA GPU (H100, A100, or V100).
3. **Compile Functions**:
- You can optimize arbitrary Python functions as shown in the example:
```python
def your_function(x, y):
# ... Your PyTorch code here ...
opt_function = torch.compile(your_function)
```
- Alternatively, use the decorator approach:
```python
@torch.compile
def opt_function(x, y):
# ... Your PyTorch code here ...
```
4. **Compile Modules**:
- If you have a PyTorch module (a class derived from `torch.nn.Module`), you can compile it similarly:
```python
class YourModule(torch.nn.Module):
# ... Your module definition here ...
model = YourModule()
opt_model = torch.compile(model)
```
</details>
%% Cell type:markdown id:bd4066a6-3f24-4b63-b2be-da0350ec6145 tags:
Remember, while torch.compile optimizes performance, the underlying logic remains the same. Ensure to test and validate your compiled model's outputs against the original to confirm consistent behavior.
%% Cell type:markdown id:4340d5df tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [TSB1] - Tensorboard with/from Jupyter
<!-- DESC --> 4 ways to use Tensorboard from the Jupyter environment
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Using [**Tensorboard**](https://www.tensorflow.org/tensorboard/get_started)
## What we're going to do :
- Using Tensorboard
%% Cell type:markdown id: tags:
## In the Fidle environment :
To access logs with tensorboard :
- Under **Docker**, from a terminal launched via the jupyterlab launcher, use the following command:<br>
```tensorboard --logdir <path-to-logs> --host 0.0.0.0```
- If you're **not using Docker**, from a terminal :<br>
```tensorboard --logdir <path-to-logs>```
**Note:** Only one tensorboard instance can be used at a time.
%% Cell type:markdown id: tags:
## Otherwise, in the real world, from Jupyter (***)
It's the easiest and the best way!
Launch Tensorboard directly from Jupyter.
Works very well on Jean-Zay (at IDRIS) :-)
%% Cell type:markdown id: tags:
## Otherwise, in the real world, Tensorboard as a magic command (**)
Tensorboard can be run from Jupyter with a magic command.
See [documentation](https://www.tensorflow.org/tensorboard/tensorboard_in_notebooks)
Load the extension : ```%load_ext tensorboard```
Start tensorboard : ```%tensorboard --logdir logs```
%% Cell type:raw id: tags:
%load_ext tensorboard
%tensorboard --logdir logs
%% Cell type:markdown id: tags:
## Otherwise, in the real world, Option 2 - Shell command (*)
Basic way, from a shell
More about it : `# tensorboard --help`
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3LSTM1] - Basic Keras LSTM Layer
<!-- DESC --> A small example of an LSTM layer in Keras
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import numpy as np
```
%% Cell type:code id: tags:
``` python
input = keras.random.normal( [32, 20, 8] )
lstm = keras.layers.LSTM(16)
output = lstm(input)
print('input shape is : ',input.shape)
print('output shape is : ',output.shape)
```
%% Cell type:code id: tags:
``` python
input = keras.random.normal( [32, 20, 8] )
lstm = keras.layers.LSTM(18, return_sequences=True, return_state=True)
output, memory_state, carry_state = lstm(input)
print('input shape : ',input.shape)
print('output shape : ',output.shape)
print('memory_state : ', memory_state.shape)
print('carry_state : ', carry_state.shape)
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
German Traffic Sign Recognition Benchmark (GTSRB)
=================================================
---
Introduction au Deep Learning (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020
## Episode 5.1 : Full Convolutions / run
Our main steps:
- Run Full-convolution.ipynb as a batch :
- Notebook mode
- Script mode
- Tensorboard follow up
## 1/ Run a notebook as a batch
To run a notebook :
```jupyter nbconvert --to notebook --execute <notebook>```
%% Cell type:raw id: tags:
%%bash
# ---- This will execute and save a notebook
#
jupyter nbconvert --ExecutePreprocessor.timeout=-1 --to notebook --output='./run/full_convolutions' --execute '05-Full-convolutions.ipynb'
%% Cell type:markdown id: tags:
## 2/ Export as a script (better choice)
To export a notebook as a script :
```jupyter nbconvert --to script <notebook>```
To run the script :
```ipython <script>```
%% Cell type:code id: tags:
``` python
%%bash
# ---- This will convert a notebook to a notebook.py script
#
jupyter nbconvert --to script --output='./run/full_convolutions_B' '05-Full-convolutions.ipynb'
```
%% Output
[NbConvertApp] Converting notebook 05-Full-convolutions.ipynb to script
[NbConvertApp] Writing 11305 bytes to ./run/full_convolutions_B.py
%% Cell type:code id: tags:
``` python
!ls -l ./run/*.py
```
%% Output
-rw-r--r-- 1 pjluc pjluc 11305 Jan 21 00:13 ./run/full_convolutions_B.py
%% Cell type:markdown id: tags:
## 3/ Batch submission
Create batch script :
%% Cell type:code id: tags:
``` python
%%writefile "./run/batch_full_convolutions_B.sh"
#!/bin/bash
#OAR -n Full convolutions
#OAR -t gpu
#OAR -l /nodes=1/gpudevice=1,walltime=01:00:00
#OAR --stdout _batch/full_convolutions_%jobid%.out
#OAR --stderr _batch/full_convolutions_%jobid%.err
#OAR --project deeplearningshs
#---- For cpu
# use :
# OAR -l /nodes=1/core=32,walltime=01:00:00
# and add a 2>/dev/null to ipython xxx
# ----------------------------------
# _ _ _
# | |__ __ _| |_ ___| |__
# | '_ \ / _` | __/ __| '_ \
# | |_) | (_| | || (__| | | |
# |_.__/ \__,_|\__\___|_| |_|
# Full convolutions
# ----------------------------------
#
CONDA_ENV=deeplearning2
RUN_DIR=~/fidle/GTSRB
RUN_SCRIPT=./run/full_convolutions_B.py
# ---- Cuda Conda initialization
#
echo '------------------------------------------------------------'
echo "Start : $0"
echo '------------------------------------------------------------'
#
source /applis/environments/cuda_env.sh dahu 10.0
source /applis/environments/conda.sh
#
conda activate "$CONDA_ENV"
# ---- Run it...
#
cd $RUN_DIR
ipython $RUN_SCRIPT
```
%% Output
Writing ./run/batch_full_convolutions_B.sh
%% Cell type:code id: tags:
``` python
%%bash
chmod 755 ./run/*.sh
chmod 755 ./run/*.py
ls -l ./run/*full_convolutions*
```
%% Output
-rwxr-xr-x 1 pjluc pjluc 1045 Jan 21 00:15 ./run/batch_full_convolutions_B.sh
-rwxr-xr-x 1 pjluc pjluc 611 Jan 19 15:53 ./run/batch_full_convolutions.sh
-rwxr-xr-x 1 pjluc pjluc 11305 Jan 21 00:13 ./run/full_convolutions_B.py
%% Cell type:raw id: tags:
%%bash
./run/batch_full_convolutions.sh
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [PGRAD1] - Gradient illustration with PyTorch
<!-- DESC --> An example of gradient computation with PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- An example of gradient computation with PyTorch
## What we're going to do :
- An example of gradient computation with PyTorch
%% Cell type:code id: tags:
``` python
import torch
```
%% Cell type:markdown id: tags:
## Pure Python
%% Cell type:code id: tags:
``` python
# ---- My basic function f
def f(x):
    y = x*x + 4*x - 5
    return y

def df(x):
    y = 2*x + 4
    return y

# ---- Examples :
print('f(1) is : ', f(1))
print('f(2) is : ', f(2))
print('df(3) is : ', df(3))
```
%% Output
f(1) is : 0
f(2) is : 7
df(3) is : 10
%% Cell type:markdown id: tags:
## Using Torch
%% Cell type:markdown id: tags:
Get a nice tensor, with `requires_grad=True` :-)
%% Cell type:code id: tags:
``` python
x = torch.tensor(3.0, requires_grad = True)
print("x:", x)
```
%% Output
x: tensor(3., requires_grad=True)
%% Cell type:markdown id: tags:
Define our function..
%% Cell type:code id: tags:
``` python
y = x*x + 4*x - 5
```
%% Cell type:markdown id: tags:
Compute gradient with the backward function
%% Cell type:code id: tags:
``` python
y.backward()
```
%% Cell type:code id: tags:
``` python
dx = x.grad
print('dx=', dx)
```
%% Output
dx= tensor(10.)
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
Running Tensorboard from Jupyter lab
====================================
---
Introduction au Deep Learning (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020
Version : 1.0
%% Cell type:markdown id: tags:
## 1/ Method 1 : Shell command
%% Cell type:code id: tags:
``` python
%%bash
tensorboard_start --logdir ./run/logs
```
%% Cell type:code id: tags:
``` python
%%bash
tensorboard_status
```
%% Cell type:code id: tags:
``` python
%%bash
tensorboard_stop
```
%% Cell type:markdown id: tags:
## Method 2 : Magic command
**Start**
%% Cell type:code id: tags:
``` python
%load_ext tensorboard
```
%% Cell type:code id: tags:
``` python
%tensorboard --port 21277 --host 0.0.0.0 --logdir ./run/logs
```
%% Cell type:markdown id: tags:
**Stop**
No way... use bash method
## Method 3 : Tensorboard module
**Start**
%% Cell type:code id: tags:
``` python
import tensorboard.notebook as tsb
```
%% Cell type:code id: tags:
``` python
tsb.start('--port 21277 --host 0.0.0.0 --logdir ./run/logs')
```
%% Cell type:markdown id: tags:
**Check**
%% Cell type:code id: tags:
``` python
a=tsb.list()
```
%% Cell type:markdown id: tags:
**Stop**
No way... use bash method
%% Cell type:code id: tags:
``` python
!kill 214798
```
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [FID1] - Example of a Fidle notebook
<!-- DESC --> A simple example of a Fidle notebook
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
%% Cell type:markdown id: tags:
> **Note :** Remember to fill in the TITLE, DESC and AUTHOR tags of the cell above (see the markdown source)
%% Cell type:markdown id: tags:
# Step 1 - Init Python
> Remember to **import** the **Fidle module**
> Remember to perform the **initialization of the Fidle environment**
> `FID1` is the notebook identifier (run_id)
> `run_dir` is a folder for the notebook outputs (typically ./run/<run_id>)
> `datasets_dir` is the folder containing the Fidle datasets
%% Cell type:code id: tags:
``` python
import torch
import fidle

# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('FID1')
```
%% Cell type:markdown id: tags:
## Parameters
> Here we have (for example) 3 parameters : scale, x and batch_size
%% Cell type:code id: tags:
``` python
scale = 0.1
x = 12
batch_size = 64
```
%% Cell type:markdown id: tags:
> The call below defines the parameters that can be overridden during a batch execution via the fid command `run_ci...`
%% Cell type:code id: tags:
``` python
fidle.override('scale', 'x', 'batch_size')
```
%% Cell type:markdown id: tags:
## Working part...
(Everything our notebook does...)
%% Cell type:code id: tags:
``` python
print('scale=', scale)
```
%% Cell type:markdown id: tags:
## End part
%% Cell type:markdown id: tags:
> To end the notebook, we can:
> - call `fidle.end()` to display some useful information
> - insert a nice logo in markdown
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
......
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [OPT1] - Training setup optimization
<!-- DESC --> The goal of this notebook is to go through a typical deep learning model training
<!-- AUTHOR : Kamel Guerda (CNRS/IDRIS), Léo Hunout (CNRS/IDRIS) -->
## Objectives :
**Practice lab : Optimize your training process**
%% Cell type:markdown id: tags:
## Introduction
This lab takes place as a practical exercise of the [fidle](https://fidle.cnrs.fr/) online course N°16.
The goal of this notebook is to go through a typical deep learning model training. We will see what can be changed to optimize this training setup but also good practices to make more efficient experiments.
This notebook makes use of:
- The CIFAR10 dataset
- A Resnet model
- Pytorch
- A GPU (the notebook can be run on Jean-Zay if you have an account, on Google Colab with a 16 GB GPU, or at home with a dedicated GPU by scaling down the batch_size)
In particular we will work on:
- the dataloader strategy used to load data
- the model initial weights, in particular using a pretrained model
- the learning rate and learning rate scheduler
- the optimizer
- visualizing and comparing results using python, tensorboard
- various good practices/reminders
> First, you can do a complete execution of the notebook.
> **Then comeback from the start and follow the instructions to edit various components for better performance. You can also change them during the first execution if you have some intuitions about what should be changed and how.**
> **In order to compare performance, only change the xxx_optim variables, which are the ones you will use in your optimized training**
%% Cell type:code id: tags:
```
!nvidia-smi
```
%% Cell type:markdown id: tags:
## Few imports
%% Cell type:code id: tags:
```
import os
import time
import random
import numpy as np
import torch
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import _LRScheduler
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.models.resnet import ResNet18_Weights
import matplotlib.pyplot as plt
from datetime import datetime
from torch.utils.tensorboard import SummaryWriter
```
%% Cell type:markdown id: tags:
## Fix random seeds
In order to have reproducible experiments, it is good practice to fix the seeds of the random number generators.
Warning : there might be more seeds to set than you expect! Math, visualization, transformation libraries, ...
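For example (a sketch; which of these you actually need depends on your libraries and hardware), the GPU generators and cuDNN can also be made deterministic:
```python
import torch

torch.cuda.manual_seed_all(123)              # seed the generators of all GPUs
# Stricter (and usually slower) determinism for cuDNN kernels:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
```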
%% Cell type:code id: tags:
```
random.seed(123)
np.random.seed(123)
torch.manual_seed(123)
```
%% Cell type:markdown id: tags:
## Some functions
Below we define a few functions that will be used further in the notebook.
**Do not change them unless you know what and why you are doing it.**
%% Cell type:code id: tags:
```
def iter_dataloader(dataloader, epochs, args):
for epoch in range(epochs):
for i, (images, labels) in enumerate(dataloader):
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
def evaluate(dataloader, model, criterion, args):
'''
A simple loop for evaluation
'''
total_loss = 0
correct = 0
total = 0
with torch.no_grad():
for i, (images, labels) in enumerate(dataloader):
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
outputs = model(images)
loss = criterion(outputs,labels)
_, predicted = torch.max(outputs.data, 1)
total_loss += loss
total += labels.size(0)
correct += (predicted == labels).sum().item()
loss = (total_loss/total).item()
accuracy = (correct/total)*100
return loss, accuracy
def train_default(train_loader, val_loader, model, optimizer, criterion, args):
'''
The default simple training loop
'''
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []
time_start = time.time()
for epoch in range(args['epochs']):
print("Epoch ", epoch)
for i, (images, labels) in enumerate(train_loader):
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
# Zero the parameter gradients
optimizer.zero_grad()
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward pass
loss.backward()
# Optimize
optimizer.step()
# Evaluate at the end of the epoch on the train set
train_loss, train_accuracy = evaluate(train_loader, model, criterion, args)
print("\t Train loss : ", train_loss, "& Train accuracy : ", train_accuracy)
train_losses.append(train_loss)
train_accuracies.append(train_accuracy)
# Evaluate at the end of the epoch on the val set
val_loss, val_accuracy = evaluate(val_loader, model, criterion, args)
print("\t Validation loss : ", val_loss, "& Validation accuracy : ", val_accuracy)
val_losses.append(val_loss)
val_accuracies.append(val_accuracy)
duration = time.time() - time_start
print('Finished Training in:', duration, 'seconds with mean epoch duration:', duration/args['epochs'], ' seconds')
results = {'model':model,
'train_losses': train_losses,
'train_accuracies': train_accuracies,
'val_losses': val_losses,
'val_accuracies': val_accuracies,
'duration':duration}
return results
def explore_lrs(dataloader,
model,
optimizer,
args,
min_learning_rate_power=-8,
max_learning_rate_power = 1,
num_lrs=10,
steps_per_lr=50):
lrs = np.logspace(min_learning_rate_power, max_learning_rate_power, num=num_lrs)
print("Learning rate space : ", lrs)
model_init_state = model.state_dict()
lrs_losses, lrs_metric_avg, lrs_metric_var =[], [],[]
# Iterate through learning rates to test
for lr in lrs:
print("Testing lr:", '{:.2e}'.format(lr))
# Reset model
model.load_state_dict(model_init_state)
# Change learning rate in optimizer
for group in optimizer.param_groups:
group['lr'] = lr
# Reset metric tracking
lr_losses =[]
# Training steps
for step in range(steps_per_lr):
images, labels = next(iter(dataloader))
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
optimizer.zero_grad()
outputs = model(images)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
lr_losses.append(loss.item())
print(lr_losses)
# Compute loss average for lr
lr_loss_avg = np.mean(lr_losses)
lr_loss_avg = lr_losses[-1]
lrs_losses.append(lr_loss_avg)
# Compute metric (discounted average gradient of the loss)
lr_gradients = np.gradient(lr_losses)
lr_metric_avg = np.mean(lr_gradients)
lr_metric_var = np.var(lr_gradients)
lrs_metric_avg.append(lr_metric_avg)
lrs_metric_var.append(lr_metric_var)
model.load_state_dict(model_init_state)
return lrs, lrs_losses, lrs_metric_avg, lrs_metric_var
def plot_eval(lrs, lrs_losses, lrs_metric_avg, lrs_metric_var):
print("lrs: ", lrs)
print("lrs_losses: ", lrs_losses)
print("lrs_metric_avg: ", lrs_metric_avg)
print("lrs_metric_var: ", lrs_metric_var)
fig, axs = plt.subplots(3, figsize=(10,15))
axs[0].plot(lrs, lrs_losses, color='blue', label="losses_avg")
axs[0].set_xlabel('learning rate', fontsize=15)
axs[0].set_ylabel('Loss', fontsize=15)
axs[0].set_xscale('log')
axs[0].set_yscale('symlog')
axs[0].set_ylim([0, min(lrs_losses)*100])
axs[1].plot(lrs, lrs_metric_avg, color='red', label="discounted_metric_avg")
axs[1].hlines(y=0, xmin=lrs[0], xmax=lrs[-1], linewidth=2, color='black')
axs[1].set_xlabel('learning rate', fontsize=15)
axs[1].set_ylabel('Metric average', fontsize=15)
axs[1].set_xscale('log')
axs[1].set_yscale('symlog')
axs[1].set_ylim([-abs(lrs_metric_avg[0])*100, abs(lrs_metric_avg[0])*100])
axs[2].plot(lrs, lrs_metric_var, color='green', label="discounted_metric_var")
axs[2].set_xlabel('learning rate', fontsize=15)
axs[2].set_ylabel('Metric variance', fontsize=15)
axs[2].set_xscale('log')
axs[2].set_yscale('symlog')
axs[2].set_ylim([0, min(lrs_metric_var)*1000])
plt.show()
def compare_trainings(results_default, results_optim):
fig, axs = plt.subplots(2, figsize=(10,10))
fig.suptitle('Performance comparison', fontsize=18)
train_alpha = 0.5
# Validation losses
axs[0].plot(range(len(results_default['val_losses'])), results_default['val_losses'], color='blue', label="default val")
axs[0].plot(range(len(results_optim['val_losses'])), results_optim['val_losses'], color='red', label="optim val")
# Training losses
axs[0].plot(range(len(results_default['train_losses'])), results_default['train_losses'], color='blue', label="default train", linestyle='--', alpha = train_alpha)
axs[0].plot(range(len(results_optim['train_losses'])), results_optim['train_losses'], color='red', label="optim train", linestyle='--', alpha = train_alpha)
axs[0].set_xlabel('Epochs', fontsize=14)
axs[0].set_ylabel('Loss', fontsize=14)
axs[0].set_xscale('linear')
axs[0].set_yscale('linear')
max_loss = max(results_default['train_losses']+results_default['val_losses']+results_optim['train_losses']+results_optim['val_losses'])
axs[0].set_ylim([0, max_loss])
axs[0].legend(loc="upper right")
# Validation accuracies
axs[1].plot(range(len(results_default['val_accuracies'])), results_default['val_accuracies'], color='blue', label="default val")
axs[1].plot(range(len(results_optim['val_accuracies'])), results_optim['val_accuracies'], color='red', label="optim val")
# Training default accuracies
axs[1].plot(range(len(results_default['train_accuracies'])), results_default['train_accuracies'], color='blue', label="default train", linestyle='--', alpha=train_alpha)
axs[1].plot(range(len(results_optim['train_accuracies'])), results_optim['train_accuracies'], color='red', label="optim train", linestyle='--', alpha=train_alpha)
axs[1].set_xlabel('Epochs', fontsize=15)
axs[1].set_ylabel('Accuracy', fontsize=15)
axs[1].set_xscale('linear')
axs[1].set_yscale('linear')
axs[1].set_ylim([0, 100])
axs[1].legend(loc="lower right")
```
%% Cell type:markdown id: tags:
## Training configuration variables
For the first run, you can keep all the default values.
For the optimized run, you will change some of these parameters.
>In particular, you will have to change :
>- the batch_size
>- the learning rate
%% Cell type:code id: tags:
```
args = {
'batch_size':64,
'epochs': 10,
'image_size': 224,
'learning_rate': 0.001,
'momentum': 0.9,
'weight_decay': 0.0001,
'download': True,
'device': torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
'dataset_root_dir': os.getcwd(),
}
#################################################
############# Modify the code below #############
#################################################
args_optim = {
'batch_size':64,
'epochs': 10,
'image_size': 224,
'learning_rate': 0.001,
'momentum': 0.9,
'weight_decay': 0.0001,
'download': True,
'device': torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
'dataset_root_dir': os.getcwd(),
}
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler (click to reveal)</summary>
```python
args_optim = {
'batch_size':512,
'epochs': 10,
'image_size': 224,
'learning_rate': 0.001,
'momentum': 0.9,
'weight_decay': 0.01,
'download': True,
'device': torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
'dataset_root_dir': os.getcwd(),
}
```
</details>
%% Cell type:markdown id: tags:
## Data transformation and augmentation
Below, we define the transformations to apply to each image when it is loaded.
These transformations can serve three main purposes:
- having the data in the desired format for the model (systematic transformation)
- correcting/normalizing the data (systematic transformation)
- artificially increasing the amount of data by transforming it (random transformation)
Warning: the evaluation dataset should always stay the same, so you should not apply random transformations to it.
> Enrich the transformations by using those provided by torchvision: https://pytorch.org/vision/0.12/transforms.html
> **Change transform_optim and val_transform_optim only**
%% Cell type:code id: tags:
```
transform = transforms.Compose([transforms.ToTensor()]) # convert the PIL Image to a tensor
val_transform = transforms.Compose([transforms.ToTensor()]) # convert the PIL Image to a tensor
#################################################
############# Modify the code below #############
#################################################
transform_optim = transforms.Compose([transforms.ToTensor()]) # convert the PIL Image to a tensor
val_transform_optim = transforms.Compose([transforms.ToTensor()]) # convert the PIL Image to a tensor
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
```python
transform_optim = transforms.Compose([
transforms.RandomHorizontalFlip(), # Horizontal Flip - Data Augmentation
transforms.ToTensor() # convert the PIL Image to a tensor
])
val_transform_optim = transforms.Compose([
transforms.ToTensor() # convert the PIL Image to a tensor
])
```
</details>
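Going a bit further (a hedged suggestion, not part of the reference solution): normalizing the images with per-channel statistics is another systematic transformation that often helps convergence. The mean/std values below are approximate CIFAR-10 statistics, given only as an illustration.
```python
# Hypothetical refinement: per-channel normalization with approximate CIFAR-10 statistics
transform_optim = transforms.Compose([
    transforms.RandomHorizontalFlip(),                  # data augmentation
    transforms.ToTensor(),                              # PIL Image -> tensor in [0,1]
    transforms.Normalize((0.4914, 0.4822, 0.4465),      # approximate per-channel means
                         (0.2470, 0.2435, 0.2616))      # approximate per-channel stds
])
val_transform_optim = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2470, 0.2435, 0.2616))
])
```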
%% Cell type:markdown id: tags:
## Dataset
In the cell below, we define the dataset.
Here we have two subsets:
- a training subset for model optimization
- a test subset for model evaluation
%% Cell type:code id: tags:
```
train_dataset = torchvision.datasets.CIFAR10(root=args['dataset_root_dir']+'/CIFAR_10', train=True, download=args['download'], transform=transform)
val_dataset = torchvision.datasets.CIFAR10(root=args['dataset_root_dir']+'/CIFAR_10', train=False, download=args['download'], transform=val_transform)
train_dataset_optim = torchvision.datasets.CIFAR10(root=args_optim['dataset_root_dir']+'/CIFAR_10', train=True, download=args_optim['download'], transform=transform_optim)
val_dataset_optim = torchvision.datasets.CIFAR10(root=args_optim['dataset_root_dir']+'/CIFAR_10', train=False, download=args_optim['download'], transform=val_transform_optim)
```
%% Cell type:markdown id: tags:
## Dataloader
The DataLoader class in PyTorch is responsible for loading and batching data from a Dataset object (for example a torchvision dataset).
It works by creating a Python iterable over the dataset and yielding a batch of data at each iteration.
Those batches will be fed to the model for training or inference.
The DataLoader class also provides various options for shuffling, batching, and parallelizing the data loading process, making it a useful tool for efficient and flexible data handling in PyTorch.
> Take a look at the DataLoader documentation : https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader
> Optimize the dataloader by taking advantage of parallelism and smart use of computational resources:
>- batch_size
>- pin_memory
>- prefetch_factor
>- persistent_workers
>- num_workers
%% Cell type:code id: tags:
```
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=args['batch_size'],
shuffle=True,
drop_last=True)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
batch_size=args['batch_size'],
shuffle=False,
drop_last=True)
#################################################
############# Modify the code below #############
#################################################
train_loader_optim = torch.utils.data.DataLoader(dataset=train_dataset_optim,
batch_size=args_optim['batch_size'],
shuffle=True,
drop_last=True)
val_loader_optim = torch.utils.data.DataLoader(dataset=val_dataset_optim,
batch_size=args_optim['batch_size'],
shuffle=False,
drop_last=True)
```
%% Cell type:code id: tags:
```
%timeit -r 1 -n 1 iter_dataloader(train_loader, 1, args)
%timeit -r 1 -n 1 iter_dataloader(train_loader_optim, 1, args_optim)
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
Using several workers, persistent workers, pinned memory and prefetching lets the data loading run in parallel with the computation, which usually removes the dataloader bottleneck:
```python
train_loader_optim = torch.utils.data.DataLoader(dataset=train_dataset_optim,
batch_size=args_optim['batch_size'],
shuffle=True,
drop_last=True,
num_workers=10,
persistent_workers=True,
pin_memory=True,
prefetch_factor=10)
val_loader_optim = torch.utils.data.DataLoader(dataset=val_dataset_optim,
batch_size=args_optim['batch_size'],
shuffle=False,
drop_last=True,
num_workers=10,
persistent_workers=True,
pin_memory=True,
prefetch_factor=10)
```
</details>
%% Cell type:markdown id: tags:
## Model
> Do not forget to verify that you use the right compute resources for your model
> By default, the model resnet18 is initialized with random weights but you could try using a pretrained model : https://pytorch.org/vision/main/models/generated/torchvision.models.resnet18.html#torchvision.models.ResNet18_Weights
%% Cell type:code id: tags:
```
model = models.resnet18()
model = model.to(args['device'])
model.name = 'Resnet-18'
print("Stock model on device:", next(model.parameters()).device)
#################################################
############# Modify the code below #############
#################################################
model_optim = models.resnet18()
model_optim = model_optim.to(args_optim['device'])
model_optim.name = 'Resnet-18'
print("Optimized model on device:", next(model_optim.parameters()).device)
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
```python
model_optim = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)  # start from pretrained weights
model_optim = model_optim.to(args_optim['device'])
model_optim.name = 'Resnet-18'
print("Optimized model on device:", next(model_optim.parameters()).device)
```
</details>
%% Cell type:markdown id: tags:
## Loss
We use a standard loss for classification.
To keep the comparison fair, if you change the loss, change it for both runs.
%% Cell type:code id: tags:
```
criterion = torch.nn.CrossEntropyLoss()
criterion_optim = torch.nn.CrossEntropyLoss()
```
%% Cell type:markdown id: tags:
## Optimizer
> In order to speed up the training, you can try to use a different optimizer: https://pytorch.org/docs/stable/optim.html#base-class
%% Cell type:code id: tags:
```
optimizer = torch.optim.SGD(model.parameters(), lr=args['learning_rate'], momentum=args['momentum'], weight_decay=args['weight_decay'])
#################################################
############# Modify the code below #############
#################################################
optimizer_optim = torch.optim.SGD(model_optim.parameters(), lr=args_optim['learning_rate'], momentum=args_optim['momentum'], weight_decay=args_optim['weight_decay'])
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
```python
optimizer_optim = torch.optim.AdamW(model_optim.parameters(), lr = args_optim['learning_rate'], weight_decay=args_optim['weight_decay'])
```
</details>
%% Cell type:markdown id: tags:
## Learning rate scheduler
In order to adjust the learning rate over iterations/epochs, we can make use of a learning rate scheduler.
To use an LR scheduler, you will need to:
- instantiate the scheduler (in the code cell below)
- adapt the training loop (in the "Training" section)
Take a look at this page : https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate which:
- describes how to use a scheduler (warning: some schedulers are updated at the step level and others at the epoch level)
- lists the available schedulers (you could also create your own starting from the _LRScheduler class)
> **You can define your scheduler here.**
> **You will have to modify the training loop later on.**
%% Cell type:code id: tags:
```
scheduler = None
#################################################
############# Modify the code below #############
#################################################
scheduler_optim = None
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
```python
scheduler_optim = torch.optim.lr_scheduler.OneCycleLR(optimizer_optim,
max_lr=args_optim['learning_rate'],
steps_per_epoch = len(train_loader_optim),
epochs=args_optim['epochs'])
```
</details>
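Note that OneCycleLR above is stepped once per batch. As a contrast, here is a minimal, hedged sketch of an epoch-level scheduler (illustrative values); with this kind of scheduler, `scheduler.step()` must be called once at the end of each epoch instead of after each batch:
```python
# Hedged alternative: an epoch-level scheduler, to be stepped once per epoch in the training loop
scheduler_optim = torch.optim.lr_scheduler.StepLR(optimizer_optim,
                                                  step_size=3,   # illustrative: decay every 3 epochs
                                                  gamma=0.1)     # multiply the learning rate by 0.1
```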
%% Cell type:markdown id: tags:
## Model training (reference performances)
Once we have all our main actors, we can set up the stage, that is, our training loop.
Below we use a typical training loop, as you can find in https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
> **Run it a first time to have a performance baseline with all the default values.**
%% Cell type:code id: tags:
```
results_default = train_default(train_loader, val_loader, model, optimizer, criterion, args)
```
%% Cell type:markdown id: tags:
## Speeding up the hyperparameter search : Learning Rate Finder
Whether we are using a scheduler or not, we need to determine either:
- the constant learning rate we want to use,
- or the maximum learning rate used by the scheduler.
In the first situation, you just want a good all-rounder learning rate, giving a relatively fast convergence while limiting the oscillations at the end of training.
In the second situation, you can focus on having the fastest initial convergence, since the oscillations will generally be taken care of by the decreasing learning rate strategy. Thus, we want the highest maximum learning rate possible.
Ideally, we would find a good learning rate quickly, in order to speed up our hyperparameter search.
Various strategies, more or less complex, exist to estimate this value.
Below, we try to find the learning rate by doing a few steps on a range of learning rates. We evaluate each learning rate to determine the best one to choose for our full training.
> **As this step can take quite some time, we provided you with some values for the default config which you are not supposed to change anyway. You can find them in the next spoiler**
> **Uncomment explore_lrs to rerun the exploration, otherwise you can reuse the given values.**
> **Be careful to re-run this cell to reset the model, optimizer, etc., so that you get a "fresh" exploration each time.**
> **Also, if you change the optimizer for the optimized run, change it here as well to find the best learning rate for that optimizer** (or rerun the cell where you defined it).
%% Cell type:code id: tags:
```
lrs, lrs_losses, lrs_metric_avg, lrs_metric_var = explore_lrs(train_loader_optim,
model_optim,
optimizer_optim,
args_optim,
min_learning_rate_power=-6,
max_learning_rate_power = 1,
num_lrs=8,
steps_per_lr=100)
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
```python
lrs=[1.e-06, 1.e-05, 1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01]
lrs_losses= [7.502097129821777, 7.22658634185791, 5.24326229095459, 1.7600191831588745, 1.4037541151046753, 2.136382579803467, 2.1029751300811768, 446.49951171875]
lrs_metric_avg=[0.0017601490020751954, -0.005245075225830078, -0.041641921997070314, -0.07478624820709229, -0.007052739858627319, 0.04763659238815308, 0.03924872875213623, 9.939403522014619]
lrs_metric_var=[0.0006510000222988311, 0.0004144988674492198, 0.000668689274974986, 0.013876865854565344, 0.001481160611942387, 0.3384368026131311, 0.8817071610439394, 2157852536609.2454]
```
</details>
%% Cell type:code id: tags:
```
plot_eval(lrs, lrs_losses, lrs_metric_avg, lrs_metric_var)
```
%% Cell type:markdown id: tags:
## Optimize the training loop
> Adapt the dataset transformations, batch_size & dataloader, lr & lr_scheduler, and optimizer in order to achieve better classification results in less time.
> Change this training loop to include:
> - a learning rate scheduler : https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
> - a strategy such as early stopping or patience : https://www.kaggle.com/code/akhileshrai/tutorial-early-stopping-vanilla-rnn-pytorch?scriptVersionId=26440051&cellId=10#4.-Early-Stopping
> **Also think about changing the call to the function if you added arguments.**
> We already added automatic mixed precision for you; it will be covered in the next course.
> **BEFORE RUNNING, WE NEED TO REINITIALIZE THE MODEL, OPTIMIZER AND SCHEDULER FOR A FAIR FIGHT. Re-apply below the changes you made to them.**
%% Cell type:code id: tags:
```
model_optim = models.resnet18().to(args_optim['device'])
model_optim.name = 'Resnet-18'
optimizer_optim = torch.optim.SGD(model_optim.parameters(), args_optim['learning_rate'], args_optim['momentum'], args_optim['weight_decay'])
scheduler_optim = None
```
%% Cell type:code id: tags:
```
def train_optim(train_loader, val_loader, model, optimizer, criterion, args):
'''
The default simple training loop
'''
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []
time_start = time.time()
for epoch in range(args['epochs']):
print("Epoch ", epoch)
for i, (images, labels) in enumerate(train_loader):
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
# Zero the parameter gradients
optimizer.zero_grad()
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward pass
loss.backward()
# Optimize
optimizer.step()
# Evaluate at the end of the epoch on the train set
train_loss, train_accuracy = evaluate(train_loader, model, criterion, args)
print("\t Train loss : ", train_loss, "& Train accuracy : ", train_accuracy)
train_losses.append(train_loss)
train_accuracies.append(train_accuracy)
# Evaluate at the end of the epoch on the val set
val_loss, val_accuracy = evaluate(val_loader, model, criterion, args)
print("\t Validation loss : ", val_loss, "& Validation accuracy : ", val_accuracy)
val_losses.append(val_loss)
val_accuracies.append(val_accuracy)
duration = time.time() - time_start
print('Finished Training in:', duration, 'seconds with mean epoch duration:', duration/args['epochs'], ' seconds')
results = {'model':model,
'train_losses': train_losses,
'train_accuracies': train_accuracies,
'val_losses': val_losses,
'val_accuracies': val_accuracies,
'duration':duration}
return results
```
%% Cell type:markdown id: tags:
<details>
<summary>Spoiler</summary>
```python
def train_optim(train_loader, val_loader, model, optimizer, criterion, scheduler, args):
'''
The optimized training loop, with automatic mixed precision and an optional LR scheduler
'''
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []
time_start = time.time()
scaler = GradScaler()
for epoch in range(args['epochs']):
print("Epoch ", epoch)
for i, (images, labels) in enumerate(train_loader):
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
# Zero the parameter gradients
optimizer.zero_grad()
# Forward pass
with autocast():
outputs = model(images)
loss = criterion(outputs, labels)
# Backward pass
scaler.scale(loss).backward()
# Optimize
scaler.step(optimizer)
# Updates the scale for next iteration.
scaler.update()
# Update Learning Rate scheduler, warning some schedulers are updated every epoch and not step.
if scheduler is not None:
scheduler.step()
# Evaluate at the end of the epoch
train_loss, train_accuracy = evaluate(train_loader, model, criterion, args)
print("\t Train loss : ", train_loss, "& Train accuracy : ", train_accuracy)
train_losses.append(train_loss)
train_accuracies.append(train_accuracy)
# Evaluate at the end of the epoch
val_loss, val_accuracy = evaluate(val_loader, model, criterion, args)
print("\t Validation loss : ", val_loss, "& Validation accuracy : ", val_accuracy)
val_losses.append(val_loss)
val_accuracies.append(val_accuracy)
duration = time.time() - time_start
print('Finished Training in:', duration, 'seconds with mean epoch duration:', duration/args['epochs'], ' seconds')
results = {'model':model,
'train_losses': train_losses,
'train_accuracies': train_accuracies,
'val_losses': val_losses,
'val_accuracies': val_accuracies,
'duration':duration}
return results
```
</details>
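The spoiler above covers mixed precision and the scheduler; early stopping is left to you. Below is a minimal, hedged sketch of a patience-based helper (names and values are illustrative, not the reference solution): create it before the epoch loop, then break out of the loop when its `step()` returns True, right after the validation evaluation.
```python
class EarlyStopping:
    '''Hypothetical patience-based early stopping helper (illustrative only).'''
    def __init__(self, patience=3):
        self.patience   = patience        # epochs without improvement before stopping
        self.best_loss  = float('inf')    # best validation loss seen so far
        self.bad_epochs = 0               # consecutive epochs without improvement
    def step(self, val_loss):
        '''Return True when training should stop.'''
        if val_loss < self.best_loss:
            self.best_loss  = val_loss
            self.bad_epochs = 0
        else:
            self.bad_epochs += 1
        return self.bad_epochs >= self.patience
```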
%% Cell type:code id: tags:
```
results_optim = train_optim(train_loader_optim, val_loader_optim, model_optim, optimizer_optim, criterion_optim, args_optim)
```
%% Cell type:markdown id: tags:
## Classification performances comparison
> Take a look at
>- the loss and accuracy evolution
>- the difference in timings between the two runs
%% Cell type:code id: tags:
```
print("Duration for default setup training:", results_default["duration"])
print("Duration for optim setup training:", results_optim["duration"])
```
%% Cell type:code id: tags:
```
compare_trainings(results_default, results_optim)
```
%% Cell type:markdown id: tags:
## Tensorboard
Below we added a profiler and a logger for Tensorboard. If you want to do it yourself in future codes, you can take inspiration from the following documentation:
- Pytorch : https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html
- IDRIS : http://www.idris.fr/jean-zay/pre-post/jean-zay-tensorboard.html
> Try to add another metric to the logger, for example the validation loss at each epoch.
%% Cell type:code id: tags:
```
def train_default_tensorboard(train_loader, val_loader, model, optimizer, criterion, args, exp_name):
log_dir = "./logs/"+exp_name
writer = SummaryWriter(log_dir)
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []
time_start = time.time()
with torch.profiler.profile(
schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
on_trace_ready=torch.profiler.tensorboard_trace_handler(log_dir),
record_shapes=True,
profile_memory=True,
with_stack=True
) as prof:
for epoch in range(args['epochs']):
print("Epoch ", epoch)
for i, (images, labels) in enumerate(train_loader):
# distribution of images and labels to all GPUs
images = images.to(args['device'], non_blocking=True)
labels = labels.to(args['device'], non_blocking=True)
# Zero the parameter gradients
optimizer.zero_grad()
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Log a scalar (loss)
writer.add_scalar("Loss/train", loss, i+epoch*len(train_loader))
# Backward pass
loss.backward()
# Optimize
optimizer.step()
# Indicate to profiler when a step is over
prof.step()
# Evaluate at the end of the epoch on the train set
train_loss, train_accuracy = evaluate(train_loader, model, criterion, args)
print("\t Train loss : ", train_loss, "& Train accuracy : ", train_accuracy)
train_losses.append(train_loss)
train_accuracies.append(train_accuracy)
# Evaluate at the end of the epoch on the val set
val_loss, val_accuracy = evaluate(val_loader, model, criterion, args)
print("\t Validation loss : ", val_loss, "& Validation accuracy : ", val_accuracy)
val_losses.append(val_loss)
val_accuracies.append(val_accuracy)
duration = time.time() - time_start
print('Finished Training in:', duration, 'seconds with mean epoch duration:', duration/args['epochs'], ' seconds')
results = {'model':model,
'train_losses': train_losses,
'train_accuracies': train_accuracies,
'val_losses': val_losses,
'val_accuracies': val_accuracies,
'duration':duration}
return results
```
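%% Cell type:markdown id: tags:
As suggested above, other metrics can be logged with the same writer. A minimal, hedged sketch of what you could add inside `train_default_tensorboard`, right after the end-of-epoch evaluation (the epoch index is used as the step, so there is one point per epoch):
```python
# Hedged addition inside train_default_tensorboard, after the end-of-epoch evaluation:
writer.add_scalar("Loss/val", val_loss, epoch)           # validation loss, once per epoch
writer.add_scalar("Accuracy/val", val_accuracy, epoch)   # validation accuracy, once per epoch
```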
%% Cell type:code id: tags:
```
args["epochs"] = 1
_ = train_default_tensorboard(train_loader, val_loader, model, optimizer, criterion, args, "default_perf")
```
%% Cell type:code id: tags:
```
# Load the TensorBoard notebook extension
!pip install torch_tb_profiler
%load_ext tensorboard
%tensorboard --logdir logs
```
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [PER57] - Perceptron Model 1957
<!-- DESC --> Example of use of a Perceptron, with sklearn and IRIS dataset of 1936 !
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Implement a historical linear classifier with a historical dataset !
- The objective is to predict the type of Iris from the size of its petals.
- Identifying its limitations
The [IRIS dataset](https://archive.ics.uci.edu/ml/datasets/Iris) is probably one of the oldest datasets, dating back to 1936.
## What we're going to do :
- Retrieve the dataset, via scikit learn
- training and classifying
## Step 1 - Import and init
%% Cell type:code id: tags:
``` python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import os,sys
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('PER57')
```
%% Cell type:markdown id: tags:
## Step 2 - Prepare IRIS Dataset
Retrieve a dataset : http://scikit-learn.org/stable/modules/classes.html#module-sklearn.datasets
About the datasets : https://scikit-learn.org/stable/datasets.html#datasets
Data fields (X) :
- 0 : sepal length in cm
- 1 : sepal width in cm
- 2 : petal length in cm
- 3 : petal width in cm
Class (y) :
- 0 : class 0=Iris-Setosa, 1=Iris-Versicolour, 2=Iris-Virginica
### 2.1 - Get dataset
%% Cell type:code id: tags:
``` python
x0,y0 = load_iris(return_X_y=True)
x = x0[:, (2,3)] # We only keep fields 2 and 3
y = y0.copy()
y[ y0==0 ] = 1 # 1 = Iris setosa
y[ y0>=1 ] = 0 # 0 = not iris setosa
df=pd.DataFrame.from_dict({'Length (x1)':x[:,0], 'Width (x2)':x[:,1], 'Setosa {0,1} (y)':y})
display(df)
print(f'x shape : {x.shape}')
print(f'y shape : {y.shape}')
```
%% Cell type:markdown id: tags:
### 2.2 - Train and test sets
%% Cell type:code id: tags:
``` python
x,y = fidle.utils.shuffle_np_dataset(x, y)
n=int(len(x)*0.8)
x_train = x[:n]
y_train = y[:n]
x_test = x[n:]
y_test = y[n:]
print(f'x_train shape : {x_train.shape}')
print(f'y_train shape : {y_train.shape}')
print(f'x_test shape : {x_test.shape}')
print(f'y_test shape : {y_test.shape}')
```
%% Cell type:markdown id: tags:
## Step 3 - Get a perceptron, and train it
%% Cell type:code id: tags:
``` python
pct = Perceptron(max_iter=100, random_state=82, tol=0.01, verbose=1)
pct.fit(x_train, y_train)
```
%% Cell type:markdown id: tags:
## Step 4 - Predictions
%% Cell type:code id: tags:
``` python
y_pred = pct.predict(x_test)
df=pd.DataFrame.from_dict({'Length (x1)':x_test[:,0], 'Width (x2)':x_test[:,1], 'y_test':y_test, 'y_pred':y_pred})
display(df[:15])
```
%% Cell type:markdown id: tags:
## Step 5 - Visualisation
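The decision boundary drawn below follows directly from the trained weights: the perceptron predicts Iris-Setosa when $w_1 x_1 + w_2 x_2 + w_0 \geq 0$, so the boundary is the line
$$x_2 = -\frac{w_1}{w_2}\,x_1 - \frac{w_0}{w_2}$$
whose slope and intercept are exactly what `plot_perceptron` computes from `pct.coef_` and `pct.intercept_`.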
%% Cell type:code id: tags:
``` python
def plot_perceptron(x_train,y_train,x_test,y_test):
a = -pct.coef_[0][0] / pct.coef_[0][1]
b = -pct.intercept_ / pct.coef_[0][1]
box=[x.min(axis=0)[0],x.max(axis=0)[0],x.min(axis=0)[1],x.max(axis=0)[1]]
mx=(box[1]-box[0])/20
my=(box[3]-box[2])/20
box=[box[0]-mx,box[1]+mx,box[2]-my,box[3]+my]
fig, axs = plt.subplots(1, 1)
fig.set_size_inches(10,6)
axs.plot(x_train[y_train==1, 0], x_train[y_train==1, 1], "o", color='tomato', label="Iris-Setosa")
axs.plot(x_train[y_train==0, 0], x_train[y_train==0, 1], "o", color='steelblue',label="Autres")
axs.plot(x_test[y_pred==1, 0], x_test[y_pred==1, 1], "o", color='lightsalmon', label="Iris-Setosa (pred)")
axs.plot(x_test[y_pred==0, 0], x_test[y_pred==0, 1], "o", color='lightblue', label="Autres (pred)")
axs.plot([box[0], box[1]], [a*box[0]+b, a*box[1]+b], "k--", linewidth=2)
axs.set_xlabel("Petal length (cm)", labelpad=15) #, fontsize=14)
axs.set_ylabel("Petal width (cm)", labelpad=15) #, fontsize=14)
axs.legend(loc="lower right", fontsize=14)
axs.set_xlim(box[0],box[1])
axs.set_ylim(box[2],box[3])
fidle.scrawler.save_fig('01-perceptron-iris')
plt.show()
plot_perceptron(x_train,y_train, x_test,y_test)
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:code id:d8a1f0f7 tags:
``` python
from IPython.display import display,Markdown
display(Markdown(open('README.md', 'r').read()))
#
# This README is visible under Jupyter Lab ;-)
# Automatically generated on : 06/01/25 16:42:31
```
%% Output
<a name="top"></a>
[<img width="600px" src="fidle/img/title.svg"></img>](#top)
<!-- --------------------------------------------------- -->
<!-- To correctly view this README under Jupyter Lab -->
<!-- Open the notebook: README.ipynb! -->
<!-- --------------------------------------------------- -->
## About Fidle
This repository contains all the documents and links of the **Fidle Training** .
Fidle (for Formation Introduction au Deep Learning) is a 3-day training session co-organized
by the 3IA MIAI institute, the CNRS, via the Mission for Transversal and Interdisciplinary
Initiatives (MITI) and the University of Grenoble Alpes (UGA).
The objectives of this training are :
- Understanding the **bases of Deep Learning** neural networks
- Develop a **first experience** through simple and representative examples
- Understanding **Tensorflow/Keras** and **Jupyter lab** technologies
- Apprehend the **academic computing environments** Tier-2 or Tier-1 with powerful GPUs
For more information, see **https://fidle.cnrs.fr** :
- **[Fidle site](https://fidle.cnrs.fr)**
- **[Presentation of the training](https://fidle.cnrs.fr/presentation)**
- **[Detailed program](https://fidle.cnrs.fr/programme)**
- **[Subscribe to the list](https://fidle.cnrs.fr/listeinfo), to stay informed !**
- **[Corrected notebooks](https://fidle.cnrs.fr/done)**
- **[Follow us on our channel :](https://fidle.cnrs.fr/youtube)**\
[<img width="120px" style="vertical-align:middle" src="fidle/img/logo-YouTube.png"></img>](https://fidle.cnrs.fr/youtube)
For more information, you can contact us at :
[<img width="200px" style="vertical-align:middle" src="fidle/img/00-Mail_contact.svg"></img>](#top)
Current Version : <!-- VERSION_BEGIN -->3.0.15<!-- VERSION_END -->
## Course materials
| Courses | Notebooks | Datasets | Videos |
|:--:|:--:|:--:|:--:|
| [<img width="50px" src="fidle/img/00-Fidle-pdf.svg"></img><br>**Course slides**](https://fidle.cnrs.fr/supports)<br>The course in pdf format<br>| [<img width="50px" src="fidle/img/00-Notebooks.svg"></img><br>**Notebooks**](https://fidle.cnrs.fr/notebooks)<br> &nbsp;&nbsp;&nbsp;&nbsp;Get a Zip or clone this repository &nbsp;&nbsp;&nbsp;&nbsp;<br>| [<img width="50px" src="fidle/img/00-Datasets-tar.svg"></img><br>**Datasets**](https://fidle.cnrs.fr/datasets-fidle.tar)<br>All the needed datasets<br>|[<img width="50px" src="fidle/img/00-Videos.svg"></img><br>**Videos**](https://fidle.cnrs.fr/youtube)<br>&nbsp;&nbsp;&nbsp;&nbsp;Our Youtube channel&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;|
Have a look about **[How to get and install](https://fidle.cnrs.fr/installation)** these notebooks and datasets.
## Jupyter notebooks
<!-- TOC_BEGIN -->
<!-- Automatically generated on : 06/01/25 16:42:30 -->
### Linear and logistic regression
- **[LINR1](LinearReg/01-Linear-Regression.ipynb)** - [Linear regression with direct resolution](LinearReg/01-Linear-Regression.ipynb)
Low-level implementation, using numpy, of a direct resolution for a linear regression
- **[GRAD1](LinearReg/02-Gradient-descent.ipynb)** - [Linear regression with gradient descent](LinearReg/02-Gradient-descent.ipynb)
Low level implementation of a solution by gradient descent. Basic and stochastic approach.
- **[POLR1](LinearReg/03-Polynomial-Regression.ipynb)** - [Complexity Syndrome](LinearReg/03-Polynomial-Regression.ipynb)
Illustration of the problem of complexity with the polynomial regression
- **[LOGR1](LinearReg/04-Logistic-Regression.ipynb)** - [Logistic regression](LinearReg/04-Logistic-Regression.ipynb)
Simple example of logistic regression with a sklearn solution
### Perceptron Model 1957
- **[PER57](Perceptron/01-Simple-Perceptron.ipynb)** - [Perceptron Model 1957](Perceptron/01-Simple-Perceptron.ipynb)
Example of use of a Perceptron, with sklearn and IRIS dataset of 1936 !
### BHPD regression (DNN), using Keras3/PyTorch
- **[K3BHPD1](BHPD.Keras3/01-DNN-Regression.ipynb)** - [Regression with a Dense Network (DNN)](BHPD.Keras3/01-DNN-Regression.ipynb)
Simple example of a regression with the dataset Boston Housing Prices Dataset (BHPD)
- **[K3BHPD2](BHPD.Keras3/02-DNN-Regression-Premium.ipynb)** - [Regression with a Dense Network (DNN) - Advanced code](BHPD.Keras3/02-DNN-Regression-Premium.ipynb)
A more advanced implementation of the precedent example, using Keras3
### BHPD regression (DNN), using PyTorch
- **[PBHPD1](BHPD.PyTorch/01-DNN-Regression_PyTorch.ipynb)** - [Regression with a Dense Network (DNN)](BHPD.PyTorch/01-DNN-Regression_PyTorch.ipynb)
A Simple regression with a Dense Neural Network (DNN) using Pytorch - BHPD dataset
### Wine Quality prediction (DNN), using Keras3/PyTorch
- **[K3WINE1](Wine.Keras3/01-DNN-Wine-Regression.ipynb)** - [Wine quality prediction with a Dense Network (DNN)](Wine.Keras3/01-DNN-Wine-Regression.ipynb)
Another example of regression, with a wine quality prediction, using Keras 3 and PyTorch
### Wine Quality prediction (DNN), using PyTorch/Lightning
- **[LWINE1](Wine.Lightning/01-DNN-Wine-Regression-lightning.ipynb)** - [Wine quality prediction with a Dense Network (DNN)](Wine.Lightning/01-DNN-Wine-Regression-lightning.ipynb)
Another example of regression, with a wine quality prediction, using PyTorch Lightning
### MNIST classification (DNN,CNN), using Keras3/PyTorch
- **[K3MNIST1](MNIST.Keras3/01-DNN-MNIST.ipynb)** - [Simple classification with DNN](MNIST.Keras3/01-DNN-MNIST.ipynb)
An example of classification using a dense neural network for the famous MNIST dataset
- **[K3MNIST2](MNIST.Keras3/02-CNN-MNIST.ipynb)** - [Simple classification with CNN](MNIST.Keras3/02-CNN-MNIST.ipynb)
An example of classification using a convolutional neural network for the famous MNIST dataset
### MNIST classification (DNN,CNN), using PyTorch
- **[PMNIST1](MNIST.PyTorch/01-DNN-MNIST_PyTorch.ipynb)** - [Simple classification with DNN](MNIST.PyTorch/01-DNN-MNIST_PyTorch.ipynb)
Example of classification with a fully connected neural network, using Pytorch
### MNIST classification (DNN,CNN), using PyTorch/Lightning
- **[LMNIST1](MNIST.Lightning/01-DNN-MNIST_Lightning.ipynb)** - [Simple classification with DNN](MNIST.Lightning/01-DNN-MNIST_Lightning.ipynb)
An example of classification using a dense neural network for the famous MNIST dataset, using PyTorch Lightning
- **[LMNIST2](MNIST.Lightning/02-CNN-MNIST_Lightning.ipynb)** - [Simple classification with CNN](MNIST.Lightning/02-CNN-MNIST_Lightning.ipynb)
An example of classification using a convolutional neural network for the famous MNIST dataset, using PyTorch Lightning
### Images classification GTSRB with Convolutional Neural Networks (CNN), using Keras3/PyTorch
- **[K3GTSRB1](GTSRB.Keras3/01-Preparation-of-data.ipynb)** - [Dataset analysis and preparation](GTSRB.Keras3/01-Preparation-of-data.ipynb)
Episode 1 : Analysis of the GTSRB dataset and creation of an enhanced dataset
- **[K3GTSRB2](GTSRB.Keras3/02-First-convolutions.ipynb)** - [First convolutions](GTSRB.Keras3/02-First-convolutions.ipynb)
Episode 2 : First convolutions and first classification of our traffic signs, using Keras3
- **[K3GTSRB3](GTSRB.Keras3/03-Better-convolutions.ipynb)** - [Training monitoring](GTSRB.Keras3/03-Better-convolutions.ipynb)
Episode 3 : Monitoring, analysis and check points during a training session, using Keras3
- **[K3GTSRB10](GTSRB.Keras3/batch_oar.sh)** - [OAR batch script submission](GTSRB.Keras3/batch_oar.sh)
Bash script for an OAR batch submission of an ipython code
- **[K3GTSRB11](GTSRB.Keras3/batch_slurm.sh)** - [SLURM batch script](GTSRB.Keras3/batch_slurm.sh)
Bash script for a Slurm batch submission of an ipython code
### Sentiment analysis with word embedding, using Keras3/PyTorch
- **[K3IMDB1](Embedding.Keras3/01-One-hot-encoding.ipynb)** - [Sentiment analysis with one-hot encoding](Embedding.Keras3/01-One-hot-encoding.ipynb)
A basic example of sentiment analysis with sparse encoding, using a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch
- **[K3IMDB2](Embedding.Keras3/02-Keras-embedding.ipynb)** - [Sentiment analysis with text embedding](Embedding.Keras3/02-Keras-embedding.ipynb)
A very classical example of word embedding with a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch
- **[K3IMDB3](Embedding.Keras3/03-Prediction.ipynb)** - [Reload and reuse a saved model](Embedding.Keras3/03-Prediction.ipynb)
Retrieving a saved model to perform a sentiment analysis (movie review), using Keras 3 and PyTorch
- **[K3IMDB4](Embedding.Keras3/04-Show-vectors.ipynb)** - [Reload embedded vectors](Embedding.Keras3/04-Show-vectors.ipynb)
Retrieving embedded vectors from our trained model, using Keras 3 and PyTorch
- **[K3IMDB5](Embedding.Keras3/05-LSTM-Keras.ipynb)** - [Sentiment analysis with a RNN network](Embedding.Keras3/05-LSTM-Keras.ipynb)
Still the same problem, but with a network combining embedding and RNN, using Keras 3 and PyTorch
### Time series with Recurrent Neural Network (RNN), using Keras3/PyTorch
- **[K3LADYB1](RNN.Keras3/01-Ladybug.ipynb)** - [Prediction of a 2D trajectory via RNN](RNN.Keras3/01-Ladybug.ipynb)
Artificial dataset generation and prediction attempt via a recurrent network, using Keras 3 and PyTorch
### Graph Neural Networks
### Unsupervised learning with an autoencoder neural network (AE), using Keras3
- **[K3AE1](AE.Keras3/01-Prepare-MNIST-dataset.ipynb)** - [Prepare a noisy MNIST dataset](AE.Keras3/01-Prepare-MNIST-dataset.ipynb)
Episode 1: Preparation of a noisy MNIST dataset
- **[K3AE2](AE.Keras3/02-AE-with-MNIST.ipynb)** - [Building and training an AE denoiser model](AE.Keras3/02-AE-with-MNIST.ipynb)
Episode 1 : Construction of a denoising autoencoder and training of it with a noisy MNIST dataset.
- **[K3AE3](AE.Keras3/03-AE-with-MNIST-post.ipynb)** - [Playing with our denoiser model](AE.Keras3/03-AE-with-MNIST-post.ipynb)
Episode 2 : Using the previously trained autoencoder to denoise data
- **[K3AE4](AE.Keras3/04-ExtAE-with-MNIST.ipynb)** - [Denoiser and classifier model](AE.Keras3/04-ExtAE-with-MNIST.ipynb)
Episode 4 : Construction of a denoiser and classifier model
- **[K3AE5](AE.Keras3/05-ExtAE-with-MNIST.ipynb)** - [Advanced denoiser and classifier model](AE.Keras3/05-ExtAE-with-MNIST.ipynb)
Episode 5 : Construction of an advanced denoiser and classifier model
### Generative network with Variational Autoencoder (VAE), using Keras3
- **[K3VAE1](VAE.Keras3/01-VAE-with-MNIST-LossLayer.ipynb)** - [First VAE, using functional API (MNIST dataset)](VAE.Keras3/01-VAE-with-MNIST-LossLayer.ipynb)
Construction and training of a VAE, using the functional API, with a latent space of small dimension.
- **[K3VAE2](VAE.Keras3/02-VAE-with-MNIST.ipynb)** - [VAE, using a custom model class (MNIST dataset)](VAE.Keras3/02-VAE-with-MNIST.ipynb)
Construction and training of a VAE, using model subclass, with a latent space of small dimension.
- **[K3VAE3](VAE.Keras3/03-VAE-with-MNIST-post.ipynb)** - [Analysis of the VAE's latent space of MNIST dataset](VAE.Keras3/03-VAE-with-MNIST-post.ipynb)
Visualization and analysis of the VAE's latent space of the dataset MNIST
### Generative Adversarial Networks (GANs), using Lightning
- **[PLSHEEP3](DCGAN.Lightning/01-DCGAN-PL.ipynb)** - [A DCGAN to Draw a Sheep, using Pytorch Lightning](DCGAN.Lightning/01-DCGAN-PL.ipynb)
"Draw me a sheep", revisited with a DCGAN, using Pytorch Lightning
### Diffusion Model (DDPM) using PyTorch
- **[DDPM1](DDPM.PyTorch/01-ddpm.ipynb)** - [Fashion MNIST Generation with DDPM](DDPM.PyTorch/01-ddpm.ipynb)
Diffusion Model example, to generate Fashion MNIST images.
- **[DDPM2](DDPM.PyTorch/model.py)** - [DDPM Python classes](DDPM.PyTorch/model.py)
Python classes used by the DDPM example
### Training optimization, using PyTorch
- **[OPT1](Optimization.PyTorch/01-Apprentissages-rapides-et-Optimisations.ipynb)** - [Training setup optimization](Optimization.PyTorch/01-Apprentissages-rapides-et-Optimisations.ipynb)
The goal of this notebook is to go through a typical deep learning model training
### Deep Reinforcement Learning (DRL), using PyTorch
- **[DRL1](DRL.PyTorch/FIDLE_DQNfromScratch.ipynb)** - [Solving CartPole with DQN](DRL.PyTorch/FIDLE_DQNfromScratch.ipynb)
Using a Deep Q-Network to play CartPole - an inverted pendulum problem (PyTorch)
- **[DRL2](DRL.PyTorch/FIDLE_rl_baselines_zoo.ipynb)** - [RL Baselines3 Zoo: Training in Colab](DRL.PyTorch/FIDLE_rl_baselines_zoo.ipynb)
Demo of Stable baseline3 with Colab
### Miscellaneous things, but very important!
- **[NP1](Misc/00-Numpy.ipynb)** - [A short introduction to Numpy](Misc/00-Numpy.ipynb)
Numpy is an essential tool for the Scientific Python.
- **[ACTF1](Misc/01-Activation-Functions.ipynb)** - [Activation functions](Misc/01-Activation-Functions.ipynb)
Some activation functions, with their derivatives.
- **[PANDAS1](Misc/02-Using-pandas.ipynb)** - [Quelques exemples avec Pandas](Misc/02-Using-pandas.ipynb)
pandas is another essential tool for the Scientific Python.
- **[PYTORCH1](Misc/03-Using-Pytorch.ipynb)** - [Practical Lab : PyTorch](Misc/03-Using-Pytorch.ipynb)
PyTorch is one of the main frameworks used in Deep Learning
- **[TSB1](Misc/04-Using-Tensorboard.ipynb)** - [Tensorboard with/from Jupyter ](Misc/04-Using-Tensorboard.ipynb)
4 ways to use Tensorboard from the Jupyter environment
- **[K3LSTM1](Misc/05-RNN.ipynb)** - [Basic Keras LSTM Layer](Misc/05-RNN.ipynb)
A small example of an LSTM layer in Keras
- **[PGRAD1](Misc/06-Gradients.ipynb)** - [Gradient illustration with PyTorch](Misc/06-Gradients.ipynb)
Example of computing a gradient with PyTorch
- **[FID1](Misc/99-Fid-Example.ipynb)** - [Exemple de notebook Fidle ](Misc/99-Fid-Example.ipynb)
A simple example of a Fidle notebook
<!-- TOC_END -->
## Installation
Have a look about **[How to get and install](https://fidle.cnrs.fr/installation)** these notebooks and datasets.
## Licence
[<img width="100px" src="fidle/img/00-fidle-CC BY-NC-SA.svg"></img>](https://creativecommons.org/licenses/by-nc-sa/4.0/)
\[en\] Attribution - NonCommercial - ShareAlike 4.0 International (CC BY-NC-SA 4.0)
\[Fr\] Attribution - Pas d’Utilisation Commerciale - Partage dans les Mêmes Conditions 4.0 International
See [License](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
See [Disclaimer](https://creativecommons.org/licenses/by-nc-sa/4.0/#).
----
[<img width="80px" src="fidle/img/logo-paysage.svg"></img>](#top)
German Traffic Sign Recognition Benchmark (GTSRB)
=================================================
---
Introduction au Deep Learning (IDLE) - S. Arias, E. Maldonado, JL. Parouty - CNRS/SARI/DEVLOG - 2020
## 1/ Environment
To install your conda environment :
```
conda env create -f environment.yml
```
## 4/ Misc
To update an existing environment :
```
conda env update --name=deeplearning2 --file=environment.yml
```
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3LADYB1] - Prediction of a 2D trajectory via RNN
<!-- DESC --> Artificial dataset generation and prediction attempt via a recurrent network, using Keras 3 and PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Understanding the use of a recurrent neural network
## What we're going to do :
- Generate an artificial dataset
- dataset preparation
- Doing our testing
- Making predictions
## Step 1 - Import and init
### 1.1 - Python
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import numpy as np
from math import cos, sin
import random
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3LADYB1')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
%% Cell type:code id: tags:
``` python
# ---- About dataset
#
max_t = 1000
delta_t = 0.01
features_len = 2
sequence_len = 20
predict_len = 5
# ---- About training
#
scale = .2 # Percentage of dataset to be used (1=all)
train_prop = .8 # Percentage for train (the rest being for the test)
batch_size = 32
epochs = 5
fit_verbosity = 1 # 0 = silent, 1 = progress bar, 2 = one line per epoch
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('scale', 'train_prop', 'sequence_len', 'predict_len', 'batch_size', 'epochs', 'fit_verbosity')
```
%% Cell type:markdown id: tags:
## Step 2 - Generation of a fun dataset
### 2.1 - Virtual trajectory of our ladybug
%% Cell type:code id: tags:
``` python
def ladybug_init(s=122):
if s>0 : random.seed(s)
ladybug_init.params_x = [ random.gauss(0.,1.) for u in range(8)]
ladybug_init.params_y = [ random.gauss(0.,1.) for u in range(8)]
def ladybug_move(t):
[ax1, ax2, ax3, ax4, kx1, kx2, kx3, kx4] = ladybug_init.params_x
[ay1, ay2, ay3, ay4, ky1, ky2, ky3, ky4] = ladybug_init.params_y
x = ax1*sin(t*(kx1+20)) + ax2*cos(t*(kx2+10)) + ax3*sin(t*(kx3+5)) + ax4*cos(t*(kx4+5))
y = ay1*cos(t*(ky1+20)) + ay2*sin(t*(ky2+10)) + ay3*cos(t*(ky3+5)) + ay4*sin(t*(ky4+5))
return x,y
```
%% Cell type:markdown id: tags:
### 2.2 - Get some positions, and build a rescaled and normalized dataset
%% Cell type:code id: tags:
``` python
# ---- Get positions
#
ladybug_init(s=16)
x,y = 0,0
positions=[]
for t in np.arange(0., max_t, delta_t):
x,y = ladybug_move(t)
positions.append([x,y])
# ---- Build rescaled dataset
#
n = int( len(positions)*scale )
dataset = np.array(positions[:n])
k = int(len(dataset)*train_prop)
x_train = dataset[:k]
x_test = dataset[k:]
# ---- Normalize
#
mean = x_train.mean()
std = x_train.std()
x_train = (x_train - mean) / std
x_test = (x_test - mean) / std
print("Dataset generated.")
print("Train shape is : ", x_train.shape)
print("Test shape is : ", x_test.shape)
```
%% Cell type:markdown id: tags:
### 2.3 - Have a look
An extract from the data we have: the virtual trajectory of our ladybug
And what we want to predict (in red), from a segment (in blue)
%% Cell type:code id: tags:
``` python
fidle.scrawler.serie_2d(x_train[:1000], figsize=(12,12), lw=1,ms=4,save_as='01-dataset')
```
%% Cell type:code id: tags:
``` python
k1,k2 = sequence_len, predict_len
i = random.randint(0,len(x_test)-k1-k2)
j = i+k1
fidle.scrawler.segment_2d( x_test[i:j+k2], x_test[j:j+k2],ms=6, save_as='02-objectives')
```
%% Cell type:markdown id: tags:
### 2.4 - Prepare sequences from datasets
%% Cell type:code id: tags:
``` python
# ---- Create sequences and labels for train and test
#
xs_train, ys_train=[],[]
all_i = np.random.permutation( len(x_train) - sequence_len - 1 )
for i in all_i:
xs_train.append( x_train[ i : i+sequence_len ] )
ys_train.append( x_train[ i+sequence_len+1 ] )
xs_test, ys_test=[],[]
for i in range( len(x_test) - sequence_len - 1):
xs_test.append( x_test[ i : i+sequence_len ] )
ys_test.append( x_test[ i+sequence_len+1 ] )
# ---- Convert to numpy / float16
xs_train = np.array(xs_train, dtype='float16')
ys_train = np.array(ys_train, dtype='float16')
xs_test = np.array(xs_test, dtype='float16')
ys_test = np.array(ys_test, dtype='float16')
```
%% Cell type:code id: tags:
``` python
fidle.utils.subtitle('About the splitting of our dataset :')
print('Number of sequences : ', len(xs_train))
print('xs_train shape : ',xs_train.shape)
print('ys_train shape : ',ys_train.shape)
fidle.utils.subtitle('What an xs look like :')
fidle.utils.np_print(xs_train[10] )
fidle.utils.subtitle('What an ys look like :')
fidle.utils.np_print(ys_train[10])
```
%% Cell type:markdown id: tags:
## Step 3 - Create a model
%% Cell type:code id: tags:
``` python
model = keras.models.Sequential()
model.add( keras.layers.InputLayer(shape=(sequence_len, features_len)) )
model.add( keras.layers.GRU(200, return_sequences=False, activation='relu') )
model.add( keras.layers.Dense(features_len) )
model.summary()
model.compile(optimizer='rmsprop',
loss='mse',
metrics = ['mae'] )
```
%% Cell type:markdown id: tags:
## Step 4 - Train the model
### 4.1 Add Callbacks
%% Cell type:code id: tags:
``` python
os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)
save_dir = f'{run_dir}/models/best_model.keras'
savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_mae', mode='min', save_best_only=True)
```
%% Cell type:markdown id: tags:
### 4.2 - Train it
Needs about 3' on a CPU laptop
%% Cell type:code id: tags:
``` python
chrono=fidle.Chrono()
chrono.start()
history=model.fit(xs_train,ys_train,
epochs = epochs,
verbose = fit_verbosity,
validation_data = (xs_test, ys_test),
callbacks = [savemodel_callback])
chrono.show()
```
%% Cell type:code id: tags:
``` python
fidle.scrawler.history(history,plot={'loss':['loss','val_loss'], 'mae':['mae','val_mae']}, save_as='03-history')
```
%% Cell type:markdown id: tags:
## Step 5 - Predict
%% Cell type:markdown id: tags:
### 5.1 - Load model
%% Cell type:code id: tags:
``` python
loaded_model = keras.models.load_model(f'{run_dir}/models/best_model.keras')
print('Loaded.')
```
%% Cell type:markdown id: tags:
### 5.2 - Make a 1-step prediction
A simple prediction on a single iteration
%% Cell type:code id: tags:
``` python
s=random.randint(0,len(x_test)-sequence_len)
sequence = x_test[s:s+sequence_len]
sequence_true = x_test[s:s+sequence_len+1]
sequence_pred = loaded_model.predict( np.array([sequence]), verbose=fit_verbosity )
print('sequence shape :',sequence.shape)
print('sequence true shape :',sequence_true.shape)
print('sequence pred shape :',sequence_pred.shape)
fidle.scrawler.segment_2d(sequence_true, sequence_pred, save_as='04-one-step-prediction')
fidle.scrawler.multivariate_serie(sequence_true, predictions=sequence_pred, labels=['Axis=0', 'Axis=1'],save_as='05-one-step-prediction-2axis')
```
%% Cell type:markdown id: tags:
### 5.3 - Make n-steps prediction
A longer-term prediction, via a small iteration function.
We will perform `iterations` predictions to iteratively build our prediction.
%% Cell type:code id: tags:
``` python
def get_prediction(dataset, model, iterations=4):
# ---- Initial sequence
#
s=random.randint(0,len(dataset)-sequence_len-iterations)
sequence_pred = dataset[s:s+sequence_len].copy()
sequence_true = dataset[s:s+sequence_len+iterations].copy()
# ---- Iterate
#
sequence_pred = list(sequence_pred)
for i in range(iterations):
sequence = sequence_pred[-sequence_len:]
prediction = model.predict( np.array([sequence]), verbose=fit_verbosity )
sequence_pred.append(prediction[0])
# ---- Extract the predictions
#
prediction = np.array(sequence_pred[-iterations:])
return sequence_true,prediction
```
%% Cell type:markdown id: tags:
An n-steps prediction :
%% Cell type:code id: tags:
``` python
sequence_true, sequence_pred = get_prediction(x_test, loaded_model, iterations=5)
fidle.scrawler.segment_2d(sequence_true, sequence_pred, ms=8, save_as='06-n-steps-prediction-norm')
fidle.scrawler.multivariate_serie(sequence_true, predictions=sequence_pred, hide_ticks=True, labels=['Axis=0', 'Axis=1'],save_as='07-n-steps-prediction-norm')
```
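%% Cell type:markdown id: tags:
These plots live in the normalized space used for training. If you want the trajectory back in the original coordinate space, a quick sketch is to invert the normalization of step 2.2, reusing the `mean` and `std` computed on `x_train` (the `save_as` name below is arbitrary):
%% Cell type:code id: tags:
``` python
# ---- Back to the original coordinate space (sketch)
#      Invert the normalization of step 2.2, using the mean/std computed on x_train
sequence_true_orig = sequence_true * std + mean
sequence_pred_orig = sequence_pred * std + mean
fidle.scrawler.segment_2d(sequence_true_orig, sequence_pred_orig, ms=8, save_as='08-n-steps-prediction-denorm')
```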
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [TRANS1] - IMDB, Sentiment analysis with Transformers
<!-- DESC --> Using a Transformer to perform a sentiment analysis (IMDB) - Jean Zay version
<!-- AUTHOR : Hatim Bourfoune (IDRIS) and Nathan Cassereau (IDRIS) -->
By : Hatim Bourfoune (IDRIS) and Nathan Cassereau (IDRIS)
## Objectives :
- Complement the learning of a Transformer to perform a sentiment analysis
- Understand the use of a pre-trained transformer
This task is exactly the same as the sentiment analysis with text embedding. Only this time,
we are going to exploit the strength of transformers. Considering how computation-heavy transformer
pretraining is, we are going to use a pretrained BERT model from HuggingFace.
This notebook performs the fine-tuning process. If possible, try to use a GPU to speed up
the training: transformers are difficult to train on a CPU.
## What we are going to do:
* Retrieve the dataset
* Prepare the dataset
* Fetch a pretrained BERT model from HuggingFace's platform (https://huggingface.co/models)
* Fine-tune the model on a sequence classification task: the sentiment analysis of the IMDB dataset
* Evaluate the result
%% Cell type:markdown id: tags:
## Installations
**IMPORTANT :** We will need to use the library `transformers` created by HuggingFace.
The next line only applies on Jean Zay: it loads a very specific environment, which contains TensorFlow with GPU support. Ignore that line if this notebook is not executed on Jean Zay.
%% Cell type:code id: tags:
``` python
#!pip install transformers
!module load tensorflow-gpu/py3/2.6.0
```
%% Cell type:markdown id: tags:
## Imports and initialisation
%% Cell type:code id: tags:
``` python
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.datasets.imdb as imdb
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy
from tensorflow.keras import mixed_precision
from transformers import (
DistilBertTokenizer,
TFDistilBertModel,
DataCollatorWithPadding,
BertTokenizer,
TFBertModel
)
import pickle
import multiprocessing
import itertools
import os
import matplotlib.pyplot as plt
import seaborn as sns
print("Tensorflow ", tf.__version__)
n_gpus = len(tf.config.list_physical_devices('GPU'))
print("#GPUs: ", n_gpus)
if n_gpus > 0:
!nvidia-smi -L
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)
os.environ["TOKENIZERS_PARALLELISM"] = "true"
np.random.seed(987654321)
tf.random.set_seed(987654321)
```
%% Cell type:markdown id: tags:
## Parameters
* `vocab_size` refers to the number of words which will be kept in our vocabulary.
* `hide_most_frequently` is the number of ignored words, among the most common ones.
* `review_len` is the maximum review length.
* `n_cpus` is the number of CPUs which will be used for data preprocessing.
* `distil` indicates whether we use a DistilBert model or a regular Bert model.
* `load_locally` will fetch data locally; otherwise it will be downloaded from the Internet (requires an Internet connection, which is not possible on Jean Zay)
%% Cell type:code id: tags:
``` python
vocab_size = 30000
hide_most_frequently = 0
review_len = 512
epochs = 1
batch_size = 32
fit_verbosity = 1
scale = 1
n_cpus = 6
distil = True
load_locally = True # if set to False, will fetch data from the internet (requires an internet connection)
```
%% Cell type:markdown id: tags:
## Retrieve the dataset
%% Cell type:code id: tags:
``` python
if load_locally:
with open("dataset", "rb") as file_:
(x_train, y_train), (x_test, y_test) = pickle.load(file_)
else:
(x_train, y_train), (x_test, y_test) = imdb.load_data(
num_words=vocab_size,
skip_top=hide_most_frequently,
seed=123456789,
)
with open("dataset", "wb") as file_:
pickle.dump(((x_train, y_train), (x_test, y_test)), file_)
y_train = np.asarray(y_train).astype('float32')
y_test = np.asarray(y_test ).astype('float32')
n1 = int(scale * len(x_train))
n2 = int(scale * len(x_test))
x_train, y_train = x_train[:n1], y_train[:n1]
x_test, y_test = x_test[:n2], y_test[:n2]
print("x_train : {} y_train : {}".format(x_train.shape, y_train.shape))
print("x_test : {} y_test : {}".format(x_test.shape, y_test.shape))
print('\nReview sample (x_train[12]) :\n\n',x_train[12])
```
%% Cell type:code id: tags:
``` python
if load_locally:
with open("word_index", "rb") as file_:
word_index = pickle.load(file_)
else:
word_index = imdb.get_word_index()
with open("word_index", "wb") as file_:
pickle.dump(word_index, file_)
word_index = {w:(i+3) for w,i in word_index.items()}
word_index.update({'[PAD]':0, '[CLS]':1, '[UNK]':2})
index_word = {index:word for word,index in word_index.items()}
# A handy function to convert a tokenized review back to text:
def dataset2text(review):
return ' '.join([index_word.get(i, "?") for i in review[1:]])
```
%% Cell type:code id: tags:
``` python
print(dataset2text(x_train[12]))
```
%% Cell type:markdown id: tags:
## Fetch the model from HuggingFace
%% Cell type:code id: tags:
``` python
def load_model(distil, load_locally):
if load_locally:
if distil:
bert_model = TFDistilBertModel.from_pretrained("distilbert_model")
tokenizer = DistilBertTokenizer("distilbert_vocab.txt", do_lower_case=True)
else:
bert_model = TFBertModel.from_pretrained("bert_model")
tokenizer = BertTokenizer("bert_vocab.txt", do_lower_case=True)
return bert_model, tokenizer
if distil:
bert_model = TFDistilBertModel.from_pretrained("distilbert-base-uncased")
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
bert_model.save_pretrained("distilbert_model")
tokenizer.save_vocabulary("distilbert_vocab.txt")
else:
bert_model = TFBertModel.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model.save_pretrained("bert_model")
tokenizer.save_vocabulary("bert_vocab.txt")
return bert_model, tokenizer
bert_model, tokenizer = load_model(distil, load_locally)
bert_model.summary()
```
%% Cell type:markdown id: tags:
## Prepare the dataset
%% Cell type:code id: tags:
``` python
def tokenize_sample(sample, tokenizer):
return tokenizer(dataset2text(sample), truncation=True, max_length=review_len)
def distributed_tokenize_dataset(dataset):
ds = list(dataset)
with multiprocessing.Pool(n_cpus) as pool:
tokenized_ds = pool.starmap(
tokenize_sample,
zip(ds, itertools.repeat(tokenizer, len(ds)))
)
return tokenized_ds
tokenized_x_train = distributed_tokenize_dataset(x_train)
tokenized_x_test = distributed_tokenize_dataset(x_test)
```
%% Cell type:code id: tags:
``` python
data_collator = DataCollatorWithPadding(tokenizer, return_tensors="tf")
```
%% Cell type:code id: tags:
``` python
data_collator(tokenized_x_train)
```
%% Cell type:code id: tags:
``` python
def make_dataset(x, y):
collated = data_collator(x)
dataset = tf.data.Dataset.from_tensor_slices(
(collated['input_ids'], collated['attention_mask'], y)
)
transformed_dataset = (
dataset
.map(
lambda x, y, z: ((x, y), z)
)
.shuffle(25000)
.batch(batch_size)
)
return transformed_dataset
train_ds = make_dataset(tokenized_x_train, y_train)
test_ds = make_dataset(tokenized_x_test, y_test)
for x, y in train_ds:
print(x)
break
```
%% Cell type:markdown id: tags:
## Add a new head to the model
%% Cell type:code id: tags:
``` python
class ClassificationModel(keras.Model):
def __init__(self, bert_model):
super(ClassificationModel, self).__init__()
self.bert_model = bert_model
self.pre_classifier = Dense(768, activation='relu')
self.dropout = Dropout(0.1)
self.classifier = Dense(2)
def call(self, x):
x = self.bert_model(x)
x = x.last_hidden_state
x = x[:, 0] # get the output of the classification token
x = self.pre_classifier(x)
x = self.dropout(x)
x = self.classifier(x)
return x
```
%% Cell type:code id: tags:
``` python
model = ClassificationModel(bert_model)
x = next(iter(train_ds))[0]
model(x)
model.summary()
```
%% Cell type:markdown id: tags:
## Train!
%% Cell type:code id: tags:
``` python
model.compile(
optimizer=Adam(1e-05),
loss=SparseCategoricalCrossentropy(from_logits=True),
metrics=[SparseCategoricalAccuracy('accuracy')]
)
```
%% Cell type:code id: tags:
``` python
history = model.fit(
train_ds,
epochs=epochs,
verbose=fit_verbosity
)
```
%% Cell type:markdown id: tags:
## Evaluation
%% Cell type:code id: tags:
``` python
_, score = model.evaluate(test_ds)
colors = sns.color_palette('pastel')[2:]
accuracy_score = [score, 1 - score]
plt.pie(
accuracy_score,
labels=["Accurate", "Mistaken"],
colors=colors,
autopct=lambda val: f"{val:.2f}%",
explode=(0.0, 0.1)
)
plt.show()
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [TRANS2] - IMDB, Sentiment analysis with Transformers
<!-- DESC --> Using a Transformer to perform a sentiment analysis (IMDB) - Colab version
<!-- AUTHOR : Hatim Bourfoune (IDRIS) and Nathan Cassereau (IDRIS) -->
By : Hatim Bourfoune (IDRIS) and Nathan Cassereau (IDRIS)
## Objectives :
- Complement the learning of a Transformer to perform a sentiment analysis
- Understand the use of a pre-trained transformer
This task is exactly the same as the sentiment analysis with text embedding. Only this time,
we are going to exploit the strength of transformers. Considering how computation-heavy transformer
pretraining is, we are going to use a pretrained BERT model from HuggingFace.
This notebook performs the fine-tuning process. If possible, try to use a GPU to speed up
the training: transformers are difficult to train on a CPU.
## What we are going to do:
* Retrieve the dataset
* Prepare the dataset
* Fetch a pretrained BERT model from HuggingFace's platform (https://huggingface.co/models)
* Fine-tune the model on a sequence classification task: the sentiment analysis of the IMDB dataset
* Evaluate the result
%% Cell type:markdown id: tags:
## Installations
**IMPORTANT :** We will need to use the library `transformers` created by HuggingFace.
%% Cell type:code id: tags:
``` python
!pip install transformers
```
%% Cell type:markdown id: tags:
## Imports and initialisation
%% Cell type:code id: tags:
``` python
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.datasets.imdb as imdb
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy
from transformers import (
DistilBertTokenizer,
TFDistilBertModel,
DataCollatorWithPadding,
BertTokenizer,
TFBertModel
)
from tqdm.notebook import tqdm
import itertools
import multiprocessing
import os
import matplotlib.pyplot as plt
import seaborn as sns
print("Tensorflow ", tf.__version__)
n_gpus = len(tf.config.list_physical_devices('GPU'))
print("#GPUs: ", n_gpus)
if n_gpus > 0:
!nvidia-smi -L
os.environ["TOKENIZERS_PARALLELISM"] = "true"
np.random.seed(987654321)
tf.random.set_seed(987654321)
```
%% Cell type:markdown id: tags:
## Parameters
* `vocab_size` refers to the number of words which will be kept in our vocabulary.
* `hide_most_frequently` is the number of ignored words, among the most common ones.
* `review_len` is the maximum review length.
* `n_cpus` is the number of CPUs which will be used for data preprocessing.
* `distil` indicates whether we use a DistilBert model or a regular Bert model.
%% Cell type:code id: tags:
``` python
vocab_size = 30000
hide_most_frequently = 0
review_len = 512
epochs = 1
batch_size = 32
fit_verbosity = 1
scale = 1
n_cpus = 1
distil = True
```
%% Cell type:markdown id: tags:
## Retrieve the dataset
%% Cell type:code id: tags:
``` python
(x_train, y_train), (x_test, y_test) = imdb.load_data(
num_words=vocab_size,
skip_top=hide_most_frequently,
seed=123456789,
)
y_train = np.asarray(y_train).astype('float32')
y_test = np.asarray(y_test ).astype('float32')
n1 = int(scale * len(x_train))
n2 = int(scale * len(x_test))
x_train, y_train = x_train[:n1], y_train[:n1]
x_test, y_test = x_test[:n2], y_test[:n2]
print("x_train : {} y_train : {}".format(x_train.shape, y_train.shape))
print("x_test : {} y_test : {}".format(x_test.shape, y_test.shape))
print('\nReview sample (x_train[12]) :\n\n',x_train[12])
```
%% Cell type:code id: tags:
``` python
word_index = imdb.get_word_index()
word_index = {w:(i+3) for w,i in word_index.items()}
word_index.update({'[PAD]':0, '[CLS]':1, '[UNK]':2})
index_word = {index:word for word,index in word_index.items()}
# A handy function to convert a tokenized review back to text:
def dataset2text(review):
return ' '.join([index_word.get(i, "?") for i in review[1:]])
```
%% Cell type:code id: tags:
``` python
print(dataset2text(x_train[12]))
```
%% Cell type:markdown id: tags:
## Fetch the model from HuggingFace
%% Cell type:code id: tags:
``` python
def load_model(distil):
if distil:
bert_model = TFDistilBertModel.from_pretrained("distilbert-base-uncased")
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
else:
bert_model = TFBertModel.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
return bert_model, tokenizer
bert_model, tokenizer = load_model(distil)
bert_model.summary()
```
%% Cell type:markdown id: tags:
## Prepare the dataset
%% Cell type:code id: tags:
``` python
def tokenize_sample(sample):
return tokenizer(dataset2text(sample), truncation=True, max_length=review_len)
def distributed_tokenize_dataset(dataset):
ds = list(dataset)
with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
tokenized_ds = list(tqdm(
pool.imap(tokenize_sample, ds),
total=len(ds)
))
return tokenized_ds
tokenized_x_train = distributed_tokenize_dataset(x_train)
tokenized_x_test = distributed_tokenize_dataset(x_test)
```
%% Cell type:code id: tags:
``` python
data_collator = DataCollatorWithPadding(tokenizer, return_tensors="tf")
```
%% Cell type:code id: tags:
``` python
data_collator(tokenized_x_train)
```
%% Cell type:code id: tags:
``` python
def make_dataset(x, y):
collated = data_collator(x)
dataset = tf.data.Dataset.from_tensor_slices(
(collated['input_ids'], collated['attention_mask'], y)
)
transformed_dataset = (
dataset
.map(
lambda x, y, z: ((x, y), z)
)
.shuffle(25000)
.batch(batch_size)
)
return transformed_dataset
train_ds = make_dataset(tokenized_x_train, y_train)
test_ds = make_dataset(tokenized_x_test, y_test)
for x, y in train_ds:
print(x)
break
```
%% Cell type:markdown id: tags:
## Add a new head to the model
%% Cell type:code id: tags:
``` python
class ClassificationModel(keras.Model):
def __init__(self, bert_model):
super(ClassificationModel, self).__init__()
self.bert_model = bert_model
self.pre_classifier = Dense(768, activation='relu')
self.dropout = Dropout(0.1)
self.classifier = Dense(2)
def call(self, x):
x = self.bert_model(x)
x = x.last_hidden_state
x = x[:, 0] # get the output of the classification token
x = self.pre_classifier(x)
x = self.dropout(x)
x = self.classifier(x)
return x
```
%% Cell type:code id: tags:
``` python
model = ClassificationModel(bert_model)
x = next(iter(train_ds))[0]
model(x)
model.summary()
```
%% Cell type:markdown id: tags:
## Train!
%% Cell type:code id: tags:
``` python
model.compile(
optimizer=Adam(1e-05),
loss=SparseCategoricalCrossentropy(from_logits=True),
metrics=[SparseCategoricalAccuracy('accuracy')]
)
```
%% Cell type:code id: tags:
``` python
history = model.fit(
train_ds,
epochs=epochs,
verbose=fit_verbosity
)
```
%% Cell type:markdown id: tags:
## Evaluation
%% Cell type:code id: tags:
``` python
_, score = model.evaluate(test_ds)
colors = sns.color_palette('pastel')[2:]
accuracy_score = [score, 1 - score]
plt.pie(
accuracy_score,
labels=["Accurate", "Mistaken"],
colors=colors,
autopct=lambda val: f"{val:.2f}%",
explode=(0.0, 0.1)
)
plt.show()
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3VAE1] - First VAE, using functional API (MNIST dataset)
<!-- DESC --> Construction and training of a VAE, using the functional API, with a latent space of small dimension.
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Understanding and implementing a **variational autoencoder** neural network (VAE)
- Understanding **Keras functional API**, using two custom layers
Since the computation required is significant, it is preferable to start with a very simple dataset such as MNIST.
...MNIST at a small scale if you don't have a GPU ;-)
## What we're going to do :
- Defining a VAE model
- Build the model
- Train it
- Have a look on the train process
## Acknowledgements :
Thanks to **François Chollet**, whose example this notebook is based on (and who is the creator of Keras !!).
See : https://keras.io/examples/generative/vae
%% Cell type:markdown id: tags:
## Step 1 - Init python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
from keras import layers
import numpy as np
from modules.layers import SamplingLayer, VariationalLossLayer
from modules.callbacks import ImagesCallback
from modules.datagen import MNIST
import sys
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3VAE1')
```
%% Cell type:markdown id: tags:
## Step 2 - Parameters
`scale` : With scale=1, we need 1'30s on a GPU V100 ...and >20' on a CPU !\
`latent_dim` : 2 dimensions is small, but useful for drawing !\
`fit_verbosity` : Verbosity of the training progress bar: 0=silent, 1=progress bar, 2=one line per epoch\
`loss_weights` : Our **loss function** is the weighted sum of two losses:
- `r_loss`, which measures the reconstruction error.
- `kl_loss`, which measures the dispersion.
The weights are defined by: `loss_weights=[k1,k2]` where : `total_loss = k1*r_loss + k2*kl_loss`
In practice, a value of \[1,.06\] gives good results here.
With scale=0.2, epochs=10 : 3'30 on a laptop
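For intuition, here is a rough sketch of how such a weighted loss can be computed. This is only an illustration under the usual VAE assumptions (binary cross-entropy reconstruction term, closed-form KL term); the actual computation is done by `VariationalLossLayer`, used in step 4.
``` python
# Sketch only - the real code lives in modules/layers/VariationalLossLayer.py
import keras
from keras import ops

def vae_loss_sketch(x, x_hat, z_mean, z_log_var, k1=1., k2=.06):
    # r_loss : reconstruction error between the input image and its reconstruction
    r_loss  = ops.mean(keras.losses.binary_crossentropy(x, x_hat)) * 28 * 28
    # kl_loss : divergence between the latent distribution and a standard normal
    kl_loss = -0.5 * ops.mean(1 + z_log_var - ops.square(z_mean) - ops.exp(z_log_var))
    return k1 * r_loss + k2 * kl_loss
```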
%% Cell type:code id: tags:
``` python
latent_dim = 2
loss_weights = [1,.06]
scale = 0.2
seed = 123
batch_size = 64
epochs = 10
fit_verbosity = 1
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('latent_dim', 'loss_weights', 'scale', 'seed', 'batch_size', 'epochs', 'fit_verbosity')
```
%% Cell type:markdown id: tags:
## Step 3 - Prepare data
`MNIST.get_data()` return : `x_train,y_train, x_test,y_test`, \
but we only need x_train for our training.
%% Cell type:code id: tags:
``` python
x_data, y_data, _,_ = MNIST.get_data(seed=seed, scale=scale, train_prop=1 )
fidle.scrawler.images(x_data[:20], None, indices='all', columns=10, x_size=1,y_size=1,y_padding=0, save_as='01-original')
```
%% Cell type:markdown id: tags:
## Step 4 - Build model
In this example, we will use the **functional API.**
For this, we will use two custom layers :
- `SamplingLayer`, which generates a vector z from the parameters z_mean and z_log_var - See : [SamplingLayer.py](./modules/layers/SamplingLayer.py)
- `VariationalLossLayer`, which allows us to calculate the loss - See : [VariationalLossLayer.py](./modules/layers/VariationalLossLayer.py)
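For intuition, here is a minimal sketch of what such a sampling layer does (the reparameterization trick); the actual implementation in [SamplingLayer.py](./modules/layers/SamplingLayer.py) may differ in detail:
``` python
# Sketch only - see modules/layers/SamplingLayer.py for the real implementation
import keras
from keras import ops

class SamplingSketch(keras.layers.Layer):
    """Draw z ~ N(z_mean, exp(z_log_var)) in a differentiable way."""
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch, dim = ops.shape(z_mean)[0], ops.shape(z_mean)[1]
        epsilon = keras.random.normal(shape=(batch, dim))
        return z_mean + ops.exp(0.5 * z_log_var) * epsilon
```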
%% Cell type:markdown id: tags:
#### Encoder
%% Cell type:code id: tags:
``` python
inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, strides=1, padding="same", activation="relu")(inputs)
x = layers.Conv2D(64, 3, strides=2, padding="same", activation="relu")(x)
x = layers.Conv2D(64, 3, strides=2, padding="same", activation="relu")(x)
x = layers.Conv2D(64, 3, strides=1, padding="same", activation="relu")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = SamplingLayer()([z_mean, z_log_var])
encoder = keras.Model(inputs, [z_mean, z_log_var, z], name="encoder")
# encoder.summary()
```
%% Cell type:markdown id: tags:
#### Decoder
%% Cell type:code id: tags:
``` python
inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(7 * 7 * 64, activation="relu")(inputs)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, strides=1, padding="same", activation="relu")(x)
x = layers.Conv2DTranspose(64, 3, strides=2, padding="same", activation="relu")(x)
x = layers.Conv2DTranspose(32, 3, strides=2, padding="same", activation="relu")(x)
outputs = layers.Conv2DTranspose(1, 3, padding="same", activation="sigmoid")(x)
decoder = keras.Model(inputs, outputs, name="decoder")
# decoder.summary()
```
%% Cell type:markdown id: tags:
#### VAE
We will calculate the loss with a specific layer: `VariationalLossLayer`
See our : modules.layers.[VariationalLossLayer.py](./modules/layers/VariationalLossLayer.py)
%% Cell type:code id: tags:
``` python
inputs = keras.Input(shape=(28, 28, 1))
z_mean, z_log_var, z = encoder(inputs)
outputs = decoder(z)
outputs = VariationalLossLayer(loss_weights=loss_weights)([inputs, z_mean, z_log_var, outputs])
vae=keras.Model(inputs,outputs)
vae.compile(optimizer='adam', loss=None)
```
%% Cell type:markdown id: tags:
## Step 5 - Train
### 5.1 - Using two nice custom callbacks :-)
Two custom callbacks are used:
- `ImagesCallback` : which saves some images during training - See [ImagesCallback.py](./modules/callbacks/ImagesCallback.py)
- `BestModelCallback` : which saves the best model - See [BestModelCallback.py](./modules/callbacks/BestModelCallback.py)
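For intuition, a minimal sketch of what an image-saving callback can look like (illustration only, with a hypothetical class name; the actual classes are in the files linked above):
``` python
# Sketch only - see modules/callbacks/ImagesCallback.py for the real implementation
import keras

class ReconstructionLogger(keras.callbacks.Callback):
    """Store a few reconstructed images at the end of each epoch."""
    def __init__(self, x, nb_images=5):
        super().__init__()
        self.x      = x[:nb_images]
        self.images = []
    def on_epoch_end(self, epoch, logs=None):
        # self.model is the full VAE : input image -> reconstructed image
        self.images.append(self.model.predict(self.x, verbose=0))
```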
%% Cell type:code id: tags:
``` python
callback_images = ImagesCallback(x=x_data, z_dim=latent_dim, nb_images=5, from_z=True, from_random=True, run_dir=run_dir)
callbacks_list = [callback_images]
```
%% Cell type:markdown id: tags:
### 5.2 - Let's train !
With `scale=1`, this needs about 1'15 on a GPU (V100 at IDRIS) ...or 20' on a CPU
%% Cell type:code id: tags:
``` python
chrono=fidle.Chrono()
chrono.start()
history = vae.fit(x_data, epochs=epochs, batch_size=batch_size, callbacks=callbacks_list, verbose=fit_verbosity)
chrono.show()
```
%% Cell type:markdown id: tags:
## Step 6 - Training review
### 6.1 - History
%% Cell type:code id: tags:
``` python
fidle.scrawler.history(history, plot={"Loss":['loss']}, save_as='history')
```
%% Cell type:markdown id: tags:
### 6.2 - Reconstruction during training
At the end of each epoch, our callback saved some reconstructed images.
Where :
Original image -> encoder -> z -> decoder -> Reconstructed image
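The same path can be reproduced by hand with the `encoder` and `decoder` models defined above (illustrative sketch):
``` python
# Illustrative : reproduce the Original -> encoder -> z -> decoder -> Reconstructed path by hand
z_mean, z_log_var, z = encoder.predict(x_data[:5], verbose=0)
x_reconstructed      = decoder.predict(z, verbose=0)
print(x_reconstructed.shape)   # (5, 28, 28, 1)
```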
%% Cell type:code id: tags:
``` python
images_z, images_r = callback_images.get_images( range(0,epochs,2) )
fidle.utils.subtitle('Original images :')
fidle.scrawler.images(x_data[:5], None, indices='all', columns=5, x_size=2,y_size=2, save_as=None)
fidle.utils.subtitle('Encoded/decoded images')
fidle.scrawler.images(images_z, None, indices='all', columns=5, x_size=2,y_size=2, save_as='02-reconstruct')
fidle.utils.subtitle('Original images :')
fidle.scrawler.images(x_data[:5], None, indices='all', columns=5, x_size=2,y_size=2, save_as=None)
```
%% Cell type:markdown id: tags:
### 6.3 - Generation (latent -> decoder)
%% Cell type:code id: tags:
``` python
fidle.utils.subtitle('Generated images from latent space')
fidle.scrawler.images(images_r, None, indices='all', columns=5, x_size=2,y_size=2, save_as='03-generated')
```
%% Cell type:markdown id: tags:
## Annexe - Model Save and reload
Save our model
%% Cell type:code id: tags:
``` python
os.makedirs(f'{run_dir}/models', exist_ok=True)
filename = run_dir+'/models/my_model.keras'
vae.save(filename)
```
%% Cell type:markdown id: tags:
Reload it
%% Cell type:code id: tags:
``` python
vae_reloaded = keras.models.load_model( filename,
custom_objects={ 'SamplingLayer': SamplingLayer,
'VariationalLossLayer':VariationalLossLayer})
```
%% Cell type:markdown id: tags:
Play with our decoder !
%% Cell type:code id: tags:
``` python
decoder = vae.get_layer('decoder')
img = decoder( np.array([[-1,.1]]))
fidle.scrawler.images(img.detach().cpu().numpy(), x_size=2,y_size=2, save_as='04-example')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>