# ------------------------------------------------------------------
# _____ _ _ _
# | ___(_) __| | | ___
# | |_ | |/ _` | |/ _ \
# | _| | | (_| | | __/
# |_| |_|\__,_|_|\___| Basic GAN LightningModule
# ------------------------------------------------------------------
# Formation Introduction au Deep Learning (FIDLE)
# CNRS/MIAI - https://fidle.cnrs.fr
# ------------------------------------------------------------------
# JL Parouty (Mars 2024)
import sys
import numpy as np
import torch
import torch.nn.functional as F
import torchvision
from lightning import LightningModule
class GAN(LightningModule):
# -------------------------------------------------------------------------
# Init
# -------------------------------------------------------------------------
#
def __init__(
self,
data_shape = (None,None,None),
latent_dim = None,
lr = 0.0002,
b1 = 0.5,
b2 = 0.999,
batch_size = 64,
generator_name = None,
discriminator_name = None,
**kwargs,
):
super().__init__()
print('\n---- GAN initialization --------------------------------------------')
# ---- Hyperparameters
#
# Enable Lightning to store all the provided arguments under the self.hparams attribute.
# These hyperparameters will also be stored within the model checkpoint.
#
self.save_hyperparameters()
print('Hyperparameters are :')
for name,value in self.hparams.items():
print(f'{name:24s} : {value}')
# ---- Because we have more than one optimizer
#
self.automatic_optimization = False
# ---- Generator/Discriminator instantiation
#
print('Submodels :')
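# Retrieve the generator/discriminator classes by name from the __main__
# module: they must be defined (or imported) in the calling notebook.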
module=sys.modules['__main__']
class_g = getattr(module, generator_name)
class_d = getattr(module, discriminator_name)
self.generator = class_g( latent_dim=latent_dim, data_shape=data_shape)
self.discriminator = class_d( latent_dim=latent_dim, data_shape=data_shape)
# ---- Validation and example data
#
self.validation_z = torch.randn(8, self.hparams.latent_dim)
self.example_input_array = torch.zeros(2, self.hparams.latent_dim)
def forward(self, z):
return self.generator(z)
def adversarial_loss(self, y_hat, y):
return F.binary_cross_entropy(y_hat, y)
def training_step(self, batch, batch_idx):
imgs = batch
batch_size = batch.size(0)
optimizer_g, optimizer_d = self.optimizers()
# ---- Get some latent space vectors
# We use type_as() to make sure we initialize z on the right device (GPU/CPU).
#
z = torch.randn(batch_size, self.hparams.latent_dim)
z = z.type_as(imgs)
# ---- Train generator ------------------------------------------------
# Generator uses optimizer #0
# We try to generate fake images that could mislead the discriminator
#
self.toggle_optimizer(optimizer_g)
# Generate fake images
self.fake_imgs = self.generator.forward(z)
# Assemble labels that say all images are real, yes it's a lie ;-)
# Put it on the right device (type_as), because we created this tensor inside the training loop
misleading_labels = torch.ones(batch_size, 1)
misleading_labels = misleading_labels.type_as(imgs)
# Adversarial loss is binary cross-entropy
y_hat = self.discriminator.forward(self.fake_imgs)
g_loss = self.adversarial_loss(y_hat, misleading_labels)
self.log("g_loss", g_loss, prog_bar=True)
# Backward loss
self.manual_backward(g_loss)
optimizer_g.step()
optimizer_g.zero_grad()
self.untoggle_optimizer(optimizer_g)
# ---- Train discriminator --------------------------------------------
# Discriminator uses optimizer #1
# We try to tell the difference between fake images and real ones
#
self.toggle_optimizer(optimizer_d)
# These images are real
real_labels = torch.ones(batch_size, 1)
real_labels = real_labels.type_as(imgs)
pred_labels = self.discriminator.forward(imgs)
real_loss = self.adversarial_loss(pred_labels, real_labels)
# These images are fake
fake_imgs = self.generator.forward(z)
fake_labels = torch.zeros(batch_size, 1)
fake_labels = fake_labels.type_as(imgs)
fake_loss = self.adversarial_loss(self.discriminator(fake_imgs.detach()), fake_labels)
# Discriminator loss is the average
d_loss = (real_loss + fake_loss) / 2
self.log("d_loss", d_loss, prog_bar=True)
# Backward
self.manual_backward(d_loss)
optimizer_d.step()
optimizer_d.zero_grad()
self.untoggle_optimizer(optimizer_d)
def configure_optimizers(self):
lr = self.hparams.lr
b1 = self.hparams.b1
b2 = self.hparams.b2
# With a GAN, we need 2 separate optimizers.
# opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2))
# opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2),)
opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr)
opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr)
return [opt_g, opt_d], []
def on_train_epoch_end(self):
# ---- Log Graph
#
if self.current_epoch == 1:
sampleImg=torch.rand((1,28,28,1))
sampleImg=sampleImg.type_as(self.generator.model[0].weight)
self.logger.experiment.add_graph(self.discriminator,sampleImg)
# ---- Log some of these images
#
z = torch.randn(self.hparams.batch_size, self.hparams.latent_dim)
z = z.type_as(self.generator.model[0].weight)
sample_imgs = self.generator(z)
sample_imgs = sample_imgs.permute(0, 3, 1, 2) # from NHWC to NCHW
grid = torchvision.utils.make_grid(tensor=sample_imgs, nrow=12, )
self.logger.experiment.add_image(f"Generated images", grid,self.current_epoch)
# ------------------------------------------------------------------
# _____ _ _ _
# | ___(_) __| | | ___
# | |_ | |/ _` | |/ _ \
# | _| | | (_| | | __/
# |_| |_|\__,_|_|\___| GAN / Generators
# ------------------------------------------------------------------
# Formation Introduction au Deep Learning (FIDLE)
# CNRS/MIAI - https://fidle.cnrs.fr
# ------------------------------------------------------------------
# JL Parouty (Mars 2024)
import numpy as np
import torch.nn as nn
# -----------------------------------------------------------------------------
# -- Generator n°1
# -----------------------------------------------------------------------------
#
class Generator_1(nn.Module):
def __init__(self, latent_dim=None, data_shape=None):
super().__init__()
self.latent_dim = latent_dim
self.img_shape = data_shape
print('init generator 1 : ',latent_dim,' to ',data_shape)
self.model = nn.Sequential(
nn.Linear(latent_dim, 128),
nn.ReLU(),
nn.Linear(128,256),
nn.BatchNorm1d(256, momentum=0.8),  # momentum must be a keyword: the 2nd positional arg of BatchNorm1d is eps
nn.ReLU(),
nn.Linear(256, 512),
nn.BatchNorm1d(512, momentum=0.8),
nn.ReLU(),
nn.Linear(512, 1024),
nn.BatchNorm1d(1024, momentum=0.8),
nn.ReLU(),
nn.Linear(1024, int(np.prod(data_shape))),
nn.Sigmoid()
)
def forward(self, z):
img = self.model(z)
img = img.view(img.size(0), *self.img_shape)
return img
# -----------------------------------------------------------------------------
# -- Generator n°2
# -----------------------------------------------------------------------------
#
class Generator_2(nn.Module):
def __init__(self, latent_dim=None, data_shape=None):
super().__init__()
self.latent_dim = latent_dim
self.img_shape = data_shape
print('init generator 2 : ',latent_dim,' to ',data_shape)
self.model = nn.Sequential(
nn.Linear(latent_dim, 7*7*64),
nn.Unflatten(1, (64,7,7)),
# nn.UpsamplingNearest2d( scale_factor=2 ),
nn.UpsamplingBilinear2d( scale_factor=2 ),
nn.Conv2d( 64,128, (3,3), stride=(1,1), padding=(1,1) ),
nn.ReLU(),
nn.BatchNorm2d(128),
# nn.UpsamplingNearest2d( scale_factor=2 ),
nn.UpsamplingBilinear2d( scale_factor=2 ),
nn.Conv2d( 128,256, (3,3), stride=(1,1), padding=(1,1)),
nn.ReLU(),
nn.BatchNorm2d(256),
nn.Conv2d( 256,1, (5,5), stride=(1,1), padding=(2,2)),
nn.Sigmoid()
)
def forward(self, z):
img_nchw = self.model(z)
img_nhwc = img_nchw.permute(0, 2, 3, 1) # reformat from NCHW to NHWC
# img = img.view(img.size(0), *self.img_shape) # reformat from NCHW to NHWC
return img_nhwc
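# ---- Usage sketch (illustrative) -------------------------------------
# g = Generator_2(latent_dim=128, data_shape=(28, 28, 1))
# z = torch.randn(16, 128)
# imgs = g(z)        # -> [16, 28, 28, 1] (NHWC), values in [0,1] (Sigmoid)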
# ------------------------------------------------------------------
# _____ _ _ _
# | ___(_) __| | | ___
# | |_ | |/ _` | |/ _ \
# | _| | | (_| | | __/
# |_| |_|\__,_|_|\___| GAN / QuickDrawDataModule
# ------------------------------------------------------------------
# Formation Introduction au Deep Learning (FIDLE)
# CNRS/MIAI - https://fidle.cnrs.fr
# ------------------------------------------------------------------
# JL Parouty (Mars 2024)
import numpy as np
import torch
from lightning import LightningDataModule
from torch.utils.data import DataLoader
class QuickDrawDataModule(LightningDataModule):
def __init__( self, dataset_file='./sheep.npy', scale=1., batch_size=64, num_workers=4 ):
super().__init__()
print('\n---- QuickDrawDataModule initialization ----------------------------')
print(f'with : scale={scale} batch size={batch_size}')
self.scale = scale
self.dataset_file = dataset_file
self.batch_size = batch_size
self.num_workers = num_workers
self.dims = (28, 28, 1)
self.num_classes = 10
def prepare_data(self):
pass
def setup(self, stage=None):
print('\nDataModule Setup :')
# Load dataset
# Called at the beginning of each stage (train,val,test)
# Here, whatever the stage value, we'll have only one set.
data = np.load(self.dataset_file)
print('Original dataset shape : ',data.shape)
# Rescale
n=int(self.scale*len(data))
data = data[:n]
print('Rescaled dataset shape : ',data.shape)
# Normalize, reshape and shuffle
data = data/255
data = data.reshape(-1,28,28,1)
data = torch.from_numpy(data).float()
print('Final dataset shape : ',data.shape)
print('Dataset loaded and ready.')
self.data_train = data
def train_dataloader(self):
# Note : a torch Tensor (like a NumPy ndarray) is Dataset-compliant:
# it has a map-style interface. See https://pytorch.org/docs/stable/data.html
return DataLoader( self.data_train, batch_size=self.batch_size, num_workers=self.num_workers )
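# ---- Usage sketch (illustrative) -------------------------------------
# dm = QuickDrawDataModule(dataset_file='./sheep.npy', scale=0.5, batch_size=64)
# dm.setup()
# batch = next(iter(dm.train_dataloader()))   # tensor of shape [64, 28, 28, 1]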
# ------------------------------------------------------------------
# _____ _ _ _
# | ___(_) __| | | ___
# | |_ | |/ _` | |/ _ \
# | _| | | (_| | | __/
# |_| |_|\__,_|_|\___| WGANGP LightningModule
# ------------------------------------------------------------------
# Formation Introduction au Deep Learning (FIDLE)
# CNRS/MIAI - https://fidle.cnrs.fr
# ------------------------------------------------------------------
# JL Parouty (Mars 2024)
import sys
import numpy as np
import torch
import torch.nn.functional as F
import torchvision
from lightning import LightningModule
class WGANGP(LightningModule):
# -------------------------------------------------------------------------
# Init
# -------------------------------------------------------------------------
#
def __init__(
self,
data_shape = (None,None,None),
latent_dim = None,
lr = 0.0002,
b1 = 0.5,
b2 = 0.999,
batch_size = 64,
lambda_gp = 10,
generator_name = None,
discriminator_name = None,
**kwargs,
):
super().__init__()
print('\n---- WGANGP initialization -----------------------------------------')
# ---- Hyperparameters
#
# Enable Lightning to store all the provided arguments under the self.hparams attribute.
# These hyperparameters will also be stored within the model checkpoint.
#
self.save_hyperparameters()
print('Hyperparameters are :')
for name,value in self.hparams.items():
print(f'{name:24s} : {value}')
# ---- Because we have more than one optimizer
#
self.automatic_optimization = False
# ---- Generator/Discriminator instantiation
#
print('Submodels :')
module=sys.modules['__main__']
class_g = getattr(module, generator_name)
class_d = getattr(module, discriminator_name)
self.generator = class_g( latent_dim=latent_dim, data_shape=data_shape)
self.discriminator = class_d( latent_dim=latent_dim, data_shape=data_shape)
# ---- Validation and example data
#
self.validation_z = torch.randn(8, self.hparams.latent_dim)
self.example_input_array = torch.zeros(2, self.hparams.latent_dim)
def forward(self, z):
return self.generator(z)
def adversarial_loss(self, y_pred, y):
return F.binary_cross_entropy(y_pred, y)
def gradient_penalty(self, real_images, fake_images):
# see: https://medium.com/dejunhuang/implementing-gan-and-wgan-in-pytorch-551099afde3c
batch_size = real_images.size(0)
# ---- Create interpolate images
#
# Get a random vector : size=([batch_size])
epsilon = torch.distributions.uniform.Uniform(0, 1).sample([batch_size])
# Add dimensions to match images batch : size=([batch_size,1,1,1])
epsilon = epsilon[:, None, None, None]
# Put epsilon on the right device
epsilon = epsilon.type_as(real_images)
# Do interpolation
interpolates = epsilon * fake_images + ((1 - epsilon) * real_images)
# ---- Use autograd to compute gradient
#
# The key to making this work is including `create_graph`, this means that the computations
# in this penalty will be added to the computation graph for the loss function, so that the
# second partial derivatives will be correctly computed.
#
interpolates.requires_grad_()
pred_labels = self.discriminator.forward(interpolates)
gradients = torch.autograd.grad( inputs = interpolates,
outputs = pred_labels,
grad_outputs = torch.ones_like(pred_labels),
create_graph = True,
retain_graph = True,
only_inputs = True )[0]
grad_flat = gradients.view(batch_size, -1)
grad_norm = torch.linalg.norm(grad_flat, dim=1)
grad_penalty = (grad_norm - 1) ** 2
# gp = torch.pow(grads.norm(2, dim=1) - 1, 2).mean()
return grad_penalty
def training_step(self, batch, batch_idx):
real_imgs = batch
batch_size = batch.size(0)
lambda_gp = self.hparams.lambda_gp
optimizer_g, optimizer_d = self.optimizers()
# ---- Get some latent space vectors
# We use type_as() to make sure we initialize z on the right device (GPU/CPU).
#
z = torch.randn(batch_size, self.hparams.latent_dim)
z = z.type_as(real_imgs)
# ---- Train generator ------------------------------------------------
# Generator uses optimizer #0
# We try to generate fake images that could mislead the discriminator
# ---------------------------------------------------------------------
#
self.toggle_optimizer(optimizer_g)
# Get fake images
fake_imgs = self.generator.forward(z)
# Get critics
critics = self.discriminator.forward(fake_imgs)
# Loss
g_loss = -critics.mean()
# Log
self.log("g_loss", g_loss, prog_bar=True)
# Backward loss
self.manual_backward(g_loss)
optimizer_g.step()
optimizer_g.zero_grad()
self.untoggle_optimizer(optimizer_g)
# ---- Train discriminator --------------------------------------------
# Discriminator uses optimizer #1
# We try to tell the difference between fake images and real ones
# ---------------------------------------------------------------------
#
self.toggle_optimizer(optimizer_d)
# Get critics
critics_real = self.discriminator.forward(real_imgs)
critics_fake = self.discriminator.forward(fake_imgs.detach())
# Get gradient penalty
grad_penalty = self.gradient_penalty(real_imgs, fake_imgs.detach())
# Loss
d_loss = critics_fake.mean() - critics_real.mean() + lambda_gp*grad_penalty.mean()
# Log loss
self.log("d_loss", d_loss, prog_bar=True)
# Backward
self.manual_backward(d_loss)
optimizer_d.step()
optimizer_d.zero_grad()
self.untoggle_optimizer(optimizer_d)
def configure_optimizers(self):
lr = self.hparams.lr
b1 = self.hparams.b1
b2 = self.hparams.b2
# With a GAN, we need 2 separate optimizers.
# opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2))
# opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2),)
opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr)
opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr)
return [opt_g, opt_d], []
def on_train_epoch_end(self):
# ---- Log Graph
#
if self.current_epoch == 1:
sampleImg=torch.rand((1,28,28,1))
sampleImg=sampleImg.type_as(self.generator.model[0].weight)
self.logger.experiment.add_graph(self.discriminator,sampleImg)
# ---- Log some of these images
#
z = torch.randn(self.hparams.batch_size, self.hparams.latent_dim)
z = z.type_as(self.generator.model[0].weight)
sample_imgs = self.generator(z)
sample_imgs = sample_imgs.permute(0, 3, 1, 2) # from NHWC to NCHW
grid = torchvision.utils.make_grid(tensor=sample_imgs, nrow=12, )
self.logger.experiment.add_image(f"Generated images", grid,self.current_epoch)
%% Cell type:markdown id:756b572d tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [DDPM1] - Fashion MNIST Generation with DDPM
<!-- DESC --> Diffusion Model example, to generate Fashion MNIST images.
<!-- AUTHOR : Hatim Bourfoune (CNRS/IDRIS), Maxime Song (CNRS/IDRIS) -->
## Objectives :
- Understanding and implementing a **Diffusion Model** neural network (DDPM)
Since the computational requirements are significant, it is preferable to start with a very simple dataset such as MNIST.
...or MNIST at a reduced scale (you will need to adapt the code!) if you don't have a GPU ;-)
## Acknowledgements :
This notebook was heavily inspired by this [article](https://huggingface.co/blog/annotated-diffusion) and this [notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/annotated_diffusion.ipynb#scrollTo=5153024b).
%% Cell type:code id:54a15542 tags:
``` python
import math
from inspect import isfunction
from functools import partial
import random
import IPython
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from einops import rearrange
import torch
from torch import nn, einsum
import torch.nn.functional as F
from datasets import load_dataset, load_from_disk
from torchvision import transforms
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
import numpy as np
from PIL import Image
from torch.optim import Adam
from torchvision.transforms import Compose, ToTensor, Lambda, ToPILImage, CenterCrop, Resize
import matplotlib.pyplot as plt
```
%% Cell type:code id:a854c28a tags:
``` python
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Reproducibility
torch.manual_seed(53)
random.seed(53)
np.random.seed(53)
```
%% Cell type:markdown id:e33f10db tags:
## Create dataset
We will use the HuggingFace Datasets library to get our Fashion MNIST. If you are using Jean Zay, the dataset is already downloaded in the DSDIR, so you can use the code as it is. If you are not using Jean Zay, use the function load_dataset (commented out) instead of load_from_disk: it will automatically download the dataset if it is not already present.
%% Cell type:code id:918c0138 tags:
``` python
dataset = load_dataset("fashion_mnist")
dataset
```
%% Cell type:markdown id:cfe4d4f5 tags:
As you can see, the dataset is composed of two subparts: train and test. So the dataset is already split for us. We'll use the train part for now. <br/>
We can also see that the dataset has two features per sample: 'image', corresponding to the PIL version of the image, and 'label', corresponding to the class of the image (shoe, shirt...). We can also see that there are 60,000 samples in our train dataset.
%% Cell type:code id:2280400d tags:
``` python
train_dataset = dataset['train']
train_dataset[0]
```
%% Cell type:markdown id:7978ad3d tags:
Each sample of a HuggingFace dataset is a dictionary containing the data.
%% Cell type:code id:0d157e11 tags:
``` python
image = train_dataset[0]['image']
image
```
%% Cell type:code id:5dea3e5a tags:
``` python
image_array = np.asarray(image, dtype=np.uint8)
print(f"shape of the image: {image_array.shape}")
print(f"min: {image_array.min()}, max: {image_array.max()}")
```
%% Cell type:markdown id:f86937e9 tags:
We will now create a function that gets the Fashion MNIST dataset, applies all the transformations we want to it, and wraps the result in a DataLoader.
%% Cell type:code id:e646a7b1 tags:
``` python
# load hugging face dataset from the DSDIR
def get_dataset(data_path, batch_size, test = False):
dataset = load_from_disk(data_path)
# dataset = load_dataset(data_path) # Use this one if you're not on Jean Zay
# define image transformations (e.g. using torchvision)
transform = Compose([
transforms.RandomHorizontalFlip(), # Data augmentation
transforms.ToTensor(), # Transform PIL image into tensor of value between [0,1]
transforms.Lambda(lambda t: (t * 2) - 1) # Normalize values between [-1,1]
])
# define function for HF dataset transform
def transforms_im(examples):
examples['pixel_values'] = [transform(image) for image in examples['image']]
del examples['image']
return examples
dataset = dataset.with_transform(transforms_im).remove_columns('label') # We don't need it
channels, image_size, _ = dataset['train'][0]['pixel_values'].shape
if test:
dataloader = DataLoader(dataset['test'], batch_size=batch_size)
else:
dataloader = DataLoader(dataset['train'], batch_size=batch_size, shuffle=True)
len_dataloader = len(dataloader)
print(f"channels: {channels}, image dimension: {image_size}, len_dataloader: {len_dataloader}")
return dataloader, channels, image_size, len_dataloader
```
%% Cell type:markdown id:413a3fea tags:
We choose the parameters and we instantiate the dataset:
%% Cell type:code id:918233da tags:
``` python
# Dataset parameters
batch_size = 64
data_path = "/gpfsdswork/dataset/HuggingFace/fashion_mnist/fashion_mnist/"
# data_path = "fashion_mnist" # If you're not using Jean Zay
```
%% Cell type:code id:85939f9d tags:
``` python
train_dataloader, channels, image_size, len_dataloader = get_dataset(data_path, batch_size)
batch_image = next(iter(train_dataloader))['pixel_values']
batch_image.shape
```
%% Cell type:markdown id:104db929 tags:
We also create a function that allows us to see a batch of images:
%% Cell type:code id:196370c2 tags:
``` python
def normalize_im(images):
shape = images.shape
images = images.view(shape[0], -1)
images -= images.min(1, keepdim=True)[0]
images /= images.max(1, keepdim=True)[0]
return images.view(shape)
def show_images(batch):
plt.imshow(torch.permute(make_grid(normalize_im(batch)), (1,2,0)))
plt.show()
```
%% Cell type:code id:96334e60 tags:
``` python
show_images(batch_image[:])
```
%% Cell type:markdown id:1befee67 tags:
## Forward Diffusion
The aim of this part is to create a function that will add noise to any image at any step (following the DDPM diffusion process).
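Thanks to the "nice property" of the forward process, $x_t$ can be sampled directly from $x_0$ in a single step:
$$ q(x_t|x_0) = \mathcal{N}\big(x_t;\ \sqrt{\bar{\alpha}_t}\,x_0,\ (1-\bar{\alpha}_t)\mathbf{I}\big) \iff x_t = \sqrt{\bar{\alpha}_t}\,x_0 + \sqrt{1-\bar{\alpha}_t}\,\epsilon \quad \text{with } \epsilon \sim \mathcal{N}(0,\mathbf{I}) $$
This is what the q_sample function implemented below uses, via sqrt_alphas_cumprod and sqrt_one_minus_alphas_cumprod.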
%% Cell type:markdown id:231629ad tags:
### Beta scheduling
First, we create a function that computes the betas for every step (following a specific schedule). We will only create functions for the linear schedule (original DDPM) and the cosine schedule (improved DDPM):
%% Cell type:code id:0039d38d tags:
``` python
# Different type of beta schedule
def linear_beta_schedule(timesteps, beta_start = 0.0001, beta_end = 0.02):
"""
linear schedule from the original DDPM paper https://arxiv.org/abs/2006.11239
"""
return torch.linspace(beta_start, beta_end, timesteps)
def cosine_beta_schedule(timesteps, s=0.008):
"""
cosine schedule as proposed in https://arxiv.org/abs/2102.09672
"""
steps = timesteps + 1
x = torch.linspace(0, timesteps, steps)
alphas_cumprod = torch.cos(((x / timesteps) + s) / (1 + s) * torch.pi * 0.5) ** 2
alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
betas = 1 - (alphas_cumprod[1:] / alphas_cumprod[:-1])
return torch.clip(betas, 0.0001, 0.9999)
```
%% Cell type:markdown id:e18d1b38 tags:
### Constants calculation
We will now create a function to compute all the constants we need for our Diffusion Model. <br/>
Constants:
- $ \beta_t $: betas
- $ \sqrt{\frac{1}{\alpha_t}} $: sqrt_recip_alphas
- $ \sqrt{\bar{\alpha}_t} $: sqrt_alphas_cumprod
- $ \sqrt{1-\bar{\alpha}_t} $: sqrt_one_minus_alphas_cumprod
- $ \tilde{\beta}_t = \beta_t\frac{1-\bar{\alpha}_{t-1}}{1-\bar{\alpha}_t} $: posterior_variance
%% Cell type:code id:84251513 tags:
``` python
# Function to get alphas and betas
def get_alph_bet(timesteps, schedule=cosine_beta_schedule):
# define beta
betas = schedule(timesteps)
# define alphas
alphas = 1. - betas
alphas_cumprod = torch.cumprod(alphas, axis=0) # cumulative product of alpha
alphas_cumprod_prev = F.pad(alphas_cumprod[:-1], (1, 0), value=1.0) # corresponding to the prev const
sqrt_recip_alphas = torch.sqrt(1.0 / alphas)
# calculations for diffusion q(x_t | x_{t-1}) and others
sqrt_alphas_cumprod = torch.sqrt(alphas_cumprod)
sqrt_one_minus_alphas_cumprod = torch.sqrt(1. - alphas_cumprod)
# calculations for posterior q(x_{t-1} | x_t, x_0)
posterior_variance = betas * (1. - alphas_cumprod_prev) / (1. - alphas_cumprod)
const_dict = {
'betas': betas,
'sqrt_recip_alphas': sqrt_recip_alphas,
'sqrt_alphas_cumprod': sqrt_alphas_cumprod,
'sqrt_one_minus_alphas_cumprod': sqrt_one_minus_alphas_cumprod,
'posterior_variance': posterior_variance
}
return const_dict
```
%% Cell type:markdown id:d5658d8e tags:
### Difference between Linear and Cosine schedule
We can check how the constants differ when we change the schedule:
%% Cell type:code id:7bfdf98c tags:
``` python
T = 1000
const_linear_dict = get_alph_bet(T, schedule=linear_beta_schedule)
const_cosine_dict = get_alph_bet(T, schedule=cosine_beta_schedule)
plt.plot(np.arange(T), const_linear_dict['sqrt_alphas_cumprod'], color='r', label='linear')
plt.plot(np.arange(T), const_cosine_dict['sqrt_alphas_cumprod'], color='g', label='cosine')
# Naming the x-axis, y-axis and the whole graph
plt.xlabel("Step")
plt.ylabel("alpha_bar")
plt.title("Linear and Cosine schedules")
# Adding a legend, which helps us recognize each curve by its color
plt.legend()
# To load the display window
plt.show()
```
%% Cell type:markdown id:b1537984 tags:
### Definition of $ q(x_t|x_0) $
%% Cell type:code id:cb10e05b tags:
``` python
# extract the values needed for time t
def extract(constants, batch_t, x_shape):
diffusion_batch_size = batch_t.shape[0]
# get a list of the appropriate constants of each timesteps
out = constants.gather(-1, batch_t.cpu())
return out.reshape(diffusion_batch_size, *((1,) * (len(x_shape) - 1))).to(batch_t.device)
```
%% Cell type:markdown id:2f5991bd tags:
Now that we have all the constants we need, we can create a function that adds noise to an image following the forward diffusion process. This function (q_sample) corresponds to $ q(x_t|x_0) $:
![q_sample](https://docs.google.com/drawings/d/e/2PACX-1vQJ55FfJZ8FehNhnIEEeWUDaOAZqK5BuaadB9Xacx2bA222nNApwMHYzhgILaUrze_pTlc974BELJ2D/pub?w=3210&h=651)
%% Cell type:code id:28645450 tags:
``` python
# forward diffusion (using the nice property)
def q_sample(constants_dict, batch_x0, batch_t, noise=None):
if noise is None:
noise = torch.randn_like(batch_x0)
sqrt_alphas_cumprod_t = extract(constants_dict['sqrt_alphas_cumprod'], batch_t, batch_x0.shape)
sqrt_one_minus_alphas_cumprod_t = extract(
constants_dict['sqrt_one_minus_alphas_cumprod'], batch_t, batch_x0.shape
)
return sqrt_alphas_cumprod_t * batch_x0 + sqrt_one_minus_alphas_cumprod_t * noise
```
%% Cell type:markdown id:dcc05f40 tags:
We can now visualize how the forward diffusion process gradually adds noise to the image, depending on the schedule:
%% Cell type:code id:7ed20740 tags:
``` python
T = 1000
const_linear_dict = get_alph_bet(T, schedule=linear_beta_schedule)
const_cosine_dict = get_alph_bet(T, schedule=cosine_beta_schedule)
batch_t = torch.arange(batch_size)*(T//batch_size) # get a range of timesteps from 0 to T
print(f"timesteps: {batch_t}")
noisy_batch_linear = q_sample(const_linear_dict, batch_image, batch_t, noise=None)
noisy_batch_cosine = q_sample(const_cosine_dict, batch_image, batch_t, noise=None)
print("Original images:")
show_images(batch_image[:])
print("Noised images with linear shedule:")
show_images(noisy_batch_linear[:])
print("Noised images with cosine shedule:")
show_images(noisy_batch_cosine[:])
```
%% Cell type:markdown id:565d3c80 tags:
## Reverse Diffusion Process
%% Cell type:markdown id:251808b0 tags:
### Model definition
The reverse diffusion process is carried out by a deep learning model. We chose a U-Net model with attention. The model is optimized following papers like [ConvNeXt](https://arxiv.org/pdf/2201.03545.pdf). You can inspect the model in the model.py file.
%% Cell type:code id:29f00028 tags:
``` python
from model import Unet
model = Unet(
dim=28,
init_dim=None,
out_dim=None,
dim_mults=(1, 2, 4),
channels=1,
with_time_emb=True,
convnext_mult=2,
)
```
%% Cell type:markdown id:0aaf936c tags:
### Definition of $ p_{\theta}(x_{t-1}|x_t) $
Now we need a function to retrieve $x_{t-1}$ from $x_t$ and the predicted noise $z_t$. It corresponds to the reverse diffusion kernel:
![p_sample](https://docs.google.com/drawings/d/e/2PACX-1vRogMTbBI_MtUz2WvFRKef0IKSNaKuFe475llm8nARBbvVCxezq4L00wJV7HjJSLm5mvODncdHDQvKq/pub?w=4407&h=679)
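Concretely, the mean computed in the code below is the one from Equation 11 of the DDPM paper:
$$ \mu_\theta(x_t, t) = \frac{1}{\sqrt{\alpha_t}} \left( x_t - \frac{\beta_t}{\sqrt{1-\bar{\alpha}_t}}\, \epsilon_\theta(x_t, t) \right) $$
where $\epsilon_\theta(x_t, t)$ is the noise predicted by the model.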
%% Cell type:code id:00443d8e tags:
``` python
@torch.no_grad()
def p_sample(constants_dict, batch_xt, predicted_noise, batch_t):
# We first get every constant needed and send them to the right device
betas_t = extract(constants_dict['betas'], batch_t, batch_xt.shape).to(batch_xt.device)
sqrt_one_minus_alphas_cumprod_t = extract(
constants_dict['sqrt_one_minus_alphas_cumprod'], batch_t, batch_xt.shape
).to(batch_xt.device)
sqrt_recip_alphas_t = extract(
constants_dict['sqrt_recip_alphas'], batch_t, batch_xt.shape
).to(batch_xt.device)
# Equation 11 in the ddpm paper
# Use predicted noise to predict the mean (mu theta)
model_mean = sqrt_recip_alphas_t * (
batch_xt - betas_t * predicted_noise / sqrt_one_minus_alphas_cumprod_t
)
# We have to be careful not to add noise if we want to predict the final image
predicted_image = torch.zeros(batch_xt.shape).to(batch_xt.device)
t_zero_index = (batch_t == torch.zeros(batch_t.shape).to(batch_xt.device))
# Algorithm 2 line 4, we add noise when timestep is not 1:
posterior_variance_t = extract(constants_dict['posterior_variance'], batch_t, batch_xt.shape)
noise = torch.randn_like(batch_xt) # create noise, same shape as batch_x
predicted_image[~t_zero_index] = model_mean[~t_zero_index] + (
torch.sqrt(posterior_variance_t[~t_zero_index]) * noise[~t_zero_index]
)
# If t=1 we don't add noise to mu
predicted_image[t_zero_index] = model_mean[t_zero_index]
return predicted_image
```
%% Cell type:markdown id:c6e13aa1 tags:
## Sampling
%% Cell type:markdown id:459df8a2 tags:
We will now create the sampling function. Given a trained model, it should generate all the images we want.
%% Cell type:markdown id:1e3cdf15 tags:
With the reverse diffusion process and a trained model, we can now make the sampling function corresponding to this algorithm:
![sampling](https://docs.google.com/drawings/d/e/2PACX-1vT205aFxllD7gspWypXkoJVvkftJU0B0AiBbHZvZvmHFx_ntqY0oofBD_i874FNrrbJ1CWrOwWwLtUg/pub?w=1398&h=671)
%% Cell type:code id:710ef636 tags:
``` python
# Algorithm 2 (including returning all images)
@torch.no_grad()
def sampling(model, shape, T, constants_dict):
b = shape[0]
# start from pure noise (for each example in the batch)
batch_xt = torch.randn(shape, device=DEVICE)
batch_t = torch.ones(shape[0]) * T # create a vector of batch_size copies of the current timestep
batch_t = batch_t.type(torch.int64).to(DEVICE)
imgs = []
for t in tqdm(reversed(range(0, T)), desc='sampling loop time step', total=T):
batch_t -= 1
predicted_noise = model(batch_xt, batch_t)
batch_xt = p_sample(constants_dict, batch_xt, predicted_noise, batch_t)
imgs.append(batch_xt.cpu())
return imgs
```
%% Cell type:markdown id:df50675e tags:
## Training
We will instantiate every object needed with fixed parameters here. You can try different hyperparameters by coming back here and changing them.
%% Cell type:code id:a3884522 tags:
``` python
# Dataset parameters
batch_size = 64
data_path = "/gpfsdswork/dataset/HuggingFace/fashion_mnist/fashion_mnist/"
# data_path = "fashion_mnist" # If you're not using Jean Zay
train_dataloader, channels, image_size, len_dataloader = get_dataset(data_path, batch_size)
```
%% Cell type:code id:b6b4a2bd tags:
``` python
constants_dict = get_alph_bet(T, schedule=linear_beta_schedule)
```
%% Cell type:code id:ba387427 tags:
``` python
epochs = 3
T = 1000 # number of diffusion timesteps
```
%% Cell type:code id:31933494 tags:
``` python
model = Unet(
dim=image_size,
init_dim=None,
out_dim=None,
dim_mults=(1, 2, 4),
channels=channels,
with_time_emb=True,
convnext_mult=2,
).to(DEVICE)
```
%% Cell type:code id:92fb2a17 tags:
``` python
criterion = nn.SmoothL1Loss()
optimizer = Adam(model.parameters(), lr=1e-4)
```
%% Cell type:markdown id:f059d28f tags:
### Training loop
![training_algorithm](https://docs.google.com/drawings/d/e/2PACX-1vRZYVrTttVD1qk5YjVT_CmQfFz2kR2cIqIMHKV4QE6LWU67mUl14NJowz-GKldITkFwsR5iM6w3epKl/pub?w=1395&h=670)
%% Cell type:code id:4bab979d tags:
``` python
for epoch in range(epochs):
loop = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{epochs}")
for batch in loop:
optimizer.zero_grad()
batch_size_iter = batch["pixel_values"].shape[0]
batch_image = batch["pixel_values"].to(DEVICE)
# Algorithm 1 line 3: sample t uniformly for every example in the batch
batch_t = torch.randint(0, T, (batch_size_iter,), device=DEVICE).long()
noise = torch.randn_like(batch_image)
x_noisy = q_sample(constants_dict, batch_image, batch_t, noise=noise)
predicted_noise = model(x_noisy, batch_t)
loss = criterion(noise, predicted_noise)
loop.set_postfix(loss=loss.item())
loss.backward()
optimizer.step()
print("check generation:")
list_gen_imgs = sampling(model, (batch_size, channels, image_size, image_size), T, constants_dict)
show_images(list_gen_imgs[-1])
```
%% Cell type:markdown id:2489e819 tags:
## View of the diffusion process
%% Cell type:code id:09ce451d tags:
``` python
def make_gif(frame_list):
to_pil = ToPILImage()
frames = [to_pil(make_grid(normalize_im(tens_im))) for tens_im in frame_list]
frame_one = frames[0]
frame_one.save("sampling.gif.png", format="GIF", append_images=frames[::5], save_all=True, duration=10, loop=0)
return IPython.display.Image(filename="./sampling.gif.png")
```
%% Cell type:code id:4f665ac3 tags:
``` python
make_gif(list_gen_imgs)
```
%% Cell type:markdown id:bfa40b6b tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
# <!-- TITLE --> [DDPM2] - DDPM Python classes
# <!-- DESC --> Python classes used by the DDPM example
# <!-- AUTHOR : Hatim Bourfoune (CNRS/IDRIS), Maxime Song (CNRS/IDRIS) -->
import torch
from torch import nn, einsum
import torch.nn.functional as F
from inspect import isfunction
from functools import partial
import math
from einops import rearrange
def exists(x):
return x is not None
def default(val, d):
if exists(val):
return val
return d() if isfunction(d) else d
class Residual(nn.Module):
def __init__(self, fn):
super().__init__()
self.fn = fn
def forward(self, x, *args, **kwargs):
return self.fn(x, *args, **kwargs) + x
def Upsample(dim):
return nn.ConvTranspose2d(dim, dim, 4, 2, 1)
def Downsample(dim):
return nn.Conv2d(dim, dim, 4, 2, 1)
class SinusoidalPositionEmbeddings(nn.Module):
def __init__(self, dim):
super().__init__()
self.dim = dim
def forward(self, time):
device = time.device
half_dim = self.dim // 2
embeddings = math.log(10000) / (half_dim - 1)
embeddings = torch.exp(torch.arange(half_dim, device=device) * -embeddings)
embeddings = time[:, None] * embeddings[None, :]
embeddings = torch.cat((embeddings.sin(), embeddings.cos()), dim=-1)
return embeddings
class ConvNextBlock(nn.Module):
"""https://arxiv.org/abs/2201.03545"""
def __init__(self, dim, dim_out, *, time_emb_dim=None, mult=2, norm=True):
super().__init__()
self.mlp = (
nn.Sequential(nn.GELU(), nn.Linear(time_emb_dim, dim))
if exists(time_emb_dim)
else None
)
self.ds_conv = nn.Conv2d(dim, dim, 7, padding=3, groups=dim)
self.net = nn.Sequential(
nn.GroupNorm(1, dim) if norm else nn.Identity(),
nn.Conv2d(dim, dim_out * mult, 3, padding=1),
nn.GELU(),
nn.GroupNorm(1, dim_out * mult),
nn.Conv2d(dim_out * mult, dim_out, 3, padding=1),
)
self.res_conv = nn.Conv2d(dim, dim_out, 1) if dim != dim_out else nn.Identity()
def forward(self, x, time_emb=None):
h = self.ds_conv(x)
if exists(self.mlp) and exists(time_emb):
assert exists(time_emb), "time embedding must be passed in"
condition = self.mlp(time_emb)
h = h + rearrange(condition, "b c -> b c 1 1")
h = self.net(h)
return h + self.res_conv(x)
class Attention(nn.Module):
def __init__(self, dim, heads=4, dim_head=32):
super().__init__()
self.scale = dim_head**-0.5
self.heads = heads
hidden_dim = dim_head * heads
self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False)
self.to_out = nn.Conv2d(hidden_dim, dim, 1)
def forward(self, x):
b, c, h, w = x.shape
qkv = self.to_qkv(x).chunk(3, dim=1)
q, k, v = map(
lambda t: rearrange(t, "b (h c) x y -> b h c (x y)", h=self.heads), qkv
)
q = q * self.scale
sim = einsum("b h d i, b h d j -> b h i j", q, k)
sim = sim - sim.amax(dim=-1, keepdim=True).detach()
attn = sim.softmax(dim=-1)
out = einsum("b h i j, b h d j -> b h i d", attn, v)
out = rearrange(out, "b h (x y) d -> b (h d) x y", x=h, y=w)
return self.to_out(out)
class LinearAttention(nn.Module):
def __init__(self, dim, heads=4, dim_head=32):
super().__init__()
self.scale = dim_head**-0.5
self.heads = heads
hidden_dim = dim_head * heads
self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False)
self.to_out = nn.Sequential(nn.Conv2d(hidden_dim, dim, 1),
nn.GroupNorm(1, dim))
def forward(self, x):
b, c, h, w = x.shape
qkv = self.to_qkv(x).chunk(3, dim=1)
q, k, v = map(
lambda t: rearrange(t, "b (h c) x y -> b h c (x y)", h=self.heads), qkv
)
q = q.softmax(dim=-2)
k = k.softmax(dim=-1)
q = q * self.scale
context = torch.einsum("b h d n, b h e n -> b h d e", k, v)
out = torch.einsum("b h d e, b h d n -> b h e n", context, q)
out = rearrange(out, "b h c (x y) -> b (h c) x y", h=self.heads, x=h, y=w)
return self.to_out(out)
class PreNorm(nn.Module):
def __init__(self, dim, fn):
super().__init__()
self.fn = fn
self.norm = nn.GroupNorm(1, dim)
def forward(self, x):
x = self.norm(x)
return self.fn(x)
class Unet(nn.Module):
def __init__(
self,
dim,
init_dim=None,
out_dim=None,
dim_mults=(1, 2, 4, 8),
channels=3,
with_time_emb=True,
convnext_mult=2,
):
super().__init__()
# determine dimensions
self.channels = channels
init_dim = default(init_dim, dim // 3 * 2)
self.init_conv = nn.Conv2d(channels, init_dim, 7, padding=3)
dims = [init_dim, *map(lambda m: dim * m, dim_mults)]
in_out = list(zip(dims[:-1], dims[1:]))
block_klass = partial(ConvNextBlock, mult=convnext_mult)
# time embeddings
if with_time_emb:
time_dim = dim * 4
self.time_mlp = nn.Sequential(
SinusoidalPositionEmbeddings(dim),
nn.Linear(dim, time_dim),
nn.GELU(),
nn.Linear(time_dim, time_dim),
)
else:
time_dim = None
self.time_mlp = None
# layers
self.downs = nn.ModuleList([])
self.ups = nn.ModuleList([])
num_resolutions = len(in_out)
for ind, (dim_in, dim_out) in enumerate(in_out):
is_last = ind >= (num_resolutions - 1)
self.downs.append(
nn.ModuleList(
[
block_klass(dim_in, dim_out, time_emb_dim=time_dim),
block_klass(dim_out, dim_out, time_emb_dim=time_dim),
Residual(PreNorm(dim_out, LinearAttention(dim_out))),
Downsample(dim_out) if not is_last else nn.Identity(),
]
)
)
mid_dim = dims[-1]
self.mid_block1 = block_klass(mid_dim, mid_dim, time_emb_dim=time_dim)
self.mid_attn = Residual(PreNorm(mid_dim, Attention(mid_dim)))
self.mid_block2 = block_klass(mid_dim, mid_dim, time_emb_dim=time_dim)
for ind, (dim_in, dim_out) in enumerate(reversed(in_out[1:])):
is_last = ind >= (num_resolutions - 1)
self.ups.append(
nn.ModuleList(
[
block_klass(dim_out * 2, dim_in, time_emb_dim=time_dim),
block_klass(dim_in, dim_in, time_emb_dim=time_dim),
Residual(PreNorm(dim_in, LinearAttention(dim_in))),
Upsample(dim_in) if not is_last else nn.Identity(),
]
)
)
out_dim = default(out_dim, channels)
self.final_conv = nn.Sequential(
block_klass(dim, dim), nn.Conv2d(dim, out_dim, 1)
)
def forward(self, x, time):
x = self.init_conv(x)
t = self.time_mlp(time) if exists(self.time_mlp) else None
h = []
# downsample
for block1, block2, attn, downsample in self.downs:
x = block1(x, t)
x = block2(x, t)
x = attn(x)
h.append(x)
x = downsample(x)
# bottleneck
x = self.mid_block1(x, t)
x = self.mid_attn(x)
x = self.mid_block2(x, t)
# upsample
for block1, block2, attn, upsample in self.ups:
x = torch.cat((x, h.pop()), dim=1)
x = block1(x, t)
x = block2(x, t)
x = attn(x)
x = upsample(x)
return self.final_conv(x)
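# ---- Usage sketch (illustrative) --------------------------------------
# model = Unet(dim=28, channels=1, dim_mults=(1, 2, 4))
# x = torch.randn(8, 1, 28, 28)        # a batch of noisy images
# t = torch.randint(0, 1000, (8,))     # one timestep per image
# predicted_noise = model(x, t)        # same shape as x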
python==3.10
jupyterlab
pytorch
torchvision
tqdm
matplotlib
einops
datasets
%% Cell type:markdown id:w_5p3EyVknLC tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [DRL1] - Solving CartPole with DQN
<!-- DESC --> Using a Deep Q-Network to play CartPole - an inverted pendulum problem (PyTorch)
<!-- AUTHOR : Nathan Cassereau (IDRIS) and Bertrand Cabot (IDRIS) -->
By Nathan Cassereau (IDRIS) and Bertrand Cabot (IDRIS)
%% Cell type:markdown id:ucB28wGpmFwi tags:
## Objectives
* Understand the code behind the DQN algorithm
* Visualize the result for fun purposes :)
This notebook implements a DQN from scratch and trains it. It is simply a vanilla DQN with a target network (sometimes referred to as a Double DQN). More sophisticated and recent modifications might help stabilize the training.
Considering that we are going to use a tiny network for a simple environment, matrix multiplications are not that time consuming, and using a GPU can even be detrimental, as communications between CPU and GPU are no longer negligible compared to the forward and backward steps. This notebook will therefore be executed on CPU.
The chosen environment will be imported from the gym toolkit (https://gym.openai.com/).
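As a reminder, DQN trains the Q-network to regress a bootstrapped target built with the periodically-synchronized target network: $y = r + \gamma \max_{a'} Q_{\text{target}}(s', a')$. The `compute_target` method below builds this target from the batched rewards and next states.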
%% Cell type:markdown id:fqQsB2Jwm-BP tags:
## Demonstration steps:
- Define numerous hyperparameters
- Implement the Q-Network
- Implement an agent following the Double DQN algorithm
- Train it for a few minutes
- Visualize the result
%% Cell type:markdown id:nRJmgZ0inpkk tags:
## Installations
Gym requires a graphical interface to render a state observation. Xvfb allows the notebook to run headless. This software is not available on Jean Zay's compute nodes, hence the use of Google Colab.
%% Cell type:code id:y2Y71JbfgkeU tags:
``` python
!pip3 install pyvirtualdisplay
!pip install pyglet==1.5.11
!apt-get install x11-utils > /dev/null 2>&1
!apt-get install -y xvfb python-opengl > /dev/null 2>&1
```
%% Cell type:markdown id:q6eYfBKnoOJQ tags:
## Imports
I chose PyTorch to implement this DQN due to its straightforward API and personal preference.
Gym implements the environment.
%% Cell type:code id:0fc91d65-4756-4432-906c-7d315d981775 tags:
``` python
import numpy as np
import torch
import torch.nn as nn
import gym
from gym import wrappers
import random
from tqdm.notebook import tqdm
import functools
import matplotlib.pyplot as plt
import os
import io
import base64
import glob
from IPython.display import display, HTML
```
%% Cell type:markdown id:Hao-RYcdowHn tags:
## Hyperparameters
The size of the replay buffer does not matter much. In this case, it is big enough to hold every transition we will see during training. This choice does have a huge impact on memory though.
Warm-up allows the network to gather some information before the training process begins.
The target network will only be updated once every 10k steps in order to stabilize the training.
The exploration rate decreases linearly, although an exponential curve is a sound and common choice as well.
As mentioned above, only the CPU will be used; a GPU would be useful for bigger networks and/or environments whose internal state is a torch tensor.
Considering this is a simple DQN implementation, its stability leaves a lot to be desired. In order not to rely on luck, a decent seed was chosen.
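With these hyperparameters, the exploration rate implemented below follows $\epsilon(t) = \max\left(\epsilon_{final},\ \epsilon_0 - t \cdot \frac{\epsilon_0 - \epsilon_{final}}{f \cdot T}\right)$, where $f$ is the exploration fraction and $T$ the total number of training steps.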
%% Cell type:code id:6fX1X6y6YHXF tags:
``` python
learning_rate = 0.0001
buffer_size = 200000
warmup_steps = 10000
batch_size = 32
gamma = 0.99
train_freq = 4
target_update_interval = 10000
exploration_fraction = 0.1
exploration_initial_eps = 1.0
exploration_final_eps = 0.05
device = torch.device("cpu") # torch.device("cuda" if torch.cuda.is_available() else "cpu")
seed = 987654321
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
```
%% Cell type:markdown id:TofGB-s7qfSH tags:
## Q-Network and Agent implementation
%% Cell type:code id:4VhftO9PaE9g tags:
``` python
class DQN(nn.Module):
def __init__(self):
super(DQN, self).__init__()
self.layer1 = nn.Linear(4, 64)
self.layer2 = nn.Linear(64, 64)
self.layer3 = nn.Linear(64, 2)
self.relu = nn.ReLU()
def forward(self, x):
x = self.relu(self.layer1(x))
x = self.relu(self.layer2(x))
return self.layer3(x)
def compute_target(self, x, rewards):
with torch.no_grad():
values = torch.zeros(x.shape[0], device=device)
values[rewards != 1] = torch.max(self.forward(x[rewards != 1]), dim=-1)[0]
values = rewards + gamma * values
return values
def predict(self, x):
if len(x.shape) < 2:
x = x[None, :]
with torch.no_grad():
x = torch.argmax(self.forward(x), dim=-1)
if x.device.type == "cuda":
x = x.cpu()
return x
class Agent:
def __init__(self, env):
self.env = env
self.q_network = DQN().to(device)
self.target_network = DQN().to(device)
self.target_network.eval()
self.synchronize()
self.optimizer = torch.optim.Adam(self.q_network.parameters(), lr=learning_rate)
self.criterion = nn.MSELoss()
self.buffer = []
self.n_updates = 0
def add_transition(self, state, action, reward, nextState):
self.buffer.append((state, action, reward, nextState))
if len(self.buffer) > buffer_size:
self.buffer.pop(random.randrange(len(self.buffer)))
def sample(self):
transitions = random.sample(self.buffer, batch_size)
states, actions, rewards, nextStates = zip(*transitions)
states = torch.stack(states).to(device)
actions = torch.cat(actions).to(device)
rewards = torch.cat(rewards).to(device)
nextStates = torch.stack(nextStates).to(device)
return states, actions, rewards, nextStates
def train_step(self, step):
if step % target_update_interval == 0:
self.synchronize()
if step < warmup_steps or step % train_freq != 0:
return 0.
states, actions, rewards, nextStates = self.sample()
output = self.q_network(states)
output = torch.gather(output, 1, actions.unsqueeze(-1)).view(-1)
expectedOutput = self.target_network.compute_target(nextStates, rewards).view(-1)
self.optimizer.zero_grad()
loss = self.criterion(output, expectedOutput)
loss.backward()
torch.nn.utils.clip_grad_norm_(self.q_network.parameters(), 10)
self.optimizer.step()
self.n_updates += 1
return loss.item()
def synchronize(self):
self.target_network.load_state_dict(self.q_network.state_dict())
def play(self, state, exploration_rate=0.):
if random.random() > exploration_rate:
return self.q_network.predict(state.to(device))
else:
shape = (state.shape[0],) if len(state.shape) > 1 else (1,)
return torch.randint(0, 2, size=shape)
@functools.lru_cache(maxsize=None)
def exploration_slope(self, total_steps):
return (exploration_initial_eps - exploration_final_eps) / (exploration_fraction * total_steps)
def exploration(self, step, total_steps):
eps = exploration_initial_eps - step * self.exploration_slope(total_steps)
return max(eps, exploration_final_eps)
def train(self, total_steps):
obs = torch.from_numpy(env.reset()).float()
n_episodes = 0
length_current_episode = 0
lengths = []
avg_reward = 0
loss_backup = 0.
acc_loss = 0.
acc_loss_count = 0
self.rewards = []
with tqdm(range(total_steps), desc="Training agent", unit="steps") as pbar:
for step in pbar:
eps = self.exploration(step, total_steps)
action = self.play(obs, eps)
new_obs, _, done, info = env.step(action.item())
reward = torch.tensor([1.0 if not done else -1.0], dtype=torch.float32)
new_obs = torch.from_numpy(new_obs).float()
self.add_transition(obs, action, reward, new_obs)
loss = self.train_step(step)
if loss != 0:
acc_loss += loss
acc_loss_count += 1
if done:
obs = torch.from_numpy(env.reset()).float()
n_episodes += 1
lengths.append(length_current_episode)
self.rewards.append(length_current_episode)
length_current_episode = 0
if len(lengths) >= 25:
avg_reward = sum(lengths) / len(lengths)
if acc_loss_count != 0:
loss_backup = acc_loss / acc_loss_count
else:
loss_backup = "??"
acc_loss = 0.
acc_loss_count = 0
lengths = []
else:
obs = new_obs
length_current_episode += 1
pbar.set_postfix({
"episodes": n_episodes,
"avg_reward": avg_reward,
"loss": loss_backup,
"exploration_rate": eps,
"n_updates": self.n_updates,
})
```
%% Cell type:markdown id:Kne9b7vCql3N tags:
## Defining the environment
%% Cell type:code id:BXw4RmGpFkZm tags:
``` python
env = gym.make("CartPole-v1")
env.seed(seed+2)
env.reset()
```
%% Cell type:markdown id:i93WQNsbqo68 tags:
## Training our agent
%% Cell type:code id:rAm6v_0HiEge tags:
``` python
agent = Agent(env)
agent.train(120000)
```
%% Cell type:markdown id:PPT-tl4Rqroj tags:
## Episodes length
A very noisy curve. It does reach satisfying levels though.
%% Cell type:code id:IoCnHaZKgHqI tags:
``` python
fig = plt.figure(figsize=(20, 12))
plt.plot(agent.rewards)
plt.xlabel("Episodes")
plt.ylabel("Episode length")
plt.show()
```
%% Cell type:markdown id:0fuolKppq1Ak tags:
## Result visualisation
%% Cell type:code id:GXT1q5ckh0dG tags:
``` python
from pyvirtualdisplay import Display
virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()
```
%% Cell type:code id:710b8294-4f75-49b5-a54a-777439ce8799 tags:
``` python
env = gym.make("CartPole-v1")
env.seed(4)
env = wrappers.Monitor(env, "./CartPole-v1/", force=True)
obs = env.reset()
i = 0
while True:
action = agent.q_network.predict(torch.from_numpy(obs).float().to(device))
obs, rewards, done, info = env.step(action.item())
env.render()
if done:
break
else:
i += 1
env.close()
print(f"Survived {i} steps")
```
%% Cell type:code id:c7ad6655-02b7-436e-a7ae-93a7222b100e tags:
``` python
def ipython_show_video(path):
"""Shamelessly stolen from https://stackoverflow.com/a/51183488/9977878
"""
if not os.path.isfile(path):
raise NameError("Cannot access: {}".format(path))
video = io.open(path, 'r+b').read()
encoded = base64.b64encode(video)
display(HTML(
data="""
<video alt="test" controls>
<source src="data:video/mp4;base64,{0}" type="video/mp4" />
</video>
""".format(encoded.decode('ascii'))
))
ipython_show_video(glob.glob("/content/CartPole-v1/*.mp4")[0])
```
%% Cell type:code id:31e6af84-489e-4665-919e-8234462c1f0a tags:
``` python
```
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [DRL2] - RL Baselines3 Zoo: Training in Colab
<!-- DESC --> Demo of Stable baseline3 with Colab
<!-- AUTHOR : Nathan Cassereau (IDRIS) and Bertrand Cabot (IDRIS) -->
Demo of Stable baseline3 adapted By Nathan Cassereau (IDRIS) and Bertrand Cabot (IDRIS)
Github Repo: [https://github.com/DLR-RM/rl-baselines3-zoo](https://github.com/DLR-RM/rl-baselines3-zoo)
Stable-Baselines3 Repo: [https://github.com/DLR-RM/stable-baselines3](https://github.com/DLR-RM/stable-baselines3)
# Install Dependencies
%% Cell type:code id: tags:
```
!apt-get install swig cmake ffmpeg freeglut3-dev xvfb
```
%% Cell type:code id: tags:
```
!apt-get install -y \
libgl1-mesa-dev \
libgl1-mesa-glx \
libglew-dev \
libosmesa6-dev \
software-properties-common
!apt-get install -y patchelf
```
%% Cell type:markdown id: tags:
## Clone RL Baselines3 Zoo Repo
%% Cell type:code id: tags:
```
!git clone --recursive https://github.com/DLR-RM/rl-baselines3-zoo
```
%% Cell type:code id: tags:
```
%cd /content/rl-baselines3-zoo/
```
%% Cell type:markdown id: tags:
### Install pip dependencies
%% Cell type:code id: tags:
```
!pip install -r requirements.txt
```
%% Cell type:code id: tags:
```
!pip install free-mujoco-py
```
%% Cell type:markdown id: tags:
## Pretrained model
gym environments: https://gym.openai.com/envs/
%% Cell type:code id: tags:
```
%cd /content/rl-baselines3-zoo/
```
%% Cell type:markdown id: tags:
### Record a Video
%% Cell type:code id: tags:
```
# Set up display; otherwise rendering will fail
import os
os.system("Xvfb :1 -screen 0 1024x768x24 &")
os.environ['DISPLAY'] = ':1'
```
%% Cell type:code id: tags:
```
import base64
from pathlib import Path
from IPython import display as ipythondisplay
def show_videos(video_path='', prefix=''):
"""
Taken from https://github.com/eleurent/highway-env
:param video_path: (str) Path to the folder containing videos
:param prefix: (str) Filter the videos, showing only the ones starting with this prefix
"""
html = []
for mp4 in Path(video_path).glob("**/*{}*.mp4".format(prefix)):
video_b64 = base64.b64encode(mp4.read_bytes())
html.append('''{} <br> <video alt="{}" autoplay
loop controls style="height: 400px;">
<source src="data:video/mp4;base64,{}" type="video/mp4" />
</video>'''.format(mp4, mp4, video_b64.decode('ascii')))
ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(html)))
```
%% Cell type:markdown id: tags:
### Discrete environments
%% Cell type:code id: tags:
```
%run scripts/all_plots.py -a dqn qrdqn a2c ppo --env PongNoFrameskip-v4 -f rl-trained-agents/
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a dqn -e PongNoFrameskip-v4 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a qrdqn -e PongNoFrameskip-v4 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a a2c -e PongNoFrameskip-v4 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a ppo -e PongNoFrameskip-v4 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
!python enjoy.py --algo dqn --env PongNoFrameskip-v4 --no-render --n-timesteps 5000
```
%% Cell type:code id: tags:
```
!python -m utils.record_video --algo dqn --env PongNoFrameskip-v4
```
%% Cell type:code id: tags:
```
show_videos(video_path='rl-trained-agents/dqn', prefix='PongNoFrameskip-v4')
```
%% Cell type:markdown id: tags:
### Continuous environments
%% Cell type:code id: tags:
```
%run scripts/all_plots.py -a ppo trpo sac td3 tqc --env Ant-v3 -f rl-trained-agents/
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a ppo -e Ant-v3 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a trpo -e Ant-v3 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a tqc -e Ant-v3 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a td3 -e Ant-v3 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a sac -e Ant-v3 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
!python enjoy.py --algo td3 --env Ant-v3 --no-render --n-timesteps 5000
```
%% Cell type:code id: tags:
```
!python -m utils.record_video --algo td3 --env Ant-v3
```
%% Cell type:code id: tags:
```
show_videos(video_path='rl-trained-agents/td3', prefix='Ant-v3')
```
%% Cell type:markdown id: tags:
## Train an RL Agent
The trained agent can be found in the `logs/` folder.
Here we will train A2C on the CartPole-v1 environment for 100 000 steps.
To train it on Pong (Atari), you just have to pass `--env PongNoFrameskip-v4`
Note: You need to update `hyperparams/algo.yml` to support new environments. You can access it in the side panel of Google Colab. (see https://stackoverflow.com/questions/46986398/import-data-into-google-colaboratory)
%% Cell type:code id: tags:
```
!python train.py --algo dqn --env PongNoFrameskip-v4 --n-timesteps 1000000
```
%% Cell type:markdown id: tags:
#### Evaluate trained agent
You can remove the `--folder logs/` to evaluate pretrained agent.
%% Cell type:code id: tags:
```
!python enjoy.py --algo dqn --env PongNoFrameskip-v4 --no-render --n-timesteps 5000 --folder logs/
```
%% Cell type:markdown id: tags:
#### Tune Hyperparameters
We use [Optuna](https://optuna.org/) for optimizing the hyperparameters.
Tune the hyperparameters for PPO, using a TPE sampler and a median pruner, 2 parallel jobs,
with a budget of 1000 trials and a maximum of 50000 steps
%% Cell type:code id: tags:
```
#!python train.py --algo dqn --env PongNoFrameskip-v4 -n 5000 -optimize --n-trials 10 --n-jobs 5 --sampler tpe --pruner median
```
%% Cell type:markdown id: tags:
### Display the video
%% Cell type:markdown id: tags:
### Continue Training
Here, we will continue training the previous model
%% Cell type:code id: tags:
```
#!python train.py --algo dqn --env PongNoFrameskip-v4 --n-timesteps 50000 -i logs/dqn/PongNoFrameskip-v4_1/PongNoFrameskip-v4.zip
```
%% Cell type:code id: tags:
```
#!python enjoy.py --algo dqn --env PongNoFrameskip-v4 --no-render --n-timesteps 1000 --folder logs/
```
%% Cell type:code id: tags:
```
```
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3IMDB1] - Sentiment analysis with one-hot encoding
<!-- DESC --> A basic example of sentiment analysis with sparse encoding, using a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to guess whether film reviews are **positive or negative** based on the analysis of the text.
- Understand the management of **textual data** and **sentiment analysis**
The original dataset can be found **[here](http://ai.stanford.edu/~amaas/data/sentiment/)**
Note that [IMDb.com](https://imdb.com) offers several easy-to-use [datasets](https://www.imdb.com/interfaces/)
For simplicity's sake, we'll use the dataset directly [embedded in Keras](https://keras.io/datasets)
## What we're going to do :
- Retrieve data
- Preparing the data
- Build a model
- Train the model
- Evaluate the result
%% Cell type:markdown id: tags:
## Step 1 - Import and init
### 1.1 - Python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import keras.datasets.imdb as imdb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3IMDB1')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
The words in the vocabulary are classified from the most frequent to the rarest.\
`vocab_size` is the number of words we will remember in our vocabulary (the other words will be considered as unknown).\
`hide_most_frequently` is the number of ignored words, among the most common ones\
`fit_verbosity` is the verbosity during training : 0 = silent, 1 = progress bar, 2 = one line per epoch
%% Cell type:code id: tags:
``` python
vocab_size = 5000
hide_most_frequently = 0
epochs = 10
batch_size = 512
fit_verbosity = 1
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('vocab_size', 'hide_most_frequently', 'batch_size', 'epochs', 'fit_verbosity')
```
%% Cell type:markdown id: tags:
## Step 2 - Understanding one-hot encoding
#### We have a **sentence** and a **dictionary** :
%% Cell type:code id: tags:
``` python
sentence = "I've never seen a movie like this before"
dictionary = {"a":0, "before":1, "fantastic":2, "i've":3, "is":4, "like":5, "movie":6, "never":7, "seen":8, "this":9}
```
%% Cell type:markdown id: tags:
#### We encode our sentence as a **numerical vector** :
%% Cell type:code id: tags:
``` python
sentence_words = sentence.lower().split()
sentence_vect = [ dictionary[w] for w in sentence_words ]
print('Words sentence are : ', sentence_words)
print('Our vectorized sentence is : ', sentence_vect)
```
%% Cell type:markdown id: tags:
#### Next, we **one-hot** encode our vectorized sentence as a tensor :
%% Cell type:code id: tags:
``` python
# ---- We get a (dictionary size x sentence length) matrix of zeros
#
onehot = np.zeros( (10,8) )
# ---- We set some 1 for each word
#
for i,w in enumerate(sentence_vect):
onehot[w,i]=1
# --- Show it
#
print('In a basic way :\n\n', onehot, '\n\nWith a pandas view :\n')
data={ f'{sentence_words[i]:.^10}':onehot[:,i] for i,w in enumerate(sentence_vect) }
df=pd.DataFrame(data)
df.index=dictionary.keys()
# ---- Show it with pandas styling
#
df.style.format('{:1.0f}').highlight_max(axis=0).set_properties(**{'text-align': 'center'})
```
%% Cell type:markdown id: tags:
## Step 3 - Retrieve data
The IMDb dataset can be fetched directly from Keras - see [documentation](https://keras.io/api/datasets/imdb/)
Note : Due to their nature, textual data can be somewhat complex.
### 3.1 - Data structure :
The dataset is composed of 2 parts:
- **reviews**, this will be our **x**
- **opinions** (positive/negative), this will be our **y**
There is also a **dictionary**, because the words in the reviews are indexed
```
<dataset> = (<reviews>, <opinions>)
with : <reviews> = [ <review1>, <review2>, ... ]
<opinions> = [ <rate1>, <rate2>, ... ] where <ratei> = integer
where : <reviewi> = [ <w1>, <w2>, ...] <wi> are the index (int) of the word in the dictionary
<ratei> = int 0 for negative opinion, 1 for positive
<dictionary> = [ <word1>:<w1>, <word2>:<w2>, ... ]
with : <wordi> = word
<wi> = int
```
%% Cell type:markdown id: tags:
### 3.2 - Load dataset
For simplicity, we will use a pre-formatted dataset - See [documentation](https://keras.io/api/datasets/imdb)
However, Keras offers some useful tools for formatting textual data - See [documentation](https://keras.io/api/layers/preprocessing_layers/text/text_vectorization/)
By default :
- Start of a sequence will be marked with : 1
- Out of vocabulary word will be : 2
- First index will be : 3
%% Cell type:code id: tags:
``` python
# ----- Retrieve x,y
#
start_char = 1 # Start of a sequence (padding is 0)
oov_char = 2 # Out-of-vocabulary
index_from = 3 # First word id
(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words = vocab_size,
skip_top = hide_most_frequently,
start_char = start_char,
oov_char = oov_char,
index_from = index_from)
# ---- About
#
print("Max(x_train,x_test) : ", fidle.utils.rmax([x_train,x_test]) )
print("Min(x_train,x_test) : ", fidle.utils.rmin([x_train,x_test]) )
print("Len(x_train) : ", len(x_train))
print("Len(x_test) : ", len(x_test))
```
%% Cell type:markdown id: tags:
## Step 4 - About our dataset
When we loaded the dataset, we asked to use \<start\> as 1 and \<unknown word\> as 2
So, the word indexes are shifted by 3, via the parameter index_from=3
### 4.1 - Sentences encoding
%% Cell type:code id: tags:
``` python
print('\nReview example (x_train[12]) :\n\n',x_train[12])
print('\nOpinions (y_train) :\n\n',y_train)
```
%% Cell type:markdown id: tags:
### 4.2 - Load dictionary
%% Cell type:code id: tags:
``` python
# ---- Retrieve dictionary {word:index}, and encode it in ascii
#
word_index = imdb.get_word_index()
# ---- Shift the dictionary from <index_from>
#
word_index = {w:(i+index_from) for w,i in word_index.items()}
# ---- Add <pad>, <start> and <unknown> tags
#
word_index.update( {'<pad>':0, '<start>':1, '<unknown>':2, '<undef>':3,} )
# ---- Create a reverse dictionary : {index:word}
#
index_word = {index:word for word,index in word_index.items()}
# ---- About dictionary
#
print('\nDictionary size : ', len(word_index))
print('\nSmall extract :\n')
for k in range(440,455):print(f' {k:2d} : {index_word[k]}' )
# ---- Add a nice function to translate a review back to text :
#
def dataset2text(review):
return ' '.join([index_word.get(i, '?') for i in review])
```
%% Cell type:markdown id: tags:
### 4.3 - Have a look, for humans
%% Cell type:code id: tags:
``` python
fidle.utils.subtitle('Review example :')
print(x_train[12])
fidle.utils.subtitle('After translation :')
print(dataset2text(x_train[12]))
```
%% Cell type:markdown id: tags:
### 4.4 - Few statistics
%% Cell type:code id: tags:
``` python
sizes=[len(i) for i in x_train]
plt.figure(figsize=(12,4))
plt.hist(sizes, bins=400)
plt.gca().set(title='Distribution of reviews by size - [{:5.2f}, {:5.2f}]'.format(min(sizes),max(sizes)),
xlabel='Size', ylabel='Density', xlim=[0,1500])
fidle.scrawler.save_fig('01-stats-sizes')
plt.show()
```
%% Cell type:code id: tags:
``` python
unk=[ 100*(s.count(oov_char)/len(s)) for s in x_train]
plt.figure(figsize=(12,4))
plt.hist(unk, bins=100)
plt.gca().set(title='Percent of unknown words - [{:5.2f}, {:5.2f}]'.format(min(unk),max(unk)),
xlabel='# unknown', ylabel='Density', xlim=[0,30])
fidle.scrawler.save_fig('02-stats-unknown')
plt.show()
```
%% Cell type:markdown id: tags:
## Step 5 - Basic approach with "one-hot" vector encoding
In this basic approach, each sentence is encoded with a single **vector** whose length equals the **size of the dictionary**.
The value of each component is 1 if the corresponding word is present in the sentence, 0 otherwise.
For a sentence s=[3,4,7] and a dictionary of 10 words...
We will have a vector v=[0,0,0,1,1,0,0,1,0,0]
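A quick check of this idea, with a tiny throwaway example :
%% Cell type:code id: tags:
``` python
import numpy as np

s = [3,4,7]        # a "sentence", as word indexes
v = np.zeros(10)   # one slot per dictionary word
v[s] = 1           # mark the words that are present
print(v)           # -> [0. 0. 0. 1. 1. 0. 0. 1. 0. 0.]
```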
%% Cell type:markdown id: tags:
### 5.1 - Our one-hot encoder function
%% Cell type:code id: tags:
``` python
def one_hot_encoder(x, vector_size=10000):
# ---- Set all to 0
#
x_encoded = np.zeros((len(x), vector_size))
# ---- For each sentence
#
for i,sentence in enumerate(x):
for word in sentence:
x_encoded[i, word] = 1.
return x_encoded
```
%% Cell type:markdown id: tags:
### 5.2 - Encoding...
%% Cell type:code id: tags:
``` python
x_train = one_hot_encoder(x_train, vector_size=vocab_size)
x_test = one_hot_encoder(x_test, vector_size=vocab_size)
print("To have a look, x_train[12] became :", x_train[12] )
```
%% Cell type:markdown id: tags:
## Step 6 - Build a nice model
%% Cell type:code id: tags:
``` python
model = keras.Sequential(name='My IMDB classifier')
model.add(keras.layers.Input( shape=(vocab_size,) ))
model.add(keras.layers.Dense( 32, activation='relu'))
model.add(keras.layers.Dense( 32, activation='relu'))
model.add(keras.layers.Dense( 1, activation='sigmoid'))
model.compile(optimizer = 'rmsprop',
loss = 'binary_crossentropy',
metrics = ['accuracy'])
model.summary()
```
%% Cell type:markdown id: tags:
## Step 7 - Train the model
### 7.1 - Add callback
%% Cell type:code id: tags:
``` python
os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)
save_dir = f'{run_dir}/models/best_model.keras'
savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_accuracy', mode='max', save_best_only=True)
```
%% Cell type:markdown id: tags:
### 7.2 - Train it
%% Cell type:code id: tags:
``` python
%%time
history = model.fit(x_train,
y_train,
epochs = epochs,
batch_size = batch_size,
validation_data = (x_test, y_test),
verbose = fit_verbosity,
callbacks = [savemodel_callback])
```
%% Cell type:markdown id: tags:
## Step 8 - Evaluate
### 8.1 - Training history
%% Cell type:code id: tags:
``` python
fidle.scrawler.history(history, save_as='02-history')
```
%% Cell type:markdown id: tags:
### 8.2 - Reload and evaluate best model
%% Cell type:code id: tags:
``` python
model = keras.models.load_model(f'{run_dir}/models/best_model.keras')
# ---- Evaluate
score = model.evaluate(x_test, y_test, verbose=0)
print('\n\nModel evaluation :\n')
print(' x_test / loss : {:5.4f}'.format(score[0]))
print(' x_test / accuracy : {:5.4f}'.format(score[1]))
values=[score[1], 1-score[1]]
fidle.scrawler.donut(values,["Accuracy","Errors"], title="#### Accuracy donut is :", save_as='03-donut')
# ---- Confusion matrix
y_sigmoid = model.predict(x_test, verbose=fit_verbosity)
y_pred = y_sigmoid.copy()
y_pred[ y_sigmoid< 0.5 ] = 0
y_pred[ y_sigmoid>=0.5 ] = 1
fidle.scrawler.confusion_matrix_txt(y_test,y_pred,labels=range(2))
fidle.scrawler.confusion_matrix(y_test,y_pred,range(2), figsize=(8, 8),normalize=False, save_as='04-confusion-matrix')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3IMDB2] - Sentiment analysis with text embedding
<!-- DESC --> A very classical example of word embedding with a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to guess whether film reviews are **positive or negative** based on the analysis of the text.
- Understand the management of **textual data** and **sentiment analysis**
The original dataset can be found **[here](http://ai.stanford.edu/~amaas/data/sentiment/)**
Note that [IMDb.com](https://imdb.com) offers several easy-to-use [datasets](https://www.imdb.com/interfaces/)
For simplicity's sake, we'll use the dataset directly [embedded in Keras](https://keras.io/datasets)
## What we're going to do :
- Retrieve data
- Preparing the data
- Build a model
- Train the model
- Evaluate the result
%% Cell type:markdown id: tags:
## Step 1 - Import and init
### 1.1 - Python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import keras.datasets.imdb as imdb
import h5py,json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3IMDB2')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
The words in the vocabulary are classified from the most frequent to the rarest.\
`vocab_size` is the number of words we will remember in our vocabulary (the other words will be considered as unknown).\
`hide_most_frequently` is the number of ignored words, among the most common ones\
`review_len` is the review length\
`dense_vector_size` is the size of the generated dense vectors\
`output_dir` is where we will save our dictionaries (./data is a good choice)\
`fit_verbosity` is the verbosity during training : 0 = silent, 1 = progress bar, 2 = one line per epoch
%% Cell type:code id: tags:
``` python
vocab_size = 5000
hide_most_frequently = 0
review_len = 256
dense_vector_size = 32
epochs = 30
batch_size = 512
output_dir = './data'
fit_verbosity = 1
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('vocab_size', 'hide_most_frequently', 'review_len', 'dense_vector_size')
fidle.override('batch_size', 'epochs', 'output_dir', 'fit_verbosity')
```
%% Cell type:markdown id: tags:
## Step 2 - Retrieve data
The IMDb dataset can be fetched directly from Keras - see [documentation](https://keras.io/api/datasets)
Note : Due to their nature, textual data can be somewhat complex.
For more details about the management of this dataset, see notebook [IMDB1](01-One-hot-encoding.ipynb)
%% Cell type:markdown id: tags:
### 2.1 - Get dataset
%% Cell type:code id: tags:
``` python
# ----- Retrieve x,y
#
start_char = 1 # Start of a sequence (padding is 0)
oov_char = 2 # Out-of-vocabulary
index_from = 3 # First word id
(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words = vocab_size,
skip_top = hide_most_frequently,
start_char = start_char,
oov_char = oov_char,
index_from = index_from)
# ---- About
#
print("Max(x_train,x_test) : ", fidle.utils.rmax([x_train,x_test]) )
print("Min(x_train,x_test) : ", fidle.utils.rmin([x_train,x_test]) )
print("Len(x_train) : ", len(x_train))
print("Len(x_test) : ", len(x_test))
```
%% Cell type:markdown id: tags:
### 2.2 - Load dictionary
Not essential, but nice if you want to take a closer look at our reviews ;-)
%% Cell type:code id: tags:
``` python
# ---- Retrieve dictionary {word:index}, and encode it in ascii
# Shift the dictionary from +3
# Add <pad>, <start> and <unknown> tags
# Create a reverse dictionary : {index:word}
#
word_index = imdb.get_word_index()
word_index = {w:(i+index_from) for w,i in word_index.items()}
word_index.update( {'<pad>':0, '<start>':1, '<unknown>':2, '<undef>':3,} )
index_word = {index:word for word,index in word_index.items()}
# ---- A nice function to translate a review back to text :
#
def dataset2text(review):
return ' '.join([index_word.get(i, '?') for i in review])
```
%% Cell type:markdown id: tags:
## Step 3 - Preprocess the data (padding)
In order to be processed by an NN, all entries must have the **same length.**
We chose a review length of **review_len**
We will therefore complete them with padding (0, i.e. \<pad\>)
%% Cell type:code id: tags:
``` python
x_train = keras.preprocessing.sequence.pad_sequences(x_train,
value = 0,
padding = 'post',
maxlen = review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test,
value = 0 ,
padding = 'post',
maxlen = review_len)
fidle.utils.subtitle('After padding :')
print(x_train[12])
```
%% Cell type:markdown id: tags:
**Save dataset and dictionary (For future use but not mandatory)**
%% Cell type:code id: tags:
``` python
# ---- Write dataset in a h5 file, could be useful
#
fidle.utils.mkdir(output_dir)
with h5py.File(f'{output_dir}/dataset_imdb.h5', 'w') as f:
f.create_dataset("x_train", data=x_train)
f.create_dataset("y_train", data=y_train)
f.create_dataset("x_test", data=x_test)
f.create_dataset("y_test", data=y_test)
print('Dataset h5 file saved.')
with open(f'{output_dir}/word_index.json', 'w') as fp:
json.dump(word_index, fp)
print('Word to index saved.')
```
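%% Cell type:markdown id: tags:
A minimal sketch (ours) of how these files can be read back later, reusing `output_dir`, `h5py` and `json` from above :
%% Cell type:code id: tags:
``` python
# ---- Reload the h5 dataset and the word index (illustration only)
#
with h5py.File(f'{output_dir}/dataset_imdb.h5', 'r') as f:
    x_train2, y_train2 = f['x_train'][:], f['y_train'][:]
with open(f'{output_dir}/word_index.json', 'r') as fp:
    word_index2 = json.load(fp)
print('Reloaded :', x_train2.shape, y_train2.shape, len(word_index2), 'words')
```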
%% Cell type:markdown id: tags:
## Step 4 - Build the model
More documentation about the layers used in this model (a small illustration follows) :
- [Embedding](https://keras.io/api/layers/core_layers/embedding/)
- [GlobalAveragePooling1D](https://keras.io/api/layers/pooling_layers/global_average_pooling1d/)
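To see what `GlobalAveragePooling1D` actually does, here is a small numpy illustration (ours, independent of the model below) : it simply averages the embedded word vectors over the sequence axis, turning a whole review into one dense vector.
%% Cell type:code id: tags:
``` python
import numpy as np

x = np.random.rand(1, 4, 3)   # (batch, review_len, dense_vector_size)
print(x.mean(axis=1).shape)   # -> (1, 3) : one averaged vector per review
```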
%% Cell type:code id: tags:
``` python
model = keras.Sequential(name='Embedding model')
model.add(keras.layers.Input( shape=(review_len,) ))
model.add(keras.layers.Embedding( input_dim = vocab_size,
output_dim = dense_vector_size))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(dense_vector_size, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.compile( optimizer = 'adam',
loss = 'binary_crossentropy',
metrics = ['accuracy'])
model.summary()
```
%% Cell type:markdown id: tags:
## Step 5 - Train the model
### 5.1 Add Callbacks
%% Cell type:code id: tags:
``` python
os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)
save_dir = f'{run_dir}/models/best_model.keras'
savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_accuracy', mode='max', save_best_only=True)
```
%% Cell type:markdown id: tags:
### 5.2 - Train it
%% Cell type:code id: tags:
``` python
%%time
history = model.fit(x_train,
y_train,
epochs = epochs,
batch_size = batch_size,
validation_data = (x_test, y_test),
verbose = fit_verbosity,
callbacks = [savemodel_callback])
```
%% Cell type:markdown id: tags:
## Step 6 - Evaluate
### 6.1 - Training history
%% Cell type:code id: tags:
``` python
fidle.scrawler.history(history, save_as='02-history')
```
%% Cell type:markdown id: tags:
### 6.2 - Reload and evaluate best model
%% Cell type:code id: tags:
``` python
model = keras.models.load_model(f'{run_dir}/models/best_model.keras')
# ---- Evaluate
score = model.evaluate(x_test, y_test, verbose=0)
print('x_test / loss : {:5.4f}'.format(score[0]))
print('x_test / accuracy : {:5.4f}'.format(score[1]))
values=[score[1], 1-score[1]]
fidle.scrawler.donut(values,["Accuracy","Errors"], title="#### Accuracy donut is :", save_as='03-donut')
# ---- Confusion matrix
y_sigmoid = model.predict(x_test, verbose=fit_verbosity)
y_pred = y_sigmoid.copy()
y_pred[ y_sigmoid< 0.5 ] = 0
y_pred[ y_sigmoid>=0.5 ] = 1
fidle.scrawler.confusion_matrix_txt(y_test,y_pred,labels=range(2))
fidle.scrawler.confusion_matrix(y_test,y_pred,range(2), figsize=(8, 8),normalize=False, save_as='04-confusion-matrix')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3IMDB3] - Reload and reuse a saved model
<!-- DESC --> Retrieving a saved model to perform a sentiment analysis (movie review), using Keras 3 and PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to guess whether our personal film reviews are **positive or negative** based on the analysis of the text.
- For this, we will use our **previously saved model**.
## What we're going to do :
- Preparing our data
- Retrieve our saved model
- Evaluate the result
%% Cell type:markdown id: tags:
## Step 1 - Init python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import json,re
import numpy as np
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3IMDB3')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
The words in the vocabulary are classified from the most frequent to the rarest.\
`vocab_size` is the number of words we will remember in our vocabulary (the other words will be considered as unknown).\
`review_len` is the review length\
`saved_models` is where our models were previously saved\
`dictionaries_dir` is where our dictionaries were previously saved (./data is a good choice)
%% Cell type:code id: tags:
``` python
vocab_size = 10000
review_len = 256
saved_models = './run/K3IMDB2'
dictionaries_dir = './data'
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('vocab_size', 'review_len', 'saved_models', 'dictionaries_dir')
```
%% Cell type:markdown id: tags:
## Step 2 - Preparing the data
### 2.1 - Our reviews :
%% Cell type:code id: tags:
``` python
reviews = [ "This film is particularly nice, a must see.",
"This film is a great classic that cannot be ignored.",
"I don't remember ever having seen such a movie...",
"This movie is just abominable and doesn't deserve to be seen!"]
```
%% Cell type:markdown id: tags:
### 2.2 - Retrieve dictionaries
Note : This dictionary was generated by the [02-Embedding-Keras](02-Keras-embedding.ipynb) notebook.
%% Cell type:code id: tags:
``` python
with open(f'{dictionaries_dir}/word_index.json', 'r') as fp:
word_index = json.load(fp)
index_word = { i:w for w,i in word_index.items() }
print('Dictionaries loaded. ', len(word_index), 'entries' )
```
%% Cell type:markdown id: tags:
### 2.3 - Clean, index and pad
Phrases are split into words, punctuation is removed, sentence length is limited and padding is added...
**Note** : 1 is "Start" and 2 is "unknown"
%% Cell type:code id: tags:
``` python
start_char = 1 # Start of a sequence (padding is 0)
oov_char = 2 # Out-of-vocabulary
index_from = 3 # First word id
nb_reviews = len(reviews)
x_data = []
# ---- For all reviews
for review in reviews:
print('Words are : ', end='')
# ---- First index must be <start>
index_review=[start_char]
print(f'{start_char} ', end='')
# ---- For all words
for w in review.split(' '):
# ---- Clean it
w_clean = re.sub(r"[^a-zA-Z0-9]", "", w)
# ---- Not empty ?
if len(w_clean)>0:
# ---- Get the index - must be inside dict or is out of vocab (oov)
w_index = word_index.get(w_clean.lower(), oov_char)
if w_index>=vocab_size : w_index=oov_char
# ---- Add the index (out-of-vocabulary words become <unknown>)
index_review.append(w_index)
print(f'{w_index} ', end='')
# ---- Add the indexed review
x_data.append(index_review)
print()
# ---- Padding
x_data = keras.preprocessing.sequence.pad_sequences(x_data, value = 0, padding = 'post', maxlen = review_len)
```
%% Cell type:markdown id: tags:
### 2.4 - Have a look
%% Cell type:code id: tags:
``` python
def translate(x):
return ' '.join( [index_word.get(i,'?') for i in x] )
for i in range(nb_reviews):
imax=np.where(x_data[i]==0)[0][0]+5
print(f'\nText review {i} :', reviews[i])
print(f'tokens vector :', list(x_data[i][:imax]), '(...)')
print('Translation :', translate(x_data[i][:imax]), '(...)')
```
%% Cell type:markdown id: tags:
## Step 3 - Bring back the model
%% Cell type:code id: tags:
``` python
model = keras.models.load_model(f'{saved_models}/models/best_model.keras')
```
%% Cell type:markdown id: tags:
## Step 4 - Predict
%% Cell type:code id: tags:
``` python
y_pred = model.predict(x_data, verbose=0)
```
%% Cell type:markdown id: tags:
#### And the winner is :
%% Cell type:code id: tags:
``` python
for i,review in enumerate(reviews):
rate = y_pred[i][0]
opinion = 'NEGATIVE :-(' if rate<0.5 else 'POSITIVE :-)'
print(f'{review:<70} => {rate:.2f} - {opinion}')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3IMDB4] - Reload embedded vectors
<!-- DESC --> Retrieving embedded vectors from our trained model, using Keras 3 and PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to retrieve and visualize our embedded vectors
- For this, we will use our **previously saved model**.
## What we're going to do :
- Retrieve our saved model
- Extract vectors and play with
%% Cell type:markdown id: tags:
## Step 1 - Init python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import json,re
import numpy as np
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3IMDB4')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
The words in the vocabulary are classified from the most frequent to the rarest.\
`vocab_size` is the number of words we will remember in our vocabulary (the other words will be considered as unknown).\
`review_len` is the review length\
`saved_models` is where our models were previously saved\
`dictionaries_dir` is where our dictionaries were previously saved (./data is a good choice)
%% Cell type:code id: tags:
``` python
vocab_size = 5000
review_len = 256
saved_models = './run/K3IMDB2'
dictionaries_dir = './data'
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('vocab_size', 'review_len', 'saved_models', 'dictionaries_dir')
```
%% Cell type:markdown id: tags:
## Step 2 - Get the embedding vectors !
%% Cell type:markdown id: tags:
### 2.1 - Load model and dictionaries
Note : This dictionary was generated by the [02-Embedding-Keras](02-Keras-embedding.ipynb) notebook.
%% Cell type:code id: tags:
``` python
model = keras.models.load_model(f'{saved_models}/models/best_model.keras')
print('Model loaded.')
with open(f'{dictionaries_dir}/word_index.json', 'r') as fp:
word_index = json.load(fp)
index_word = { i:w for w,i in word_index.items() }
print('Dictionaries loaded. ', len(word_index), 'entries' )
```
%% Cell type:markdown id: tags:
### 2.2 - Retrieve embeddings
%% Cell type:code id: tags:
``` python
embeddings = model.layers[0].get_weights()[0]
print('Shape of embeddings : ',embeddings.shape)
```
%% Cell type:markdown id: tags:
### 2.3 - Build a nice dictionary
%% Cell type:code id: tags:
``` python
word_embedding = { index_word[i]:embeddings[i] for i in range(vocab_size) }
```
%% Cell type:markdown id: tags:
## Step 3 - Have a look !
#### Show embedding of a word :
%% Cell type:code id: tags:
``` python
word_embedding['nice']
```
%% Cell type:markdown id: tags:
#### A few useful functions to play with
%% Cell type:code id: tags:
``` python
# Return the L2 distance between 2 words
#
def l2w(w1,w2):
v1=word_embedding[w1]
v2=word_embedding[w2]
return np.linalg.norm(v2-v1)
# Show distance between 2 words
#
def show_l2(w1,w2):
print(f'\nL2 between [{w1}] and [{w2}] : ',l2w(w1,w2))
# Displays the 14 closest words to a given word (excluding itself, searching the 1000 most frequent words)
#
def neighbors(w1):
v1=word_embedding[w1]
dd={}
for i in range(4, 1000):
w2=index_word[i]
dd[w2]=l2w(w1,w2)
dd= {k: v for k, v in sorted(dd.items(), key=lambda item: item[1])}
print(f'\nNeighbors of [{w1}] : ', list(dd.keys())[1:15])
```
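%% Cell type:markdown id: tags:
A possible companion (our own addition, not part of the original notebook) : for embeddings, **cosine similarity** is often preferred to the L2 distance, because it ignores vector magnitudes. It reuses `np` and `word_embedding` defined above.
%% Cell type:code id: tags:
``` python
# Return the cosine similarity between 2 words (sketch)
#
def cosine(w1,w2):
    v1=word_embedding[w1]
    v2=word_embedding[w2]
    return np.dot(v1,v2) / ( np.linalg.norm(v1) * np.linalg.norm(v2) )

print('cos(nice, pleasant) :', cosine('nice','pleasant'))
print('cos(nice, horrible) :', cosine('nice','horrible'))
```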
%% Cell type:markdown id: tags:
#### Examples
%% Cell type:code id: tags:
``` python
show_l2('nice', 'pleasant')
show_l2('nice', 'horrible')
neighbors('horrible')
neighbors('great')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3IMDB5] - Sentiment analysis with a RNN network
<!-- DESC --> Still the same problem, but with a network combining embedding and RNN, using Keras 3 and PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to guess whether film reviews are **positive or negative** based on the analysis of the text.
- Use of a model combining embedding and a recurrent (GRU) layer
The original dataset can be found **[here](http://ai.stanford.edu/~amaas/data/sentiment/)**
Note that [IMDb.com](https://imdb.com) offers several easy-to-use [datasets](https://www.imdb.com/interfaces/)
For simplicity's sake, we'll use the dataset directly [embedded in Keras](https://keras.io/datasets)
## What we're going to do :
- Retrieve data
- Preparing the data
- Build an Embedding/GRU model
- Train the model
- Evaluate the result
%% Cell type:markdown id: tags:
## Step 1 - Init python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import keras.datasets.imdb as imdb
import json,re
import numpy as np
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3IMDB5')
```
%% Cell type:markdown id: tags:
## Step 2 - Parameters
The words in the vocabulary are classified from the most frequent to the rarest.\
`vocab_size` is the number of words we will remember in our vocabulary (the other words will be considered as unknown).\
`hide_most_frequently` is the number of ignored words, among the most common ones\
`review_len` is the review length\
`dense_vector_size` is the size of the generated dense vectors\
`fit_verbosity` is the verbosity during training : 0 = silent, 1 = progress bar, 2 = one line per epoch\
`scale` is a dataset scale factor - note that scale=1 needs a training time > 10'
%% Cell type:code id: tags:
``` python
vocab_size = 10000
hide_most_frequently = 0
review_len = 256
dense_vector_size = 32
epochs = 10
batch_size = 128
fit_verbosity = 1
scale = 0.2
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('vocab_size', 'hide_most_frequently', 'review_len', 'dense_vector_size')
fidle.override('batch_size', 'epochs', 'fit_verbosity', 'scale')
```
%% Cell type:markdown id: tags:
## Step 3 - Retrieve data
The IMDb dataset can be fetched directly from Keras - see [documentation](https://keras.io/api/datasets)
Note : Due to their nature, textual data can be somewhat complex.
%% Cell type:markdown id: tags:
### 3.1 - Get dataset
For simplicity, we will use a pre-formatted dataset - See [documentation](https://keras.io/api/datasets/imdb/)
However, Keras offers some useful tools for formatting textual data - See [documentation](https://keras.io/api/layers/preprocessing_layers/text/text_vectorization/)
**Load dataset :**
%% Cell type:code id: tags:
``` python
# ----- Retrieve x,y
#
start_char = 1 # Start of a sequence (padding is 0)
oov_char = 2 # Out-of-vocabulary
index_from = 3 # First word id
(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words = vocab_size,
skip_top = hide_most_frequently,
start_char = start_char,
oov_char = oov_char,
index_from = index_from)
# ---- Rescale
#
n1 = int(scale * len(x_train))
n2 = int(scale * len(x_test))
x_train, y_train = x_train[:n1], y_train[:n1]
x_test, y_test = x_test[:n2], y_test[:n2]
# ---- About
#
print("Max(x_train,x_test) : ", fidle.utils.rmax([x_train,x_test]) )
print("Min(x_train,x_test) : ", fidle.utils.rmin([x_train,x_test]) )
print("Len(x_train) : ", len(x_train))
print("Len(x_test) : ", len(x_test))
```
%% Cell type:markdown id: tags:
### 3.2 - Have a look for humans (optional)
When we loaded the dataset, we asked to use \<start\> as 1 and \<unknown word\> as 2
So, the word indexes are shifted by 3, via the parameter index_from=3
**Load dictionary :**
%% Cell type:code id: tags:
``` python
# ---- Retrieve dictionary {word:index}, and encode it in ascii
# Shift the dictionary from +3
# Add <pad>, <start> and <unknown> tags
# Create a reverse dictionary : {index:word}
#
word_index = imdb.get_word_index()
word_index = {w:(i+index_from) for w,i in word_index.items()}
word_index.update( {'<pad>':0, '<start>':1, '<unknown>':2, '<undef>':3,} )
index_word = {index:word for word,index in word_index.items()}
# ---- A nice function to translate a review back to text :
#
def dataset2text(review):
return ' '.join([index_word.get(i, '?') for i in review])
```
%% Cell type:markdown id: tags:
**Have a look :**
%% Cell type:code id: tags:
``` python
print('\nDictionary size : ', len(word_index))
for k in range(440,455):print(f'{k:2d} : {index_word[k]}' )
fidle.utils.subtitle('Review example :')
print(x_train[12])
fidle.utils.subtitle('After translation :')
print(dataset2text(x_train[12]))
```
%% Cell type:markdown id: tags:
## Step 4 - Preprocess the data (padding)
In order to be processed by an NN, all entries must have the **same length.**
We chose a review length of **review_len**
We will therefore complete them with padding (0, i.e. \<pad\>)
%% Cell type:code id: tags:
``` python
x_train = keras.preprocessing.sequence.pad_sequences(x_train,
value = 0,
padding = 'post',
maxlen = review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test,
value = 0 ,
padding = 'post',
maxlen = review_len)
fidle.utils.subtitle('After padding :')
print(x_train[12])
fidle.utils.subtitle('In real words :')
print(dataset2text(x_train[12]))
```
%% Cell type:markdown id: tags:
## Step 5 - Build the model
More documentation about the layers used in this model (see also the quick shape check below) :
- [Embedding](https://keras.io/api/layers/core_layers/embedding/)
- [GRU](https://keras.io/api/layers/recurrent_layers/gru/)
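A quick shape check, as announced (our own illustration, reusing the `keras` import above with the torch backend) : `Embedding` maps `(batch, review_len)` integer tokens to `(batch, review_len, dense_vector_size)` vectors, then `GRU` reduces each sequence to a single state vector.
%% Cell type:code id: tags:
``` python
import numpy as np

x   = np.random.randint(0, 100, size=(2, 12))   # (batch, review_len)
emb = keras.layers.Embedding(input_dim=100, output_dim=8)
gru = keras.layers.GRU(5)
print(gru(emb(x)).shape)                        # -> (2, 5)
```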
%% Cell type:code id: tags:
``` python
model = keras.Sequential()
model.add(keras.layers.Embedding(input_dim = vocab_size, output_dim = dense_vector_size))
model.add(keras.layers.GRU(50))
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.compile(optimizer = 'rmsprop',
loss = 'binary_crossentropy',
metrics = ['accuracy'])
model.summary()
```
%% Cell type:markdown id: tags:
## Step 6 - Train the model
### 6.1 - Add Callbacks
%% Cell type:code id: tags:
``` python
os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)
save_dir = f'{run_dir}/models/best_model.keras'
savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_accuracy', mode='max', save_best_only=True)
```
%% Cell type:markdown id: tags:
### 6.2 - Train it
Note : With scale=0.2, batch_size=128 and epochs=10, this needs about 4' on a CPU laptop
%% Cell type:code id: tags:
``` python
history = model.fit(x_train,
y_train,
epochs = epochs,
batch_size = batch_size,
validation_data = (x_test, y_test),
verbose = fit_verbosity,
callbacks = [savemodel_callback])
```
%% Cell type:markdown id: tags:
### 6.3 - Training history
%% Cell type:code id: tags:
``` python
fidle.scrawler.history(history, save_as='02-history')
```
%% Cell type:markdown id: tags:
## Step 7 - Evaluation
Reload and evaluate best model
%% Cell type:code id: tags:
``` python
model = keras.models.load_model(f'{run_dir}/models/best_model.keras')
# ---- Evaluate
score = model.evaluate(x_test, y_test, verbose=0)
print('x_test / loss : {:5.4f}'.format(score[0]))
print('x_test / accuracy : {:5.4f}'.format(score[1]))
values=[score[1], 1-score[1]]
fidle.scrawler.donut(values,["Accuracy","Errors"], title="#### Accuracy donut is :", save_as='03-donut')
# ---- Confusion matrix
y_sigmoid = model.predict(x_test, verbose=fit_verbosity)
y_pred = y_sigmoid.copy()
y_pred[ y_sigmoid< 0.5 ] = 0
y_pred[ y_sigmoid>=0.5 ] = 1
fidle.scrawler.confusion_matrix_txt(y_test,y_pred,labels=range(2))
fidle.scrawler.confusion_matrix(y_test,y_pred,range(2), figsize=(8, 8),normalize=False, save_as='04-confusion-matrix')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3GTSRB1] - Dataset analysis and preparation
<!-- DESC --> Episode 1 : Analysis of the GTSRB dataset and creation of an enhanced dataset
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Understand the **complexity associated with data**, even when it is only images
- Learn how to build up a simple and **usable image dataset**
The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
The final aim is to recognise them !
Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
## What we're going to do :
- Understanding the dataset
- Preparing and formatting enhanced data
- Save enhanced datasets in h5 file format
%% Cell type:markdown id: tags:
## Step 1 - Import and init
%% Cell type:code id: tags:
``` python
import os, time, sys
import csv
import math, random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import h5py
from skimage.morphology import disk
from skimage.util import img_as_ubyte
from skimage.filters import rank
from skimage import io, color, exposure, transform
from importlib import reload
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3GTSRB1')
```
%% Cell type:markdown id: tags:
## Step 2 - Parameters
The generation of datasets may require some time and space : **10' and 10 GB**.
You can choose to perform tests or generate the whole enhanced dataset by setting the following parameters:
`scale` : 1 means 100% of the dataset - set 0.2 for tests (needs about 2 minutes with scale = 0.2)
`progress_verbosity`: Verbosity of progress bar: 0=silent, 1=progress bar, 2=One line
`output_dir` : where to write enhanced dataset, could be :
- `./data`, for tests purpose
- `<datasets_dir>/GTSRB/enhanced` to add clusters in your datasets dir.
Uncomment the right lines according to what you want :
%% Cell type:code id: tags:
``` python
# ---- For smart tests :
#
scale = 0.2
output_dir = './data'
# ---- For a Full dataset generation :
#
# scale = 1
# output_dir = f'{datasets_dir}/GTSRB/enhanced'
# ---- Verbosity
#
progress_verbosity = 2
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('scale', 'output_dir', 'progress_verbosity')
```
%% Cell type:markdown id: tags:
## Step 3 - Read the dataset
Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
- Each directory contains one CSV file with annotations : `GT-<ClassID>.csv` and the training images
- First line is fieldnames: `Filename ; Width ; Height ; Roi.X1 ; Roi.Y1 ; Roi.X2 ; Roi.Y2 ; ClassId`
### 3.1 - Understanding the dataset
The original dataset is in : **\<dataset_dir\>/GTSRB/origine.**
There are 3 subsets : **Train**, **Test** and **Meta.**
Each subset has a **CSV file** and a **subdir** with **images**.
%% Cell type:code id: tags:
``` python
df = pd.read_csv(f'{datasets_dir}/GTSRB/origine/Test.csv', header=0)
display(df.head(10))
```
%% Cell type:markdown id: tags:
### 3.2 - Useful functions
A nice function for reading a dataset from an index CSV file.\
Input: an index CSV file\
Output: an array of images and an array of corresponding labels
%% Cell type:code id: tags:
``` python
def read_csv_dataset(csv_file):
'''
Reads traffic sign data from German Traffic Sign Recognition Benchmark dataset.
Arguments:
csv filename : Description file, Example /data/GTSRB/Train.csv
Returns:
x,y : np array of images, np array of corresponding labels
'''
path = os.path.dirname(csv_file)
name = os.path.basename(csv_file)
# ---- Read csv file
#
df = pd.read_csv(csv_file, header=0)
# ---- Get filenames and ClassIds
#
filenames = df['Path'].to_list()
y = df['ClassId'].to_list()
x = []
# ---- Read images
#
for filename in filenames:
image=io.imread(f'{path}/{filename}')
x.append(image)
fidle.utils.update_progress(name,len(x),len(filenames), verbosity=progress_verbosity)
# ---- Return
#
return np.array(x,dtype=object),np.array(y)
```
%% Cell type:markdown id: tags:
### 3.3 - Read the data
We will read the following datasets:
- **Train** subset, for learning data as : `x_train, y_train`
- **Test** subset, for validation data as : `x_test, y_test`
- **Meta** subset, for visualisation as : `x_meta, y_meta`
The learning data will be randomly shuffled and the illustration data (Meta) sorted.
This will take about 1'30 on HPC or 45s on a laptop.
%% Cell type:code id: tags:
``` python
chrono=fidle.Chrono()
chrono.start()
# ---- Read datasets
(x_train,y_train) = read_csv_dataset(f'{datasets_dir}/GTSRB/origine/Train.csv')
(x_test ,y_test) = read_csv_dataset(f'{datasets_dir}/GTSRB/origine/Test.csv')
(x_meta ,y_meta) = read_csv_dataset(f'{datasets_dir}/GTSRB/origine/Meta.csv')
# ---- Shuffle train set
x_train, y_train = fidle.utils.shuffle_np_dataset(x_train, y_train)
# ---- Sort Meta
combined = list(zip(x_meta,y_meta))
combined.sort(key=lambda x: x[1])
x_meta,y_meta = zip(*combined)
chrono.show()
```
%% Cell type:markdown id: tags:
## Step 4 - Few statistics about train dataset
We want to know if our images are homogeneous in terms of size, ratio, width or height.
### 4.1 - Do statistics
%% Cell type:code id: tags:
``` python
train_size = []
train_ratio = []
train_lx = []
train_ly = []
test_size = []
test_ratio = []
test_lx = []
test_ly = []
for image in x_train:
(lx,ly,lz) = image.shape
train_size.append(lx*ly/1024)
train_ratio.append(lx/ly)
train_lx.append(lx)
train_ly.append(ly)
for image in x_test:
(lx,ly,lz) = image.shape
test_size.append(lx*ly/1024)
test_ratio.append(lx/ly)
test_lx.append(lx)
test_ly.append(ly)
```
%% Cell type:markdown id: tags:
### 4.2 - Show statistics
%% Cell type:code id: tags:
``` python
figsize=(10,4)
# ------ Global stuff
print("x_train shape : ",x_train.shape)
print("y_train shape : ",y_train.shape)
print("x_test shape : ",x_test.shape)
print("y_test shape : ",y_test.shape)
# ------ Statistics / sizes
plt.figure(figsize=figsize)
plt.hist([train_size,test_size], bins=100)
plt.gca().set(title='Sizes in Kpixels - Train=[{:5.2f}, {:5.2f}]'.format(min(train_size),max(train_size)),
ylabel='Population', xlim=[0,30])
plt.legend(['Train','Test'])
fidle.scrawler.save_fig('01-stats-sizes')
plt.show()
# ------ Statistics / ratio lx/ly
plt.figure(figsize=figsize)
plt.hist([train_ratio,test_ratio], bins=100)
plt.gca().set(title='Ratio lx/ly - Train=[{:5.2f}, {:5.2f}]'.format(min(train_ratio),max(train_ratio)),
ylabel='Population', xlim=[0.8,1.2])
plt.legend(['Train','Test'])
fidle.scrawler.save_fig('02-stats-ratios')
plt.show()
# ------ Statistics / lx
plt.figure(figsize=figsize)
plt.hist([train_lx,test_lx], bins=100)
plt.gca().set(title='Images lx - Train=[{:5.2f}, {:5.2f}]'.format(min(train_lx),max(train_lx)),
ylabel='Population', xlim=[20,150])
plt.legend(['Train','Test'])
fidle.scrawler.save_fig('03-stats-lx')
plt.show()
# ------ Statistics / ly
plt.figure(figsize=figsize)
plt.hist([train_ly,test_ly], bins=100)
plt.gca().set(title='Images ly - Train=[{:5.2f}, {:5.2f}]'.format(min(train_ly),max(train_ly)),
ylabel='Population', xlim=[20,150])
plt.legend(['Train','Test'])
fidle.scrawler.save_fig('04-stats-ly')
plt.show()
# ------ Statistics / classId
plt.figure(figsize=figsize)
plt.hist([y_train,y_test], bins=43)
plt.gca().set(title='ClassesId', ylabel='Population', xlim=[0,43])
plt.legend(['Train','Test'])
fidle.scrawler.save_fig('05-stats-classes')
plt.show()
```
%% Cell type:markdown id: tags:
## Step 5 - List of classes
What are the 43 classes of our images...
%% Cell type:code id: tags:
``` python
fidle.scrawler.images( x_meta,y_meta, range(43), columns=8, x_size=1.4, y_size=1.4,
colorbar=False, y_pred=None, cm='binary', save_as='06-meta-signs')
```
%% Cell type:markdown id: tags:
## Step 6 - What does it really look like
%% Cell type:code id: tags:
``` python
# ---- Get and show few images
samples = [ random.randint(0,len(x_train)-1) for i in range(32)]
fidle.scrawler.images( x_train,y_train, samples, columns=8, x_size=1.5, y_size=1.5,
colorbar=False, y_pred=None, cm='binary', save_as='07-real-signs')
```
%% Cell type:markdown id: tags:
## Step 7 - Dataset cooking...
Images **must** :
- have the **same size** to match the size of the network,
- be **normalized**.
It is possible to work on **rgb** or **monochrome** images and to **equalize** the histograms.
See : [Exposure with scikit-image](https://scikit-image.org/docs/dev/api/skimage.exposure.html)
See : [Local histogram equalization](https://scikit-image.org/docs/dev/api/skimage.filters.rank.html#skimage.filters.rank.equalize)
See : [Histogram equalization](https://scikit-image.org/docs/dev/api/skimage.exposure.html#skimage.exposure.equalize_hist)
### 7.1 - Enhancement cooking
A nice function for preparing our data.\
Input: a set of images (numpy array)\
Output: enhanced images, resized and reprocessed (numpy array)
%% Cell type:code id: tags:
``` python
def images_enhancement(images, width=25, height=25, proc='RGB'):
'''
Resize and convert images - doesn't change originals.
input images must be RGBA or RGB.
Note : all outputs are fixed size numpy array of float32
args:
images : images list
width,height : new images size (25,25)
proc : RGB | RGB-HE | L | L-HE | L-LHE | L-CLAHE
return:
numpy array of enhanced images
'''
lz={ 'RGB':3, 'RGB-HE':3, 'L':1, 'L-HE':1, 'L-LHE':1, 'L-CLAHE':1}[proc]
out=[]
for img in images:
# ---- if RGBA, convert to RGB
if img.shape[2]==4:
img=color.rgba2rgb(img)
# ---- Resize
img = transform.resize(img, (width,height))
# ---- RGB / Histogram Equalization
if proc=='RGB-HE':
hsv = color.rgb2hsv(img.reshape(width,height,3))
hsv[:, :, 2] = exposure.equalize_hist(hsv[:, :, 2])
img = color.hsv2rgb(hsv)
# ---- Grayscale
if proc=='L':
img=color.rgb2gray(img)
# ---- Grayscale / Histogram Equalization
if proc=='L-HE':
img=color.rgb2gray(img)
img=exposure.equalize_hist(img)
# ---- Grayscale / Local Histogram Equalization
if proc=='L-LHE':
img=color.rgb2gray(img)
img = img_as_ubyte(img)
img=rank.equalize(img, disk(10))/255.
# ---- Grayscale / Contrast Limited Adaptive Histogram Equalization (CLAHE)
if proc=='L-CLAHE':
img=color.rgb2gray(img)
img=exposure.equalize_adapthist(img)
# ---- Add image in list of list
out.append(img)
fidle.utils.update_progress('Enhancement: ',len(out),len(images))
# ---- Reshape images
# (-1, width,height,1) for L
# (-1, width,height,3) for RGB
#
out = np.array(out,dtype='float32')
out = out.reshape(-1,width,height,lz)
return out
```
%% Cell type:markdown id: tags:
### 7.2 - To get an idea of the different recipes
%% Cell type:code id: tags:
``` python
i=random.randint(0,len(x_train)-16)
x_samples = x_train[i:i+16]
y_samples = y_train[i:i+16]
datasets = {}
datasets['RGB'] = images_enhancement( x_samples, width=25, height=25, proc='RGB' )
datasets['RGB-HE'] = images_enhancement( x_samples, width=25, height=25, proc='RGB-HE' )
datasets['L'] = images_enhancement( x_samples, width=25, height=25, proc='L' )
datasets['L-HE'] = images_enhancement( x_samples, width=25, height=25, proc='L-HE' )
datasets['L-LHE'] = images_enhancement( x_samples, width=25, height=25, proc='L-LHE' )
datasets['L-CLAHE'] = images_enhancement( x_samples, width=25, height=25, proc='L-CLAHE' )
fidle.utils.subtitle('EXPECTED')
x_expected=[ x_meta[i] for i in y_samples]
fidle.scrawler.images(x_expected, y_samples, range(12), columns=12, x_size=1, y_size=1,
colorbar=False, y_pred=None, cm='binary', save_as='08-expected')
fidle.utils.subtitle('ORIGINAL')
fidle.scrawler.images(x_samples, y_samples, range(12), columns=12, x_size=1, y_size=1,
colorbar=False, y_pred=None, cm='binary', save_as='09-original')
fidle.utils.subtitle('ENHANCED')
n=10
for k,d in datasets.items():
print("dataset : {} min,max=[{:.3f},{:.3f}] shape={}".format(k,d.min(),d.max(), d.shape))
fidle.scrawler.images(d, y_samples, range(12), columns=12, x_size=1, y_size=1,
colorbar=False, y_pred=None, cm='binary', save_as=f'{n}-enhanced-{k}')
n+=1
```
%% Cell type:markdown id: tags:
### 7.3 - Cook and save
A function to save a dataset (h5 file)
%% Cell type:code id: tags:
``` python
def save_h5_dataset(x_train, y_train, x_test, y_test, x_meta,y_meta, filename):
# ---- Create h5 file
with h5py.File(filename, "w") as f:
f.create_dataset("x_train", data=x_train)
f.create_dataset("y_train", data=y_train)
f.create_dataset("x_test", data=x_test)
f.create_dataset("y_test", data=y_test)
f.create_dataset("x_meta", data=x_meta)
f.create_dataset("y_meta", data=y_meta)
# ---- done
size=os.path.getsize(filename)/(1024*1024)
print('Dataset : {:24s} shape : {:22s} size : {:6.1f} Mo (saved)'.format(filename, str(x_train.shape),size))
```
%% Cell type:markdown id: tags:
Generate enhanced datasets :
%% Cell type:code id: tags:
``` python
# ---- Size and processings
#
all_size= [24, 48]
all_proc=['RGB', 'RGB-HE', 'L', 'L-LHE']
# ---- Do it
#
chrono.start()
n_train = int( len(x_train)*scale )
n_test = int( len(x_test)*scale )
fidle.utils.subtitle('Parameters :')
print(f'Scale is : {scale}')
print(f'x_train length is : {n_train}')
print(f'x_test length is : {n_test}')
print(f'output dir is : {output_dir}\n')
fidle.utils.subtitle('Running...')
fidle.utils.mkdir(output_dir)
for s in all_size:
for m in all_proc:
# ---- A nice dataset name
filename = f'{output_dir}/set-{s}x{s}-{m}.h5'
fidle.utils.subtitle(f'Dataset : {filename}')
# ---- Enhancement
# Note : x_train is a numpy array of python objects (images with <> sizes)
# but images_enhancement() returns a real numpy array of float32 (images with same size)
# so, we can save it in nice h5 files
#
x_train_new = images_enhancement( x_train[:n_train], width=s, height=s, proc=m )
x_test_new = images_enhancement( x_test[:n_test], width=s, height=s, proc=m )
x_meta_new = images_enhancement( x_meta, width=s, height=s, proc='RGB' )
# ---- Save
save_h5_dataset( x_train_new, y_train[:n_train], x_test_new, y_test[:n_test], x_meta_new,y_meta, filename)
x_train_new,x_test_new=0,0
print('\nDone.')
chrono.show()
```
%% Cell type:markdown id: tags:
## Step 8 - Reload data to be sure ;-)
%% Cell type:code id: tags:
``` python
chrono.start()
dataset='set-24x24-L'
samples=range(24)
with h5py.File(f'{output_dir}/{dataset}.h5','r') as f:
x_tmp = f['x_train'][:]
y_tmp = f['y_train'][:]
print("dataset loaded from h5 file.")
fidle.scrawler.images(x_tmp,y_tmp, samples, columns=8, x_size=1.5, y_size=1.5,
colorbar=False, y_pred=None, cm='binary', save_as='16-enhanced_images')
x_tmp,y_tmp=0,0
chrono.show()
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3GTSRB2] - First convolutions
<!-- DESC --> Episode 2 : First convolutions and first classification of our traffic signs, using Keras3
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Recognizing traffic signs
- Understand the **principles** and **architecture** of a **convolutional neural network** for image classification
The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
The final aim is to recognise them !
Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
**IMPORTANT :** To be able to use this notebook and the following, **you must have generated the enhanced datasets** in <dataset_dir>/enhanced via the notebook **[01-Preparation-of-data.ipynb](01-Preparation-of-data.ipynb)**
## What we're going to do :
- Read H5 dataset
- Build a model
- Train the model
- Evaluate the model
## Step 1 - Import and init
### 1.1 - Python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import numpy as np
import matplotlib.pyplot as plt
import h5py
import os,time,sys
from importlib import reload
# Init Fidle environment
import fidle
run_id, run_dir, datasets_dir = fidle.init('K3GTSRB2')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
`scale` is the proportion of the dataset that will be used during the training. (1 means 100%)\
A 20% 24x24 dataset, with 5 epochs and a scale of 1, needs **3'30** on a CPU laptop.\
`fit_verbosity` is the verbosity during training : 0 = silent, 1 = progress bar, 2 = one line per epoch
%% Cell type:code id: tags:
``` python
enhanced_dir = './data'
# enhanced_dir = f'{datasets_dir}/GTSRB/enhanced'
dataset_name = 'set-24x24-L'
batch_size = 64
epochs = 5
scale = 1
fit_verbosity = 1
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('enhanced_dir', 'dataset_name', 'batch_size', 'epochs', 'scale', 'fit_verbosity')
```
%% Cell type:markdown id: tags:
## Step 2 - Load dataset
We're going to retrieve a previously recorded dataset.
For example: set-24x24-L
%% Cell type:code id: tags:
``` python
def read_dataset(enhanced_dir, dataset_name, scale=1):
'''
Reads h5 dataset
Args:
filename : datasets filename
dataset_name : dataset name, without .h5
Returns:
x_train,y_train, x_test,y_test data, x_meta,y_meta
'''
# ---- Read dataset
#
chrono=fidle.Chrono()
chrono.start()
filename = f'{enhanced_dir}/{dataset_name}.h5'
with h5py.File(filename,'r') as f:
x_train = f['x_train'][:]
y_train = f['y_train'][:]
x_test = f['x_test'][:]
y_test = f['y_test'][:]
x_meta = f['x_meta'][:]
y_meta = f['y_meta'][:]
# ---- Rescale
#
print('Original shape :', x_train.shape, y_train.shape)
x_train,y_train, x_test,y_test = fidle.utils.rescale_dataset(x_train,y_train,x_test,y_test, scale=scale)
print('Rescaled shape :', x_train.shape, y_train.shape)
# ---- Shuffle
#
x_train,y_train=fidle.utils.shuffle_np_dataset(x_train,y_train)
# ---- done
#
duration = chrono.get_delay()
size = fidle.utils.hsize(os.path.getsize(filename))
print(f'\nDataset "{dataset_name}" is loaded and shuffled. ({size} in {duration})')
return x_train,y_train, x_test,y_test, x_meta,y_meta
# ---- Read dataset
#
x_train,y_train,x_test,y_test, x_meta,y_meta = read_dataset(enhanced_dir, dataset_name, scale)
```
%% Cell type:markdown id: tags:
## Step 3 - Have a look at the dataset
We take a quick look as we go by...
%% Cell type:code id: tags:
``` python
print("x_train : ", x_train.shape)
print("y_train : ", y_train.shape)
print("x_test : ", x_test.shape)
print("y_test : ", y_test.shape)
fidle.scrawler.images(x_train, y_train, range(12), columns=6, x_size=2, y_size=2, save_as='01-dataset-medium')
fidle.scrawler.images(x_train, y_train, range(36), columns=12, x_size=1, y_size=1, save_as='02-dataset-small')
```
%% Cell type:markdown id: tags:
## Step 4 - Create model
We will now build a model and train it...
Some models :
%% Cell type:code id: tags:
``` python
# ------------------------------------------------------------------
# -- A simple model, for 24x24 or 48x48 images --
# ------------------------------------------------------------------
#
def get_model_01(lx,ly,lz):
model = keras.models.Sequential()
model.add( keras.layers.Input((lx,ly,lz)) )
model.add( keras.layers.Conv2D(96, (3,3), activation='relu' ))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.2))
model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.2))
model.add( keras.layers.Flatten())
model.add( keras.layers.Dense(1500, activation='relu'))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Dense(43, activation='softmax'))
return model
# ------------------------------------------------------------------
# -- A more sophisticated model, for 48x48 images --
# ------------------------------------------------------------------
#
def get_model_02(lx,ly,lz):
model = keras.models.Sequential()
model.add( keras.layers.Input((lx,ly,lz)) )
model.add( keras.layers.Conv2D(32, (3,3), activation='relu'))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Flatten())
model.add( keras.layers.Dense(1152, activation='relu'))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Dense(43, activation='softmax'))
return model
```
%% Cell type:markdown id: tags:
## Step 5 - Train the model
**Get the shape of my data :**
%% Cell type:code id: tags:
``` python
(n,lx,ly,lz) = x_train.shape
print("Images of the dataset have this folowing shape : ",(lx,ly,lz))
```
%% Cell type:markdown id: tags:
**Get and compile a model, with the data shape :**
%% Cell type:code id: tags:
``` python
model = get_model_01(lx,ly,lz)
model.summary()
model.compile(optimizer = 'adam',
loss = 'sparse_categorical_crossentropy',
metrics = ['accuracy'])
```
%% Cell type:markdown id: tags:
**Train it :**
%% Cell type:code id: tags:
``` python
chrono=fidle.Chrono()
chrono.start()
# ---- Shuffle train data
x_train,y_train=fidle.utils.shuffle_np_dataset(x_train,y_train)
# ---- Train
history = model.fit( x_train, y_train,
batch_size = batch_size,
epochs = epochs,
verbose = fit_verbosity,
validation_data = (x_test, y_test))
chrono.show()
```
%% Cell type:markdown id: tags:
## Step 6 - Evaluate
%% Cell type:code id: tags:
``` python
max_val_accuracy = max(history.history["val_accuracy"])
print("Max validation accuracy is : {:.4f}".format(max_val_accuracy))
```
%% Cell type:code id: tags:
``` python
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss : {:5.4f}'.format(score[0]))
print('Test accuracy : {:5.4f}'.format(score[1]))
```
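%% Cell type:markdown id: tags:
`model.evaluate()` returns the metrics as a list, in the order given at compile time. An equivalent sketch using `return_dict=True`, which Keras also supports, avoids relying on that order :
%% Cell type:code id: tags:
``` python
# Same evaluation, but with named results instead of positional ones
scores = model.evaluate(x_test, y_test, verbose=0, return_dict=True)
print('Test loss     : {:5.4f}'.format(scores['loss']))
print('Test accuracy : {:5.4f}'.format(scores['accuracy']))
```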
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
<div class="todo">
What you can do:
<ul>
<li>Try the different models</li>
<li>Try with different datasets</li>
<li>Test different hyperparameters (epochs, batch size, optimization, etc.)</li>
<li>Create your own model</li>
</ul>
</div>
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3GTSRB3] - Training monitoring
<!-- DESC --> Episode 3 : Monitoring, analysis and check points during a training session, using Keras3
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- **Understand** what happens during the **training** process
- Implement **monitoring**, **backup** and **recovery** solutions
The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs, belonging to 43 classes.
The final aim is to recognise them !
The description is available here : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
## What we're going to do :
- Monitoring and understanding our model training
- Add recovery points
- Analyze the results
- Restore and run recovery points
## Step 1 - Import and init
### 1.1 - Python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import numpy as np
import os, random
import fidle
import modules.my_loader as my_loader
import modules.my_models as my_models
import modules.my_tools as my_tools
from modules.my_TensorboardCallback import TensorboardCallback
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3GTSRB3')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
`scale` is the proportion of the dataset that will be used during the training (1 means 100%) :
- With 20% of the 24x24 L dataset and 10 epochs, training needs about 1'30 on a CPU laptop (Accuracy=91.4)
- With 20% of the 48x48 RGB dataset and 10 epochs, training needs about 6'30 on a CPU laptop (Accuracy=91.5)
`model_name` is the model name from modules.my_models :
- model_01 for 24x24 or 48x48 images
- model_02 for 48x48 images
`fit_verbosity` is the verbosity during training :
- 0 = silent, 1 = progress bar, 2 = one line per epoch
%% Cell type:code id: tags:
``` python
enhanced_dir = './data'
# enhanced_dir = f'{datasets_dir}/GTSRB/enhanced'
model_name = 'model_01'
dataset_name = 'set-24x24-L'
batch_size = 64
epochs = 10
scale = 1
fit_verbosity = 1
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('enhanced_dir', 'model_name', 'dataset_name', 'batch_size', 'epochs', 'scale', 'fit_verbosity')
```
%% Cell type:markdown id: tags:
## Step 2 - Load dataset
The dataset is one of the previously saved datasets...
%% Cell type:code id: tags:
``` python
x_train,y_train,x_test,y_test, x_meta,y_meta = my_loader.read_dataset(enhanced_dir, dataset_name, scale)
```
%% Cell type:markdown id: tags:
## Step 3 - Have a look at the dataset
%% Cell type:code id: tags:
``` python
print("x_train : ", x_train.shape)
print("y_train : ", y_train.shape)
print("x_test : ", x_test.shape)
print("y_test : ", y_test.shape)
fidle.scrawler.images(x_train, y_train, range(24), columns=8, x_size=1, y_size=1, save_as='02-dataset-small')
```
%% Cell type:markdown id: tags:
## Step 4 - Get a model
%% Cell type:code id: tags:
``` python
(n,lx,ly,lz) = x_train.shape
model = my_models.get_model( model_name, lx,ly,lz )
model.summary()
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
```
%% Cell type:markdown id: tags:
## Step 5 - Prepare callbacks
We will add two kinds of callbacks :
**TensorBoard**
Training logs, which can be visualised with the [Tensorboard tool](https://www.tensorflow.org/tensorboard).
**Model backup**
It is possible to save the model every n epochs, or at each improvement.
The model can be saved completely or partially (weights only).
See the [Keras documentation](https://keras.io/api/callbacks/)
%% Cell type:code id: tags:
``` python
fidle.utils.mkdir(run_dir + '/models')
fidle.utils.mkdir(run_dir + '/logs')
# ---- Callback for tensorboard (This one is homemade !)
#
tensorboard_callback = TensorboardCallback(
    log_dir=run_dir + "/logs/tb_" + fidle.Chrono.tag_now())
# ---- Callback to save best model
#
bestmodel_callback = keras.callbacks.ModelCheckpoint(
filepath= run_dir + "/models/best-model.keras",
monitor='val_accuracy',
mode='max',
save_best_only=True)
# ---- Callback to save the model at each epoch
#
savemodel_callback = keras.callbacks.ModelCheckpoint(
filepath= run_dir + "/models/{epoch:02d}.keras",
save_freq="epoch")
```
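%% Cell type:markdown id: tags:
For the "partial" (weights only) variant mentioned above, a sketch could look like this; note that in Keras 3 the filepath must end in `.weights.h5` when `save_weights_only=True` :
%% Cell type:code id: tags:
``` python
# Sketch (not used below) : keep only the best weights, not the full model
weights_callback = keras.callbacks.ModelCheckpoint(
    filepath          = run_dir + "/models/best.weights.h5",
    monitor           = 'val_accuracy',
    mode              = 'max',
    save_best_only    = True,
    save_weights_only = True)
```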
%% Cell type:markdown id: tags:
## Step 6 - Train the model
To access the logs with TensorBoard :
- Under **Docker**, from a terminal launched via the jupyterlab launcher, use the following command:<br>
```tensorboard --logdir <path-to-logs> --host 0.0.0.0```
- If you're not using Docker, from a terminal :<br>
```tensorboard --logdir <path-to-logs>```
**Note:** A TensorBoard instance can run simultaneously with the training session.
%% Cell type:markdown id: tags:
**Train it :**
Note: The training curve is visible in real time with Tensorboard (see step 5)
%% Cell type:code id: tags:
``` python
chrono=fidle.Chrono()
chrono.start()
# ---- Shuffle train data
x_train,y_train=fidle.utils.shuffle_np_dataset(x_train,y_train)
# ---- Train
# Note: To be faster in our example, we can take only 2000 values
#
history = model.fit( x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=fit_verbosity,
validation_data=(x_test, y_test),
callbacks=[tensorboard_callback, bestmodel_callback, savemodel_callback] )
model.save(f'{run_dir}/models/last-model.keras')
chrono.show()
```
%% Cell type:markdown id: tags:
**Evaluate it :**
%% Cell type:code id: tags:
``` python
max_val_accuracy = max(history.history["val_accuracy"])
print("Max validation accuracy is : {:.4f}".format(max_val_accuracy))
```
%% Cell type:code id: tags:
``` python
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss : {:5.4f}'.format(score[0]))
print('Test accuracy : {:5.4f}'.format(score[1]))
```
%% Cell type:markdown id: tags:
## Step 7 - History
The call to `model.fit()` returns the training history :
%% Cell type:code id: tags:
``` python
fidle.scrawler.history(history, save_as='03-history')
```
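%% Cell type:markdown id: tags:
`fidle.scrawler.history()` takes care of the plotting; as a minimal do-it-yourself sketch with plain matplotlib, assuming the standard Keras history keys `accuracy` / `val_accuracy` :
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt
# history.history is a plain dict : one list of values per metric and epoch
plt.plot(history.history['accuracy'],     label='train')
plt.plot(history.history['val_accuracy'], label='validation')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()
```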
%% Cell type:markdown id: tags:
## Step 8 - Evaluation and confusion
%% Cell type:code id: tags:
``` python
y_softmax = model.predict(x_test, verbose=fit_verbosity)
y_pred    = np.argmax(y_softmax, axis=-1)
fidle.scrawler.confusion_matrix(y_test,y_pred,range(43), figsize=(12, 12),normalize=False, save_as='04-confusion-matrix')
```
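%% Cell type:markdown id: tags:
An equivalent sketch with scikit-learn (assuming it is available in the environment, which this notebook does not require) : with a row-normalized matrix, the diagonal gives the per-class recall.
%% Cell type:code id: tags:
``` python
from sklearn.metrics import confusion_matrix
# Rows are normalized to 1, so cm[i,i] is the recall of class i
cm = confusion_matrix(y_test, y_pred, labels=np.arange(43), normalize='true')
worst = np.argsort(cm.diagonal())[:5]
print("5 worst recognized classes :", worst)
```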
%% Cell type:markdown id: tags:
## Step 9 - Restore and evaluate
#### List saved models :
%% Cell type:code id: tags:
``` python
# !ls -1rt "$run_dir"/models/
```
%% Cell type:markdown id: tags:
#### Restore a model :
%% Cell type:code id: tags:
``` python
loaded_model = keras.models.load_model(f'{run_dir}/models/best-model.keras')
# loaded_model.summary()
print("Loaded.")
```
%% Cell type:markdown id: tags:
#### Evaluate it :
%% Cell type:code id: tags:
``` python
score = loaded_model.evaluate(x_test, y_test, verbose=0)
print('Test loss : {:5.4f}'.format(score[0]))
print('Test accuracy : {:5.4f}'.format(score[1]))
```
%% Cell type:markdown id: tags:
#### Make a prediction :
%% Cell type:code id: tags:
``` python
# ---- Pick a random image
#
i = random.randint(0, len(x_test)-1)
x,y = x_test[i], y_test[i]
# ---- Do prediction
#
prediction = loaded_model.predict( np.array([x]), verbose=fit_verbosity )
# ---- Show result
my_tools.show_prediction( prediction, x, y, x_meta )
```
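%% Cell type:markdown id: tags:
The raw softmax output can also be inspected directly; a small sketch listing the five most probable classes for the image above :
%% Cell type:code id: tags:
``` python
# prediction has shape (1, 43) : one probability per class
top5 = np.argsort(prediction[0])[::-1][:5]
for c in top5:
    print(f"class {c:2d} : {prediction[0][c]:.4f}")
```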
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
## Step 10 - To go further ;-)
What you can do:
- Try different models
- Use a subset of the dataset
- Try different datasets
- Try to recognize exotic signs !
- Test different hyperparameters (epochs, batch size, optimization, etc.)
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
# Fidle at GRICAD
# -----------------------------------------------
#
# <!-- TITLE --> [K3GTSRB10] - OAR batch script submission
# <!-- DESC --> Bash script for an OAR batch submission of an ipython code
# <!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->

# ==== Notebook parameters =========================================

CONDA_ENV='fidle'
NOTEBOOK_DIR="~/fidle/GTSRB"
SCRIPT_IPY="03-Better-convolutions.py"

# ---- Environment vars used to override notebook/script parameters
#      'enhanced_dir', 'model_name', 'dataset_name', 'batch_size', 'epochs', 'scale', 'fit_verbosity'

export FIDLE_OVERRIDE_GTSRB3_run_dir="./data"
export FIDLE_OVERRIDE_GTSRB3_enhanced_dir="./run/GTSRB3"
export FIDLE_OVERRIDE_GTSRB3_model_name="model_01"
export FIDLE_OVERRIDE_GTSRB3_dataset_name="set-24x24-L"
export FIDLE_OVERRIDE_GTSRB3_batch_size=64
export FIDLE_OVERRIDE_GTSRB3_epochs=5
export FIDLE_OVERRIDE_GTSRB3_scale=1
export FIDLE_OVERRIDE_GTSRB3_fit_verbosity=0

# ==================================================================

echo '------------------------------------------------------------'
echo "Start : $0"
echo '------------------------------------------------------------'
echo "Notebook dir : $NOTEBOOK_DIR"
echo "Script       : $SCRIPT_IPY"
echo "Environment  : $CONDA_ENV"
echo '------------------------------------------------------------'
env | grep FIDLE_OVERRIDE | awk 'BEGIN { FS = "=" } ; { printf("%-35s : %s\n",$1,$2) }'
echo '------------------------------------------------------------'

source /applis/environments/cuda_env.sh dahu 10.0
source /applis/environments/conda.sh
#
conda activate "$CONDA_ENV"

# ---- Run it...
#
cd $NOTEBOOK_DIR
ipython "$SCRIPT_IPY"

echo 'Done.'