# ------------------------------------------------------------------
# _____ _ _ _
# | ___(_) __| | | ___
# | |_ | |/ _` | |/ _ \
# | _| | | (_| | | __/
# |_| |_|\__,_|_|\___| WGANGP LightningModule
# ------------------------------------------------------------------
# Formation Introduction au Deep Learning (FIDLE)
# CNRS/MIAI - https://fidle.cnrs.fr
# ------------------------------------------------------------------
# JL Parouty (March 2024)
import sys
import numpy as np
import torch
import torch.nn.functional as F
import torchvision
from lightning import LightningModule
class WGANGP(LightningModule):
# -------------------------------------------------------------------------
# Init
# -------------------------------------------------------------------------
#
def __init__(
self,
data_shape = (None,None,None),
latent_dim = None,
lr = 0.0002,
b1 = 0.5,
b2 = 0.999,
batch_size = 64,
lambda_gp = 10,
generator_name = None,
discriminator_name = None,
**kwargs,
):
super().__init__()
print('\n---- GAN initialization --------------------------------------------')
# ---- Hyperparameters
#
# Enable Lightning to store all the provided arguments under the self.hparams attribute.
# These hyperparameters will also be stored within the model checkpoint.
#
self.save_hyperparameters()
print('Hyperparameters are :')
for name,value in self.hparams.items():
print(f'{name:24s} : {value}')
# ---- Because we have more than one optimizer
#
self.automatic_optimization = False
# ---- Generator/Discriminator instantiation
#
print('Submodels :')
module=sys.modules['__main__']
class_g = getattr(module, generator_name)
class_d = getattr(module, discriminator_name)
self.generator = class_g( latent_dim=latent_dim, data_shape=data_shape)
self.discriminator = class_d( latent_dim=latent_dim, data_shape=data_shape)
# ---- Validation and example data
#
self.validation_z = torch.randn(8, self.hparams.latent_dim)
self.example_input_array = torch.zeros(2, self.hparams.latent_dim)
def forward(self, z):
return self.generator(z)
def adversarial_loss(self, y_pred, y):
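# Standard GAN BCE loss, not used by the WGAN-GP training below (kept for reference)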
return F.binary_cross_entropy(y_pred, y)
def gradient_penalty(self, real_images, fake_images):
# see: https://medium.com/dejunhuang/implementing-gan-and-wgan-in-pytorch-551099afde3c
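# WGAN-GP penalty (Gulrajani et al. 2017): GP = E[ (||grad_xhat D(xhat)||_2 - 1)^2 ],
# where xhat is sampled uniformly along segments between real and fake images.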
batch_size = real_images.size(0)
# ---- Create interpolate images
#
# Get a random vector : size=([batch_size])
epsilon = torch.distributions.uniform.Uniform(0, 1).sample([batch_size])
# Add dimensions to match images batch : size=([batch_size,1,1,1])
epsilon = epsilon[:, None, None, None]
# Move epsilon to the same device/dtype as the images
epsilon = epsilon.type_as(real_images)
# Do interpolation
interpolates = epsilon * fake_images + ((1 - epsilon) * real_images)
# ---- Use autograd to compute gradient
#
# The key to making this work is including `create_graph`: the computations in this
# penalty are added to the computation graph of the loss function, so that the
# second partial derivatives are correctly computed.
#
interpolates.requires_grad_()
pred_labels = self.discriminator.forward(interpolates)
gradients = torch.autograd.grad( inputs = interpolates,
outputs = pred_labels,
grad_outputs = torch.ones_like(pred_labels),
create_graph = True,
retain_graph = True,
only_inputs = True )[0]
grad_flat = gradients.view(batch_size, -1)
grad_norm = torch.linalg.norm(grad_flat, dim=1)
grad_penalty = (grad_norm - 1) ** 2
# gp = torch.pow(grads.norm(2, dim=1) - 1, 2).mean()
return grad_penalty
def training_step(self, batch, batch_idx):
real_imgs = batch
batch_size = batch.size(0)
lambda_gp = self.hparams.lambda_gp
optimizer_g, optimizer_d = self.optimizers()
# ---- Get some latent space vectors
# We use type_as() to make sure we initialize z on the right device (GPU/CPU).
#
z = torch.randn(batch_size, self.hparams.latent_dim)
z = z.type_as(real_imgs)
# ---- Train generator ------------------------------------------------
# Generator uses optimizer #0
# We try to generate fake images that can mislead the discriminator
# ---------------------------------------------------------------------
#
self.toggle_optimizer(optimizer_g)
# Get fake images
fake_imgs = self.generator.forward(z)
# Get critics
critics = self.discriminator.forward(fake_imgs)
# Loss
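# Generator loss: minimize -E[D(fake)], i.e. push the critic's score on fakes up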
g_loss = -critics.mean()
# Log
self.log("g_loss", g_loss, prog_bar=True)
# Backward loss
self.manual_backward(g_loss)
optimizer_g.step()
optimizer_g.zero_grad()
self.untoggle_optimizer(optimizer_g)
# ---- Train discriminator --------------------------------------------
# Discriminator uses optimizer #1
# We try to distinguish fake images from real ones
# ---------------------------------------------------------------------
#
self.toggle_optimizer(optimizer_d)
# Get critics
critics_real = self.discriminator.forward(real_imgs)
critics_fake = self.discriminator.forward(fake_imgs.detach())
# Get gradient penalty
grad_penalty = self.gradient_penalty(real_imgs, fake_imgs.detach())
# Loss
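# Critic loss (WGAN-GP): E[D(fake)] - E[D(real)] + lambda_gp * gradient penalty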
d_loss = critics_fake.mean() - critics_real.mean() + lambda_gp*grad_penalty.mean()
# Log loss
self.log("d_loss", d_loss, prog_bar=True)
# Backward
self.manual_backward(d_loss)
optimizer_d.step()
optimizer_d.zero_grad()
self.untoggle_optimizer(optimizer_d)
def configure_optimizers(self):
lr = self.hparams.lr
b1 = self.hparams.b1
b2 = self.hparams.b2
# With a GAN, we need two separate optimizers.
# opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2))
# opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2),)
opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr)
opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr)
return [opt_g, opt_d], []
def on_train_epoch_end(self):
# ---- Log Graph
#
if(self.current_epoch==1):
sampleImg=torch.rand((1,28,28,1))
sampleImg=sampleImg.type_as(self.generator.model[0].weight)
self.logger.experiment.add_graph(self.discriminator,sampleImg)
# ---- Log some of these images
#
z = torch.randn(self.hparams.batch_size, self.hparams.latent_dim)
z = z.type_as(self.generator.model[0].weight)
sample_imgs = self.generator(z)
sample_imgs = sample_imgs.permute(0, 3, 1, 2) # from NHWC to NCHW
grid = torchvision.utils.make_grid(tensor=sample_imgs, nrow=12, )
self.logger.experiment.add_image(f"Generated images", grid,self.current_epoch)
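# -------------------------------------------------------------------------
# Usage sketch (illustrative addition, not part of the original module):
# assuming `Generator` and `Discriminator` classes are defined in the
# __main__ module and `dataloader` yields batches of images
# (all names below are hypothetical):
#
#   gan = WGANGP( data_shape = (28,28,1),
#                 latent_dim = 128,
#                 batch_size = 64,
#                 generator_name = 'Generator',
#                 discriminator_name = 'Discriminator' )
#   trainer = lightning.Trainer(max_epochs=10)
#   trainer.fit(gan, dataloader)
# -------------------------------------------------------------------------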
%% Cell type:markdown id:756b572d tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [DDPM1] - Fashion MNIST Generation with DDPM
<!-- DESC --> Diffusion Model example, to generate Fashion MNIST images.
<!-- AUTHOR : Hatim Bourfoune (CNRS/IDRIS), Maxime Song (CNRS/IDRIS) -->
## Objectives :
- Understanding and implementing a **Diffusion Model** neural network (DDPM)
The computations involved are significant, so it is preferable to start with a very simple dataset such as MNIST.
...or MNIST at a small scale (the code needs to be adapted!) if you don't have a GPU ;-)
## Acknowledgements :
This notebook was heavily inspired by this [article](https://huggingface.co/blog/annotated-diffusion) and this [notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/annotated_diffusion.ipynb#scrollTo=5153024b).
%% Cell type:code id:54a15542 tags:
``` python
import math
from inspect import isfunction
from functools import partial
import random
import IPython
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from einops import rearrange
import torch
from torch import nn, einsum
import torch.nn.functional as F
from datasets import load_dataset, load_from_disk
from torchvision import transforms
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
import numpy as np
from PIL import Image
from torch.optim import Adam
from torchvision.transforms import Compose, ToTensor, Lambda, ToPILImage, CenterCrop, Resize
import matplotlib.pyplot as plt
```
%% Cell type:code id:a854c28a tags:
``` python
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Reproducibility
torch.manual_seed(53)
random.seed(53)
np.random.seed(53)
```
%% Cell type:markdown id:e33f10db tags:
## Create dataset
We will use the HuggingFace Datasets library to get our Fashion MNIST dataset. If you are using Jean Zay, the dataset is already downloaded in the DSDIR, so you can use the code as it is. If you are not using Jean Zay, you should use the function load_dataset (commented out) instead of load_from_disk; it will automatically download the dataset if it has not been downloaded already.
%% Cell type:code id:918c0138 tags:
``` python
dataset = load_dataset("fashion_mnist")
dataset
```
%% Cell type:markdown id:cfe4d4f5 tags:
As you can see, the dataset is composed of two subparts: train and test. So the dataset is already split for us. We'll use the train part for now. <br/>
We can also see that the dataset has two features per sample: 'image', the PIL version of the image, and 'label', the class of the image (shoe, shirt...). There are 60,000 samples in our train dataset.
%% Cell type:code id:2280400d tags:
``` python
train_dataset = dataset['train']
train_dataset[0]
```
%% Cell type:markdown id:7978ad3d tags:
Each sample of a HuggingFace dataset is a dictionary containing the data.
%% Cell type:code id:0d157e11 tags:
``` python
image = train_dataset[0]['image']
image
```
%% Cell type:code id:5dea3e5a tags:
``` python
image_array = np.asarray(image, dtype=np.uint8)
print(f"shape of the image: {image_array.shape}")
print(f"min: {image_array.min()}, max: {image_array.max()}")
```
%% Cell type:markdown id:f86937e9 tags:
We will now create a function that gets the Fashion MNIST dataset, applies all the transformations we want, and wraps the dataset in a dataloader.
%% Cell type:code id:e646a7b1 tags:
``` python
# load hugging face dataset from the DSDIR
def get_dataset(data_path, batch_size, test = False):
dataset = load_from_disk(data_path)
# dataset = load_dataset(data_path) # Use this one if you're not on Jean Zay
# define image transformations (e.g. using torchvision)
transform = Compose([
transforms.RandomHorizontalFlip(), # Data augmentation
transforms.ToTensor(), # Transform PIL image into a tensor with values in [0,1]
transforms.Lambda(lambda t: (t * 2) - 1) # Normalize values to [-1,1]
])
# define function for HF dataset transform
def transforms_im(examples):
examples['pixel_values'] = [transform(image) for image in examples['image']]
del examples['image']
return examples
dataset = dataset.with_transform(transforms_im).remove_columns('label') # We don't need it
channels, image_size, _ = dataset['train'][0]['pixel_values'].shape
if test:
dataloader = DataLoader(dataset['test'], batch_size=batch_size)
else:
dataloader = DataLoader(dataset['train'], batch_size=batch_size, shuffle=True)
len_dataloader = len(dataloader)
print(f"channels: {channels}, image dimension: {image_size}, len_dataloader: {len_dataloader}")
return dataloader, channels, image_size, len_dataloader
```
%% Cell type:markdown id:413a3fea tags:
We choose the parameters and we instantiate the dataset:
%% Cell type:code id:918233da tags:
``` python
# Dataset parameters
batch_size = 64
data_path = "/gpfsdswork/dataset/HuggingFace/fashion_mnist/fashion_mnist/"
# data_path = "fashion_mnist" # If you're not using Jean Zay
```
%% Cell type:code id:85939f9d tags:
``` python
train_dataloader, channels, image_size, len_dataloader = get_dataset(data_path, batch_size)
batch_image = next(iter(train_dataloader))['pixel_values']
batch_image.shape
```
%% Cell type:markdown id:104db929 tags:
We also create a function that allows us to see a batch of images:
%% Cell type:code id:196370c2 tags:
``` python
def normalize_im(images):
shape = images.shape
images = images.view(shape[0], -1)
images -= images.min(1, keepdim=True)[0]
images /= images.max(1, keepdim=True)[0]
return images.view(shape)
def show_images(batch):
plt.imshow(torch.permute(make_grid(normalize_im(batch)), (1,2,0)))
plt.show()
```
%% Cell type:code id:96334e60 tags:
``` python
show_images(batch_image[:])
```
%% Cell type:markdown id:1befee67 tags:
## Forward Diffusion
The aim of this part is to create a function that will add noise to any image at any step (following the DDPM diffusion process).
%% Cell type:markdown id:231629ad tags:
### Beta scheduling
First, we create a function that computes the betas of every step, following a specific schedule. We only implement the linear schedule (original DDPM) and the cosine schedule (improved DDPM).
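For reference, the cosine schedule defines $ \bar{\alpha}_t = f(t)/f(0) $ with $ f(t) = \cos\left(\frac{t/T + s}{1 + s}\cdot\frac{\pi}{2}\right)^2 $, and recovers the betas as $ \beta_t = 1 - \frac{\bar{\alpha}_t}{\bar{\alpha}_{t-1}} $, clipped to avoid degenerate values: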
%% Cell type:code id:0039d38d tags:
``` python
# Different type of beta schedule
def linear_beta_schedule(timesteps, beta_start = 0.0001, beta_end = 0.02):
"""
linear schedule from the original DDPM paper https://arxiv.org/abs/2006.11239
"""
return torch.linspace(beta_start, beta_end, timesteps)
def cosine_beta_schedule(timesteps, s=0.008):
"""
cosine schedule as proposed in https://arxiv.org/abs/2102.09672
"""
steps = timesteps + 1
x = torch.linspace(0, timesteps, steps)
alphas_cumprod = torch.cos(((x / timesteps) + s) / (1 + s) * torch.pi * 0.5) ** 2
alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
betas = 1 - (alphas_cumprod[1:] / alphas_cumprod[:-1])
return torch.clip(betas, 0.0001, 0.9999)
```
%% Cell type:markdown id:e18d1b38 tags:
### Constants calculation
We will now create a function to compute all the constants we need for our Diffusion Model. <br/>
Constants:
- $ \beta_t $: betas
- $ \sqrt{\frac{1}{\alpha_t}} $: sqrt_recip_alphas
- $ \sqrt{\bar{\alpha}_t} $: sqrt_alphas_cumprod
- $ \sqrt{1-\bar{\alpha}_t} $: sqrt_one_minus_alphas_cumprod
- $ \tilde{\beta}_t = \beta_t\frac{1-\bar{\alpha}_{t-1}}{1-\bar{\alpha}_t} $: posterior_variance
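with $ \alpha_t = 1 - \beta_t $ and $ \bar{\alpha}_t = \prod_{s=1}^{t} \alpha_s $ (`alphas` and `alphas_cumprod` in the code below).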
%% Cell type:code id:84251513 tags:
``` python
# Function to get alphas and betas
def get_alph_bet(timesteps, schedule=cosine_beta_schedule):
# define beta
betas = schedule(timesteps)
# define alphas
alphas = 1. - betas
alphas_cumprod = torch.cumprod(alphas, axis=0) # cumulative product of alpha
alphas_cumprod_prev = F.pad(alphas_cumprod[:-1], (1, 0), value=1.0) # corresponding to the prev const
sqrt_recip_alphas = torch.sqrt(1.0 / alphas)
# calculations for diffusion q(x_t | x_{t-1}) and others
sqrt_alphas_cumprod = torch.sqrt(alphas_cumprod)
sqrt_one_minus_alphas_cumprod = torch.sqrt(1. - alphas_cumprod)
# calculations for posterior q(x_{t-1} | x_t, x_0)
posterior_variance = betas * (1. - alphas_cumprod_prev) / (1. - alphas_cumprod)
const_dict = {
'betas': betas,
'sqrt_recip_alphas': sqrt_recip_alphas,
'sqrt_alphas_cumprod': sqrt_alphas_cumprod,
'sqrt_one_minus_alphas_cumprod': sqrt_one_minus_alphas_cumprod,
'posterior_variance': posterior_variance
}
return const_dict
```
%% Cell type:markdown id:d5658d8e tags:
### Difference between Linear and Cosine schedule
We can compare the constants produced by the two schedules:
%% Cell type:code id:7bfdf98c tags:
``` python
T = 1000
const_linear_dict = get_alph_bet(T, schedule=linear_beta_schedule)
const_cosine_dict = get_alph_bet(T, schedule=cosine_beta_schedule)
plt.plot(np.arange(T), const_linear_dict['sqrt_alphas_cumprod'], color='r', label='linear')
plt.plot(np.arange(T), const_cosine_dict['sqrt_alphas_cumprod'], color='g', label='cosine')
# Naming the x-axis, y-axis and the whole graph
plt.xlabel("Step")
plt.ylabel("alpha_bar")
plt.title("Linear and Cosine schedules")
# Adding a legend, which helps us recognize each curve according to its color
plt.legend()
# To load the display window
plt.show()
```
%% Cell type:markdown id:b1537984 tags:
### Definition of $ q(x_t|x_0) $
%% Cell type:code id:cb10e05b tags:
``` python
# extract the values needed for time t
def extract(constants, batch_t, x_shape):
diffusion_batch_size = batch_t.shape[0]
# gather the appropriate constant for each timestep
out = constants.gather(-1, batch_t.cpu())
return out.reshape(diffusion_batch_size, *((1,) * (len(x_shape) - 1))).to(batch_t.device)
```
%% Cell type:markdown id:2f5991bd tags:
Now that we have all the constants we need, we can create a function that adds noise to an image following the forward diffusion process. This function (q_sample) corresponds to $ q(x_t|x_0) $:
![q_sample](https://docs.google.com/drawings/d/e/2PACX-1vQJ55FfJZ8FehNhnIEEeWUDaOAZqK5BuaadB9Xacx2bA222nNApwMHYzhgILaUrze_pTlc974BELJ2D/pub?w=3210&h=651)
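Written out, this is the closed form $ q(x_t|x_0) = \mathcal{N}\left(x_t;\ \sqrt{\bar{\alpha}_t}\,x_0,\ (1-\bar{\alpha}_t)\mathbf{I}\right) $, i.e. $ x_t = \sqrt{\bar{\alpha}_t}\,x_0 + \sqrt{1-\bar{\alpha}_t}\,\epsilon $ with $ \epsilon \sim \mathcal{N}(0,\mathbf{I}) $.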
%% Cell type:code id:28645450 tags:
``` python
# forward diffusion (using the nice property)
def q_sample(constants_dict, batch_x0, batch_t, noise=None):
if noise is None:
noise = torch.randn_like(batch_x0)
sqrt_alphas_cumprod_t = extract(constants_dict['sqrt_alphas_cumprod'], batch_t, batch_x0.shape)
sqrt_one_minus_alphas_cumprod_t = extract(
constants_dict['sqrt_one_minus_alphas_cumprod'], batch_t, batch_x0.shape
)
return sqrt_alphas_cumprod_t * batch_x0 + sqrt_one_minus_alphas_cumprod_t * noise
```
%% Cell type:markdown id:dcc05f40 tags:
We can now visualize how the forward diffusion process gradually adds noise to the image, depending on the schedule:
%% Cell type:code id:7ed20740 tags:
``` python
T = 1000
const_linear_dict = get_alph_bet(T, schedule=linear_beta_schedule)
const_cosine_dict = get_alph_bet(T, schedule=cosine_beta_schedule)
batch_t = torch.arange(batch_size)*(T//batch_size) # get a range of timesteps from 0 to T
print(f"timesteps: {batch_t}")
noisy_batch_linear = q_sample(const_linear_dict, batch_image, batch_t, noise=None)
noisy_batch_cosine = q_sample(const_cosine_dict, batch_image, batch_t, noise=None)
print("Original images:")
show_images(batch_image[:])
print("Noised images with linear shedule:")
show_images(noisy_batch_linear[:])
print("Noised images with cosine shedule:")
show_images(noisy_batch_cosine[:])
```
%% Cell type:markdown id:565d3c80 tags:
## Reverse Diffusion Process
%% Cell type:markdown id:251808b0 tags:
### Model definition
The reverse diffusion process is carried out by a deep learning model. We chose a U-Net with attention. The architecture follows improvements from papers such as [ConvNeXt](https://arxiv.org/pdf/2201.03545.pdf). You can inspect the model in the model.py file.
%% Cell type:code id:29f00028 tags:
``` python
from model import Unet
model = Unet(
dim=28,
init_dim=None,
out_dim=None,
dim_mults=(1, 2, 4),
channels=1,
with_time_emb=True,
convnext_mult=2,
)
```
%% Cell type:markdown id:0aaf936c tags:
### Definition of $ p_{\theta}(x_{t-1}|x_t) $
Now we need a function to retrieve $x_{t-1}$ from $x_t$ and the predicted $z_t$. It corresponds to the reverse diffusion kernel:
![p_sample](https://docs.google.com/drawings/d/e/2PACX-1vRogMTbBI_MtUz2WvFRKef0IKSNaKuFe475llm8nARBbvVCxezq4L00wJV7HjJSLm5mvODncdHDQvKq/pub?w=4407&h=679)
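Written out, the predicted mean (equation 11 of the DDPM paper) is $ \mu_\theta(x_t,t) = \frac{1}{\sqrt{\alpha_t}}\left(x_t - \frac{\beta_t}{\sqrt{1-\bar{\alpha}_t}}\,\epsilon_\theta(x_t,t)\right) $, and for $ t>0 $ we sample $ x_{t-1} = \mu_\theta(x_t,t) + \sqrt{\tilde{\beta}_t}\,z $ with $ z \sim \mathcal{N}(0,\mathbf{I}) $.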
%% Cell type:code id:00443d8e tags:
``` python
@torch.no_grad()
def p_sample(constants_dict, batch_xt, predicted_noise, batch_t):
# We first get every constant needed and move them to the right device
betas_t = extract(constants_dict['betas'], batch_t, batch_xt.shape).to(batch_xt.device)
sqrt_one_minus_alphas_cumprod_t = extract(
constants_dict['sqrt_one_minus_alphas_cumprod'], batch_t, batch_xt.shape
).to(batch_xt.device)
sqrt_recip_alphas_t = extract(
constants_dict['sqrt_recip_alphas'], batch_t, batch_xt.shape
).to(batch_xt.device)
# Equation 11 in the ddpm paper
# Use predicted noise to predict the mean (mu theta)
model_mean = sqrt_recip_alphas_t * (
batch_xt - betas_t * predicted_noise / sqrt_one_minus_alphas_cumprod_t
)
# We have to be careful not to add noise if we want to predict the final image
predicted_image = torch.zeros(batch_xt.shape).to(batch_xt.device)
t_zero_index = (batch_t == torch.zeros(batch_t.shape).to(batch_xt.device))
# Algorithm 2 line 4, we add noise when timestep is not 1:
posterior_variance_t = extract(constants_dict['posterior_variance'], batch_t, batch_xt.shape)
noise = torch.randn_like(batch_xt) # create noise, same shape as batch_x
predicted_image[~t_zero_index] = model_mean[~t_zero_index] + (
torch.sqrt(posterior_variance_t[~t_zero_index]) * noise[~t_zero_index]
)
# If t=1 we don't add noise to mu
predicted_image[t_zero_index] = model_mean[t_zero_index]
return predicted_image
```
%% Cell type:markdown id:c6e13aa1 tags:
## Sampling
%% Cell type:markdown id:459df8a2 tags:
We will now create the sampling function. Given a trained model, it should generate all the images we want.
%% Cell type:markdown id:1e3cdf15 tags:
With the reverse diffusion process and a trained model, we can now make the sampling function corresponding to this algorithm:
![sampling](https://docs.google.com/drawings/d/e/2PACX-1vT205aFxllD7gspWypXkoJVvkftJU0B0AiBbHZvZvmHFx_ntqY0oofBD_i874FNrrbJ1CWrOwWwLtUg/pub?w=1398&h=671)
%% Cell type:code id:710ef636 tags:
``` python
# Algorithm 2 (including returning all images)
@torch.no_grad()
def sampling(model, shape, T, constants_dict):
b = shape[0]
# start from pure noise (for each example in the batch)
batch_xt = torch.randn(shape, device=DEVICE)
batch_t = torch.ones(shape[0]) * T # create a vector of size batch_size filled with the timestep T
batch_t = batch_t.type(torch.int64).to(DEVICE)
imgs = []
for t in tqdm(reversed(range(0, T)), desc='sampling loop time step', total=T):
batch_t -= 1
predicted_noise = model(batch_xt, batch_t)
batch_xt = p_sample(constants_dict, batch_xt, predicted_noise, batch_t)
imgs.append(batch_xt.cpu())
return imgs
```
%% Cell type:markdown id:df50675e tags:
## Training
We will instantiate every object needed with fixed parameters here. We can try different hyperparameters by coming back here and changing them.
%% Cell type:code id:a3884522 tags:
``` python
# Dataset parameters
batch_size = 64
data_path = "/gpfsdswork/dataset/HuggingFace/fashion_mnist/fashion_mnist/"
# data_path = "fashion_mnist" # If you're not using Jean Zay
train_dataloader, channels, image_size, len_dataloader = get_dataset(data_path, batch_size)
```
%% Cell type:code id:b6b4a2bd tags:
``` python
epochs = 3
T = 1000 # number of diffusion timesteps
```
%% Cell type:code id:ba387427 tags:
``` python
constants_dict = get_alph_bet(T, schedule=linear_beta_schedule)
```
%% Cell type:code id:31933494 tags:
``` python
model = Unet(
dim=image_size,
init_dim=None,
out_dim=None,
dim_mults=(1, 2, 4),
channels=channels,
with_time_emb=True,
convnext_mult=2,
).to(DEVICE)
```
%% Cell type:code id:92fb2a17 tags:
``` python
criterion = nn.SmoothL1Loss()
optimizer = Adam(model.parameters(), lr=1e-4)
```
%% Cell type:markdown id:f059d28f tags:
### Training loop
![training_algorithm](https://docs.google.com/drawings/d/e/2PACX-1vRZYVrTttVD1qk5YjVT_CmQfFz2kR2cIqIMHKV4QE6LWU67mUl14NJowz-GKldITkFwsR5iM6w3epKl/pub?w=1395&h=670)
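The loss compares the sampled noise with the model's prediction: $ L = \left\lVert \epsilon - \epsilon_\theta\left(\sqrt{\bar{\alpha}_t}\,x_0 + \sqrt{1-\bar{\alpha}_t}\,\epsilon,\ t\right) \right\rVert $ (the cell below uses a SmoothL1 criterion rather than the plain L2 norm of the paper).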
%% Cell type:code id:4bab979d tags:
``` python
for epoch in range(epochs):
loop = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{epochs}")
for batch in loop:
optimizer.zero_grad()
batch_size_iter = batch["pixel_values"].shape[0]
batch_image = batch["pixel_values"].to(DEVICE)
# Algorithm 1 line 3: sample t uniformly for every example in the batch
batch_t = torch.randint(0, T, (batch_size_iter,), device=DEVICE).long()
noise = torch.randn_like(batch_image)
x_noisy = q_sample(constants_dict, batch_image, batch_t, noise=noise)
predicted_noise = model(x_noisy, batch_t)
loss = criterion(noise, predicted_noise)
loop.set_postfix(loss=loss.item())
loss.backward()
optimizer.step()
print("check generation:")
list_gen_imgs = sampling(model, (batch_size, channels, image_size, image_size), T, constants_dict)
show_images(list_gen_imgs[-1])
```
%% Cell type:markdown id:2489e819 tags:
## View of the diffusion process
%% Cell type:code id:09ce451d tags:
``` python
def make_gif(frame_list):
to_pil = ToPILImage()
frames = [to_pil(make_grid(normalize_im(tens_im))) for tens_im in frame_list]
frame_one = frames[0]
frame_one.save("sampling.gif.png", format="GIF", append_images=frames[::5], save_all=True, duration=10, loop=0)
return IPython.display.Image(filename="./sampling.gif.png")
```
%% Cell type:code id:4f665ac3 tags:
``` python
make_gif(list_gen_imgs)
```
%% Cell type:markdown id:bfa40b6b tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
# <!-- TITLE --> [DDPM2] - DDPM Python classes
# <!-- DESC --> Python classes used by the DDPM example
# <!-- AUTHOR : Hatim Bourfoune (CNRS/IDRIS), Maxime Song (CNRS/IDRIS) -->
import torch
from torch import nn, einsum
import torch.nn.functional as F
from inspect import isfunction
from functools import partial
import math
from einops import rearrange
def exists(x):
return x is not None
def default(val, d):
if exists(val):
return val
return d() if isfunction(d) else d
class Residual(nn.Module):
def __init__(self, fn):
super().__init__()
self.fn = fn
def forward(self, x, *args, **kwargs):
return self.fn(x, *args, **kwargs) + x
def Upsample(dim):
return nn.ConvTranspose2d(dim, dim, 4, 2, 1)
def Downsample(dim):
return nn.Conv2d(dim, dim, 4, 2, 1)
class SinusoidalPositionEmbeddings(nn.Module):
def __init__(self, dim):
super().__init__()
self.dim = dim
def forward(self, time):
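# Build [sin, cos] embeddings of the timestep, as in Transformer positional
# encodings: the frequencies form a geometric progression from 1 to 1/10000.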
device = time.device
half_dim = self.dim // 2
embeddings = math.log(10000) / (half_dim - 1)
embeddings = torch.exp(torch.arange(half_dim, device=device) * -embeddings)
embeddings = time[:, None] * embeddings[None, :]
embeddings = torch.cat((embeddings.sin(), embeddings.cos()), dim=-1)
return embeddings
class ConvNextBlock(nn.Module):
"""https://arxiv.org/abs/2201.03545"""
def __init__(self, dim, dim_out, *, time_emb_dim=None, mult=2, norm=True):
super().__init__()
self.mlp = (
nn.Sequential(nn.GELU(), nn.Linear(time_emb_dim, dim))
if exists(time_emb_dim)
else None
)
self.ds_conv = nn.Conv2d(dim, dim, 7, padding=3, groups=dim)
self.net = nn.Sequential(
nn.GroupNorm(1, dim) if norm else nn.Identity(),
nn.Conv2d(dim, dim_out * mult, 3, padding=1),
nn.GELU(),
nn.GroupNorm(1, dim_out * mult),
nn.Conv2d(dim_out * mult, dim_out, 3, padding=1),
)
self.res_conv = nn.Conv2d(dim, dim_out, 1) if dim != dim_out else nn.Identity()
def forward(self, x, time_emb=None):
h = self.ds_conv(x)
if exists(self.mlp) and exists(time_emb):
assert exists(time_emb), "time embedding must be passed in"
condition = self.mlp(time_emb)
h = h + rearrange(condition, "b c -> b c 1 1")
h = self.net(h)
return h + self.res_conv(x)
class Attention(nn.Module):
def __init__(self, dim, heads=4, dim_head=32):
super().__init__()
self.scale = dim_head**-0.5
self.heads = heads
hidden_dim = dim_head * heads
self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False)
self.to_out = nn.Conv2d(hidden_dim, dim, 1)
def forward(self, x):
b, c, h, w = x.shape
qkv = self.to_qkv(x).chunk(3, dim=1)
q, k, v = map(
lambda t: rearrange(t, "b (h c) x y -> b h c (x y)", h=self.heads), qkv
)
q = q * self.scale
sim = einsum("b h d i, b h d j -> b h i j", q, k)
sim = sim - sim.amax(dim=-1, keepdim=True).detach()
attn = sim.softmax(dim=-1)
out = einsum("b h i j, b h d j -> b h i d", attn, v)
out = rearrange(out, "b h (x y) d -> b (h d) x y", x=h, y=w)
return self.to_out(out)
class LinearAttention(nn.Module):
def __init__(self, dim, heads=4, dim_head=32):
super().__init__()
self.scale = dim_head**-0.5
self.heads = heads
hidden_dim = dim_head * heads
self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False)
self.to_out = nn.Sequential(nn.Conv2d(hidden_dim, dim, 1),
nn.GroupNorm(1, dim))
def forward(self, x):
b, c, h, w = x.shape
qkv = self.to_qkv(x).chunk(3, dim=1)
q, k, v = map(
lambda t: rearrange(t, "b (h c) x y -> b h c (x y)", h=self.heads), qkv
)
q = q.softmax(dim=-2)
k = k.softmax(dim=-1)
q = q * self.scale
context = torch.einsum("b h d n, b h e n -> b h d e", k, v)
out = torch.einsum("b h d e, b h d n -> b h e n", context, q)
out = rearrange(out, "b h c (x y) -> b (h c) x y", h=self.heads, x=h, y=w)
return self.to_out(out)
class PreNorm(nn.Module):
def __init__(self, dim, fn):
super().__init__()
self.fn = fn
self.norm = nn.GroupNorm(1, dim)
def forward(self, x):
x = self.norm(x)
return self.fn(x)
class Unet(nn.Module):
def __init__(
self,
dim,
init_dim=None,
out_dim=None,
dim_mults=(1, 2, 4, 8),
channels=3,
with_time_emb=True,
convnext_mult=2,
):
super().__init__()
# determine dimensions
self.channels = channels
init_dim = default(init_dim, dim // 3 * 2)
self.init_conv = nn.Conv2d(channels, init_dim, 7, padding=3)
dims = [init_dim, *map(lambda m: dim * m, dim_mults)]
in_out = list(zip(dims[:-1], dims[1:]))
block_klass = partial(ConvNextBlock, mult=convnext_mult)
# time embeddings
if with_time_emb:
time_dim = dim * 4
self.time_mlp = nn.Sequential(
SinusoidalPositionEmbeddings(dim),
nn.Linear(dim, time_dim),
nn.GELU(),
nn.Linear(time_dim, time_dim),
)
else:
time_dim = None
self.time_mlp = None
# layers
self.downs = nn.ModuleList([])
self.ups = nn.ModuleList([])
num_resolutions = len(in_out)
for ind, (dim_in, dim_out) in enumerate(in_out):
is_last = ind >= (num_resolutions - 1)
self.downs.append(
nn.ModuleList(
[
block_klass(dim_in, dim_out, time_emb_dim=time_dim),
block_klass(dim_out, dim_out, time_emb_dim=time_dim),
Residual(PreNorm(dim_out, LinearAttention(dim_out))),
Downsample(dim_out) if not is_last else nn.Identity(),
]
)
)
mid_dim = dims[-1]
self.mid_block1 = block_klass(mid_dim, mid_dim, time_emb_dim=time_dim)
self.mid_attn = Residual(PreNorm(mid_dim, Attention(mid_dim)))
self.mid_block2 = block_klass(mid_dim, mid_dim, time_emb_dim=time_dim)
for ind, (dim_in, dim_out) in enumerate(reversed(in_out[1:])):
is_last = ind >= (num_resolutions - 1)
self.ups.append(
nn.ModuleList(
[
block_klass(dim_out * 2, dim_in, time_emb_dim=time_dim),
block_klass(dim_in, dim_in, time_emb_dim=time_dim),
Residual(PreNorm(dim_in, LinearAttention(dim_in))),
Upsample(dim_in) if not is_last else nn.Identity(),
]
)
)
out_dim = default(out_dim, channels)
self.final_conv = nn.Sequential(
block_klass(dim, dim), nn.Conv2d(dim, out_dim, 1)
)
def forward(self, x, time):
x = self.init_conv(x)
t = self.time_mlp(time) if exists(self.time_mlp) else None
h = []
# downsample
for block1, block2, attn, downsample in self.downs:
x = block1(x, t)
x = block2(x, t)
x = attn(x)
h.append(x)
x = downsample(x)
# bottleneck
x = self.mid_block1(x, t)
x = self.mid_attn(x)
x = self.mid_block2(x, t)
# upsample
for block1, block2, attn, upsample in self.ups:
x = torch.cat((x, h.pop()), dim=1)
x = block1(x, t)
x = block2(x, t)
x = attn(x)
x = upsample(x)
return self.final_conv(x)
python==3.10
jupyterlab
pytorch
torchvision
tqdm
matplotlib
einops
datasets
%% Cell type:markdown id:w_5p3EyVknLC tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [DRL1] - Solving CartPole with DQN
<!-- DESC --> Using a Deep Q-Network to play CartPole - an inverted pendulum problem (PyTorch)
<!-- AUTHOR : Nathan Cassereau (IDRIS) and Bertrand Cabot (IDRIS) -->
By Nathan Cassereau (IDRIS) and Bertrand Cabot (IDRIS)
%% Cell type:markdown id:ucB28wGpmFwi tags:
## Objectives
* Understand the code behind the DQN algorithm
* Visualize the result for fun purposes :)
This notebook implements a DQN from scratch and trains it. It is simply a vanilla DQN with a target network (sometimes referred to as Double DQN). More sophisticated and recent modifications might help stabilize the training.
Considering that we are going to use a tiny network for a simple environment, matrix multiplications are not that time consuming, and using a GPU can be detrimental since communication between the CPU and the GPU is no longer negligible compared to the forward and backward steps. This notebook will therefore be executed on the CPU.
The chosen environment will be imported from the gym toolkit (https://gym.openai.com/).
%% Cell type:markdown id:fqQsB2Jwm-BP tags:
## Demonstration steps:
- Define numerous hyperparameters
- Implement the Q-Network
- Implement an agent following the Double DQN algorithm
- Train it for a few minutes
- Visualize the result
%% Cell type:markdown id:nRJmgZ0inpkk tags:
## Installations
Gym requires a graphical interface to render a state observation. Xvfb allows running the notebook headless. This software is not available on Jean Zay's compute nodes, hence the use of Google Colab.
%% Cell type:code id:y2Y71JbfgkeU tags:
``` python
!pip3 install pyvirtualdisplay
!pip install pyglet==1.5.11
!apt-get install x11-utils > /dev/null 2>&1
!apt-get install -y xvfb python-opengl > /dev/null 2>&1
```
%% Cell type:markdown id:q6eYfBKnoOJQ tags:
## Imports
I chose PyTorch to implement this DQN due to its straightforward API and personal preference.
Gym implements the environment.
%% Cell type:code id:0fc91d65-4756-4432-906c-7d315d981775 tags:
``` python
import numpy as np
import torch
import torch.nn as nn
import gym
from gym import wrappers
import random
from tqdm.notebook import tqdm
import functools
import matplotlib.pyplot as plt
import os
import io
import base64
import glob
from IPython.display import display, HTML
```
%% Cell type:markdown id:Hao-RYcdowHn tags:
## Hyperparameters
The size of the replay buffer does not matter much. In this case, it is big enough to hold every transition we will see during training. This choice does have a huge impact on memory though.
Warm-up allows the network to gather some information before the training process begins.
The target network will only be updated once every 10k steps in order to stabilize the training.
The exploration rate is linearly decreasing, although an exponential curve is a sound and common choice as well.
As mentioned above, only the CPU will be used; a GPU would be useful for bigger networks and/or environments whose internal state is a torch tensor.
Considering this is a simple DQN implementation, its stability leaves a lot to be desired. In order not to rely on luck, a decent seed was chosen.
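For reference, the linear schedule implemented below is $ \epsilon(s) = \max\left(\epsilon_{final},\ \epsilon_{init} - s\cdot\frac{\epsilon_{init} - \epsilon_{final}}{f\,S}\right) $, where $f$ is the exploration fraction and $S$ the total number of steps.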
%% Cell type:code id:6fX1X6y6YHXF tags:
``` python
learning_rate = 0.0001
buffer_size = 200000
warmup_steps = 10000
batch_size = 32
gamma = 0.99
train_freq = 4
target_update_interval = 10000
exploration_fraction = 0.1
exploration_initial_eps = 1.0
exploration_final_eps = 0.05
device = torch.device("cpu") # torch.device("cuda" if torch.cuda.is_available() else "cpu")
seed = 987654321
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
```
%% Cell type:markdown id:TofGB-s7qfSH tags:
## Q-Network and Agent implementation
%% Cell type:code id:4VhftO9PaE9g tags:
``` python
class DQN(nn.Module):
def __init__(self):
super(DQN, self).__init__()
self.layer1 = nn.Linear(4, 64)
self.layer2 = nn.Linear(64, 64)
self.layer3 = nn.Linear(64, 2)
self.relu = nn.ReLU()
def forward(self, x):
x = self.relu(self.layer1(x))
x = self.relu(self.layer2(x))
return self.layer3(x)
def compute_target(self, x, rewards):
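# Double-DQN target: y = r + gamma * max_a Q_target(s', a) for non-terminal
# transitions, and y = r when the episode ended (reward convention: -1 at done).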
with torch.no_grad():
values = torch.zeros(x.shape[0], device=device)
values[rewards == 1] = torch.max(self.forward(x[rewards == 1]), dim=-1)[0] # bootstrap only non-terminal transitions
values = rewards + gamma * values
return values
def predict(self, x):
if len(x.shape) < 2:
x = x[None, :]
with torch.no_grad():
x = torch.argmax(self.forward(x), dim=-1)
if x.device.type == "cuda":
x = x.cpu()
return x
class Agent:
def __init__(self, env):
self.env = env
self.q_network = DQN().to(device)
self.target_network = DQN().to(device)
self.target_network.eval()
self.synchronize()
self.optimizer = torch.optim.Adam(self.q_network.parameters(), lr=learning_rate)
self.criterion = nn.MSELoss()
self.buffer = []
self.n_updates = 0
def add_transition(self, state, action, reward, nextState):
self.buffer.append((state, action, reward, nextState))
if len(self.buffer) > buffer_size:
self.buffer.pop(random.randrange(len(self.buffer)))
def sample(self):
transitions = random.sample(self.buffer, batch_size)
states, actions, rewards, nextStates = zip(*transitions)
states = torch.stack(states).to(device)
actions = torch.cat(actions).to(device)
rewards = torch.cat(rewards).to(device)
nextStates = torch.stack(nextStates).to(device)
return states, actions, rewards, nextStates
def train_step(self, step):
if step % target_update_interval == 0:
self.synchronize()
if step < warmup_steps or step % train_freq != 0:
return 0.
states, actions, rewards, nextStates = self.sample()
output = self.q_network(states)
output = torch.gather(output, 1, actions.unsqueeze(-1)).view(-1)
expectedOutput = self.target_network.compute_target(nextStates, rewards).view(-1)
self.optimizer.zero_grad()
loss = self.criterion(output, expectedOutput)
loss.backward()
torch.nn.utils.clip_grad_norm_(self.q_network.parameters(), 10)
self.optimizer.step()
self.n_updates += 1
return loss.item()
def synchronize(self):
self.target_network.load_state_dict(self.q_network.state_dict())
def play(self, state, exploration_rate=0.):
if random.random() > exploration_rate:
return self.q_network.predict(state.to(device))
else:
shape = (state.shape[0],) if len(state.shape) > 1 else (1,)
return torch.randint(0, 2, size=shape)
@functools.lru_cache(maxsize=None)
def exploration_slope(self, total_steps):
return (exploration_initial_eps - exploration_final_eps) / (exploration_fraction * total_steps)
def exploration(self, step, total_steps):
eps = exploration_initial_eps - step * self.exploration_slope(total_steps)
return max(eps, exploration_final_eps)
def train(self, total_steps):
obs = torch.from_numpy(env.reset()).float()
n_episodes = 0
length_current_episode = 0
lengths = []
avg_reward = 0
loss_backup = 0.
acc_loss = 0.
acc_loss_count = 0
self.rewards = []
with tqdm(range(total_steps), desc="Training agent", unit="steps") as pbar:
for step in pbar:
eps = self.exploration(step, total_steps)
action = self.play(obs, eps)
new_obs, _, done, info = env.step(action.item())
reward = torch.tensor([1.0 if not done else -1.0], dtype=torch.float32)
new_obs = torch.from_numpy(new_obs).float()
self.add_transition(obs, action, reward, new_obs)
loss = self.train_step(step)
if loss != 0:
acc_loss += loss
acc_loss_count += 1
if done:
obs = torch.from_numpy(env.reset()).float()
n_episodes += 1
lengths.append(length_current_episode)
self.rewards.append(length_current_episode)
length_current_episode = 0
if len(lengths) >= 25:
avg_reward = sum(lengths) / len(lengths)
if acc_loss_count != 0:
loss_backup = acc_loss / acc_loss_count
else:
loss_backup = "??"
acc_loss = 0.
acc_loss_count = 0
lengths = []
else:
obs = new_obs
length_current_episode += 1
pbar.set_postfix({
"episodes": n_episodes,
"avg_reward": avg_reward,
"loss": loss_backup,
"exploration_rate": eps,
"n_updates": self.n_updates,
})
```
%% Cell type:markdown id:Kne9b7vCql3N tags:
## Defining the environment
%% Cell type:code id:BXw4RmGpFkZm tags:
``` python
env = gym.make("CartPole-v1")
env.seed(seed+2)
env.reset()
```
%% Cell type:markdown id:i93WQNsbqo68 tags:
## Training our agent
%% Cell type:code id:rAm6v_0HiEge tags:
``` python
agent = Agent(env)
agent.train(120000)
```
%% Cell type:markdown id:PPT-tl4Rqroj tags:
## Episodes length
A very noisy curve. It does reach satisfying levels though.
%% Cell type:code id:IoCnHaZKgHqI tags:
``` python
fig = plt.figure(figsize=(20, 12))
plt.plot(agent.rewards)
plt.xlabel("Episodes")
plt.ylabel("Episode length")
plt.show()
```
%% Cell type:markdown id:0fuolKppq1Ak tags:
## Result visualisation
%% Cell type:code id:GXT1q5ckh0dG tags:
``` python
from pyvirtualdisplay import Display
virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()
```
%% Cell type:code id:710b8294-4f75-49b5-a54a-777439ce8799 tags:
``` python
env = gym.make("CartPole-v1")
env.seed(4)
env = wrappers.Monitor(env, "./CartPole-v1/", force=True)
obs = env.reset()
i = 0
while True:
action = agent.q_network.predict(torch.from_numpy(obs).float().to(device))
obs, rewards, done, info = env.step(action.item())
env.render()
if done:
break
else:
i += 1
env.close()
print(f"Survived {i} steps")
```
%% Cell type:code id:c7ad6655-02b7-436e-a7ae-93a7222b100e tags:
``` python
def ipython_show_video(path):
"""Shamelessly stolen from https://stackoverflow.com/a/51183488/9977878
"""
if not os.path.isfile(path):
raise NameError("Cannot access: {}".format(path))
video = io.open(path, 'r+b').read()
encoded = base64.b64encode(video)
display(HTML(
data="""
<video alt="test" controls>
<source src="data:video/mp4;base64,{0}" type="video/mp4" />
</video>
""".format(encoded.decode('ascii'))
))
ipython_show_video(glob.glob("/content/CartPole-v1/*.mp4")[0])
```
%% Cell type:code id:31e6af84-489e-4665-919e-8234462c1f0a tags:
``` python
```
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [DRL2] - RL Baselines3 Zoo: Training in Colab
<!-- DESC --> Demo of Stable Baselines3 with Colab
<!-- AUTHOR : Nathan Cassereau (IDRIS) and Bertrand Cabot (IDRIS) -->
Demo of Stable Baselines3, adapted by Nathan Cassereau (IDRIS) and Bertrand Cabot (IDRIS)
Github Repo: [https://github.com/DLR-RM/rl-baselines3-zoo](https://github.com/DLR-RM/rl-baselines3-zoo)
Stable-Baselines3 Repo: [https://github.com/DLR-RM/stable-baselines3](https://github.com/DLR-RM/stable-baselines3)
# Install Dependencies
%% Cell type:code id: tags:
```
!apt-get install swig cmake ffmpeg freeglut3-dev xvfb
```
%% Cell type:code id: tags:
```
!apt-get install -y \
libgl1-mesa-dev \
libgl1-mesa-glx \
libglew-dev \
libosmesa6-dev \
software-properties-common
!apt-get install -y patchelf
```
%% Cell type:markdown id: tags:
## Clone RL Baselines3 Zoo Repo
%% Cell type:code id: tags:
```
!git clone --recursive https://github.com/DLR-RM/rl-baselines3-zoo
```
%% Cell type:code id: tags:
```
%cd /content/rl-baselines3-zoo/
```
%% Cell type:markdown id: tags:
### Install pip dependencies
%% Cell type:code id: tags:
```
!pip install -r requirements.txt
```
%% Cell type:code id: tags:
```
!pip install free-mujoco-py
```
%% Cell type:markdown id: tags:
## Pretrained model
gym environments: https://gym.openai.com/envs/
%% Cell type:code id: tags:
```
%cd /content/rl-baselines3-zoo/
```
%% Cell type:markdown id: tags:
### Record a Video
%% Cell type:code id: tags:
```
# Set up display; otherwise rendering will fail
import os
os.system("Xvfb :1 -screen 0 1024x768x24 &")
os.environ['DISPLAY'] = ':1'
```
%% Cell type:code id: tags:
```
import base64
from pathlib import Path
from IPython import display as ipythondisplay
def show_videos(video_path='', prefix=''):
"""
Taken from https://github.com/eleurent/highway-env
:param video_path: (str) Path to the folder containing videos
:param prefix: (str) Filter the videos, showing only the ones starting with this prefix
"""
html = []
for mp4 in Path(video_path).glob("**/*{}*.mp4".format(prefix)):
video_b64 = base64.b64encode(mp4.read_bytes())
html.append('''{} <br> <video alt="{}" autoplay
loop controls style="height: 400px;">
<source src="data:video/mp4;base64,{}" type="video/mp4" />
</video>'''.format(mp4, mp4, video_b64.decode('ascii')))
ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(html)))
```
%% Cell type:markdown id: tags:
### Discrete environments
%% Cell type:code id: tags:
```
%run scripts/all_plots.py -a dqn qrdqn a2c ppo --env PongNoFrameskip-v4 -f rl-trained-agents/
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a dqn -e PongNoFrameskip-v4 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a qrdqn -e PongNoFrameskip-v4 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a a2c -e PongNoFrameskip-v4 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a ppo -e PongNoFrameskip-v4 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
!python enjoy.py --algo dqn --env PongNoFrameskip-v4 --no-render --n-timesteps 5000
```
%% Cell type:code id: tags:
```
!python -m utils.record_video --algo dqn --env PongNoFrameskip-v4
```
%% Cell type:code id: tags:
```
show_videos(video_path='rl-trained-agents/dqn', prefix='PongNoFrameskip-v4')
```
%% Cell type:markdown id: tags:
### Continuous environments
%% Cell type:code id: tags:
```
%run scripts/all_plots.py -a ppo trpo sac td3 tqc --env Ant-v3 -f rl-trained-agents/
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a ppo -e Ant-v3 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a trpo -e Ant-v3 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a tqc -e Ant-v3 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a td3 -e Ant-v3 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
%run scripts/plot_train.py -a sac -e Ant-v3 -f rl-trained-agents/ -x time
```
%% Cell type:code id: tags:
```
!python enjoy.py --algo td3 --env Ant-v3 --no-render --n-timesteps 5000
```
%% Cell type:code id: tags:
```
!python -m utils.record_video --algo td3 --env Ant-v3
```
%% Cell type:code id: tags:
```
show_videos(video_path='rl-trained-agents/td3', prefix='Ant-v3')
```
%% Cell type:markdown id: tags:
## Train an RL Agent
The trained agent can be found in the `logs/` folder.
Here we will train DQN on the PongNoFrameskip-v4 (Atari) environment for 1 000 000 steps.
To train on another environment, you just have to change the `--env` argument.
Note: You need to update `hyperparams/algo.yml` to support new environments. You can access it in the side panel of Google Colab. (see https://stackoverflow.com/questions/46986398/import-data-into-google-colaboratory)
%% Cell type:code id: tags:
```
!python train.py --algo dqn --env PongNoFrameskip-v4 --n-timesteps 1000000
```
%% Cell type:markdown id: tags:
#### Evaluate trained agent
You can remove the `--folder logs/` argument to evaluate a pretrained agent.
%% Cell type:code id: tags:
```
!python enjoy.py --algo dqn --env PongNoFrameskip-v4 --no-render --n-timesteps 5000 --folder logs/
```
%% Cell type:markdown id: tags:
#### Tune Hyperparameters
We use [Optuna](https://optuna.org/) for optimizing the hyperparameters.
The commented command below tunes the hyperparameters for DQN, using a TPE sampler and a median pruner, 5 parallel jobs, a budget of 10 trials and a maximum of 5000 steps.
%% Cell type:code id: tags:
```
#!python train.py --algo dqn --env PongNoFrameskip-v4 -n 5000 -optimize --n-trials 10 --n-jobs 5 --sampler tpe --pruner median
```
%% Cell type:markdown id: tags:
### Display the video
%% Cell type:markdown id: tags:
### Continue Training
Here, we will continue the training of the previous model
%% Cell type:code id: tags:
```
#!python train.py --algo dqn --env PongNoFrameskip-v4 --n-timesteps 50000 -i logs/dqn/PongNoFrameskip-v4_1/PongNoFrameskip-v4.zip
```
%% Cell type:code id: tags:
```
#!python enjoy.py --algo dqn --env PongNoFrameskip-v4 --no-render --n-timesteps 1000 --folder logs/
```
%% Cell type:code id: tags:
```
```
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3IMDB1] - Sentiment analysis with one-hot encoding
<!-- DESC --> A basic example of sentiment analysis with sparse encoding, using a dataset from the Internet Movie Database (IMDB), with Keras 3 on PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to guess whether film reviews are **positive or negative** based on the analysis of the text.
- Understand the management of **textual data** and **sentiment analysis**
The original dataset can be found **[here](http://ai.stanford.edu/~amaas/data/sentiment/)**
Note that [IMDb.com](https://imdb.com) offers several easy-to-use [datasets](https://www.imdb.com/interfaces/)
For simplicity's sake, we'll use the dataset directly [embedded in Keras](https://keras.io/datasets)
## What we're going to do :
- Retrieve data
- Preparing the data
- Build a model
- Train the model
- Evaluate the result
%% Cell type:markdown id: tags:
## Step 1 - Import and init
### 1.1 - Python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import keras.datasets.imdb as imdb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3IMDB1')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
The words in the vocabulary are classified from the most frequent to the rarest.\
`vocab_size` is the number of words we will remember in our vocabulary (the other words will be considered as unknown).\
`hide_most_frequently` is the number of most frequent words to ignore\
`fit_verbosity` is the verbosity during training : 0 = silent, 1 = progress bar, 2 = one line per epoch
%% Cell type:code id: tags:
``` python
vocab_size = 5000
hide_most_frequently = 0
epochs = 10
batch_size = 512
fit_verbosity = 1
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('vocab_size', 'hide_most_frequently', 'batch_size', 'epochs', 'fit_verbosity')
```
%% Cell type:markdown id: tags:
## Step 2 - Understanding one-hot encoding
#### We have a **sentence** and a **dictionary** :
%% Cell type:code id: tags:
``` python
sentence = "I've never seen a movie like this before"
dictionary = {"a":0, "before":1, "fantastic":2, "i've":3, "is":4, "like":5, "movie":6, "never":7, "seen":8, "this":9}
```
%% Cell type:markdown id: tags:
#### We encode our sentence as a **numerical vector** :
%% Cell type:code id: tags:
``` python
sentence_words = sentence.lower().split()
sentence_vect = [ dictionary[w] for w in sentence_words ]
print('Words sentence are : ', sentence_words)
print('Our vectorized sentence is : ', sentence_vect)
```
%% Cell type:markdown id: tags:
#### Next, we **one-hot** encode our vectorized sentence as a tensor :
%% Cell type:code id: tags:
``` python
# ---- We get a (dictionary size x sentence length) matrix of zeros
#
onehot = np.zeros( (10,8) )
# ---- We set a 1 for each word of the sentence
#
for i,w in enumerate(sentence_vect):
onehot[w,i]=1
# --- Show it
#
print('In a basic way :\n\n', onehot, '\n\nWith a pandas view :\n')
data={ f'{sentence_words[i]:.^10}':onehot[:,i] for i,w in enumerate(sentence_vect) }
df=pd.DataFrame(data)
df.index=dictionary.keys()
# --- Pandas Warning
#
df.style.format('{:1.0f}').highlight_max(axis=0).set_properties(**{'text-align': 'center'})
```
%% Cell type:markdown id: tags:
## Step 3 - Retrieve data
The IMDb dataset can be fetched directly from Keras - see [documentation](https://keras.io/api/datasets/imdb/)
Note : Due to their nature, textual data can be somewhat complex.
### 3.1 - Data structure :
The dataset is composed of 2 parts:
- **reviews**, this will be our **x**
- **opinions** (positive/negative), this will be our **y**
There is also a **dictionary**, because words are indexed in the reviews
```
<dataset> = (<reviews>, <opinions>)
with : <reviews> = [ <review1>, <review2>, ... ]
<opinions> = [ <rate1>, <rate2>, ... ] where <ratei> = integer
where : <reviewi> = [ <w1>, <w2>, ...] <wi> are the index (int) of the word in the dictionary
<ratei> = int 0 for negative opinion, 1 for positive
<dictionary> = [ <word1>:<w1>, <word2>:<w2>, ... ]
with : <wordi> = word
<wi> = int
```
%% Cell type:markdown id: tags:
### 3.2 - Load dataset
For simplicity, we will use a pre-formatted dataset - See [documentation](https://keras.io/api/datasets/imdb)
However, Keras offers some useful tools for formatting textual data - See [documentation](https://keras.io/api/layers/preprocessing_layers/text/text_vectorization/)
By default :
- Start of a sequence will be marked with : 1
- Out of vocabulary word will be : 2
- First index will be : 3
%% Cell type:code id: tags:
``` python
# ----- Retrieve x,y
#
start_char = 1 # Start of a sequence (padding is 0)
oov_char = 2 # Out-of-vocabulary
index_from = 3 # First word id
(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words = vocab_size,
skip_top = hide_most_frequently,
start_char = start_char,
oov_char = oov_char,
index_from = index_from)
# ---- About
#
print("Max(x_train,x_test) : ", fidle.utils.rmax([x_train,x_test]) )
print("Min(x_train,x_test) : ", fidle.utils.rmin([x_train,x_test]) )
print("Len(x_train) : ", len(x_train))
print("Len(x_test) : ", len(x_test))
```
%% Cell type:markdown id: tags:
## Step 4 - About our dataset
When we loaded the dataset, we asked to use \<start\> as 1 and \<unknown word\> as 2.
So, we shifted all word indices by 3 with the parameter index_from=3.
### 4.1 - Sentences encoding
%% Cell type:code id: tags:
``` python
print('\nReview example (x_train[12]) :\n\n',x_train[12])
print('\nOpinions (y_train) :\n\n',y_train)
```
%% Cell type:markdown id: tags:
### 4.2 - Load dictionary
%% Cell type:code id: tags:
``` python
# ---- Retrieve dictionary {word:index}, and encode it in ascii
#
word_index = imdb.get_word_index()
# ---- Shift the dictionary from <index_from>
#
word_index = {w:(i+index_from) for w,i in word_index.items()}
# ---- Add <pad>, <start> and <unknown> tags
#
word_index.update( {'<pad>':0, '<start>':1, '<unknown>':2, '<undef>':3,} )
# ---- Create a reverse dictionary : {index:word}
#
index_word = {index:word for word,index in word_index.items()}
# ---- About dictionary
#
print('\nDictionary size : ', len(word_index))
print('\nSmall extract :\n')
for k in range(440,455):print(f' {k:2d} : {index_word[k]}' )
# ---- Add a nice function to translate indices back to text :
#
def dataset2text(review):
return ' '.join([index_word.get(i, '?') for i in review])
```
%% Cell type:markdown id: tags:
### 4.3 - Have a look, for human
%% Cell type:code id: tags:
``` python
fidle.utils.subtitle('Review example :')
print(x_train[12])
fidle.utils.subtitle('After translation :')
print(dataset2text(x_train[12]))
```
%% Cell type:markdown id: tags:
### 4.4 - Few statistics
%% Cell type:code id: tags:
``` python
sizes=[len(i) for i in x_train]
plt.figure(figsize=(12,4))
plt.hist(sizes, bins=400)
plt.gca().set(title='Distribution of reviews by size - [{:5.2f}, {:5.2f}]'.format(min(sizes),max(sizes)),
xlabel='Size', ylabel='Density', xlim=[0,1500])
fidle.scrawler.save_fig('01-stats-sizes')
plt.show()
```
%% Cell type:code id: tags:
``` python
unk=[ 100*(s.count(oov_char)/len(s)) for s in x_train]
plt.figure(figsize=(12,4))
plt.hist(unk, bins=100)
plt.gca().set(title='Percent of unknown words - [{:5.2f}, {:5.2f}]'.format(min(unk),max(unk)),
xlabel='# unknown', ylabel='Density', xlim=[0,30])
fidle.scrawler.save_fig('02-stats-unknown')
plt.show()
```
%% Cell type:markdown id: tags:
## Step 5 - Basic approach with "one-hot" vector encoding
In this basic approach, each sentence is encoded with a **vector** of length equal to the **size of the dictionary**.
The value of each component is 1 if the word is present in the sentence and 0 otherwise.
For a sentence s=[3,4,7] and a dictionary of 10 words...
We will have a vector v=[0,0,0,1,1,0,0,1,0,0]
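A minimal check of this encoding (a quick sketch; numpy is imported above):
%% Cell type:code id: tags:
``` python
v = np.zeros(10)   # dictionary of 10 words
v[[3, 4, 7]] = 1   # words 3, 4 and 7 are present in the sentence
print(v)           # -> [0. 0. 0. 1. 1. 0. 0. 1. 0. 0.]
```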
%% Cell type:markdown id: tags:
### 5.1 - Our one-hot encoder function
%% Cell type:code id: tags:
``` python
def one_hot_encoder(x, vector_size=10000):
# ---- Set all to 0
#
x_encoded = np.zeros((len(x), vector_size))
# ---- For each sentence
#
for i,sentence in enumerate(x):
for word in sentence:
x_encoded[i, word] = 1.
return x_encoded
```
%% Cell type:markdown id: tags:
### 5.2 - Encoding
%% Cell type:code id: tags:
``` python
x_train = one_hot_encoder(x_train, vector_size=vocab_size)
x_test = one_hot_encoder(x_test, vector_size=vocab_size)
print("To have a look, x_train[12] became :", x_train[12] )
```
%% Cell type:markdown id: tags:
## Step 6 - Build a nice model
%% Cell type:code id: tags:
``` python
model = keras.Sequential(name='My IMDB classifier')
model.add(keras.layers.Input( shape=(vocab_size,) ))
model.add(keras.layers.Dense( 32, activation='relu'))
model.add(keras.layers.Dense( 32, activation='relu'))
model.add(keras.layers.Dense( 1, activation='sigmoid'))
model.compile(optimizer = 'rmsprop',
loss = 'binary_crossentropy',
metrics = ['accuracy'])
model.summary()
```
%% Cell type:markdown id: tags:
## Step 7 - Train the model
### 7.1 - Add callback
%% Cell type:code id: tags:
``` python
os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)
save_dir = f'{run_dir}/models/best_model.keras'
savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_accuracy', mode='max', save_best_only=True)
```
%% Cell type:markdown id: tags:
### 7.2 - Train it
%% Cell type:code id: tags:
``` python
%%time
history = model.fit(x_train,
y_train,
epochs = epochs,
batch_size = batch_size,
validation_data = (x_test, y_test),
verbose = fit_verbosity,
callbacks = [savemodel_callback])
```
%% Cell type:markdown id: tags:
## Step 8 - Evaluate
### 8.1 - Training history
%% Cell type:code id: tags:
``` python
fidle.scrawler.history(history, save_as='02-history')
```
%% Cell type:markdown id: tags:
### 8.2 - Reload and evaluate best model
%% Cell type:code id: tags:
``` python
model = keras.models.load_model(f'{run_dir}/models/best_model.keras')
# ---- Evaluate
score = model.evaluate(x_test, y_test, verbose=0)
print('\n\nModel evaluation :\n')
print(' x_test / loss : {:5.4f}'.format(score[0]))
print(' x_test / accuracy : {:5.4f}'.format(score[1]))
values=[score[1], 1-score[1]]
fidle.scrawler.donut(values,["Accuracy","Errors"], title="#### Accuracy donut is :", save_as='03-donut')
# ---- Confusion matrix
y_sigmoid = model.predict(x_test, verbose=fit_verbosity)
y_pred = y_sigmoid.copy()
y_pred[ y_sigmoid< 0.5 ] = 0
y_pred[ y_sigmoid>=0.5 ] = 1
fidle.scrawler.confusion_matrix_txt(y_test,y_pred,labels=range(2))
fidle.scrawler.confusion_matrix(y_test,y_pred,range(2), figsize=(8, 8),normalize=False, save_as='04-confusion-matrix')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3IMDB2] - Sentiment analysis with text embedding
<!-- DESC --> A very classical example of word embedding with a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to guess whether film reviews are **positive or negative** based on the analysis of the text.
- Understand the management of **textual data** and **sentiment analysis**
The original dataset can be found **[here](http://ai.stanford.edu/~amaas/data/sentiment/)**
Note that [IMDb.com](https://imdb.com) offers several easy-to-use [datasets](https://www.imdb.com/interfaces/)
For simplicity's sake, we'll use the dataset directly [embedded in Keras](https://keras.io/datasets)
## What we're going to do :
- Retrieve data
- Prepare the data
- Build a model
- Train the model
- Evaluate the result
%% Cell type:markdown id: tags:
## Step 1 - Import and init
### 1.1 - Python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'

import keras
import keras.datasets.imdb as imdb

import h5py,json
import numpy as np
import matplotlib.pyplot as plt

import fidle

# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3IMDB2')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
The words in the vocabulary are classified from the most frequent to the rarest.
`vocab_size` is the number of words we will remember in our vocabulary (the other words will be considered as unknown).
`hide_most_frequently` is the number of ignored words, among the most common ones
`review_len` is the review length
`dense_vector_size` is the size of the generated dense vectors
`output_dir` is where we will save our dictionaries (./data is a good choice)\
`fit_verbosity` is the verbosity during training : 0 = silent, 1 = progress bar, 2 = one line per epoch
%% Cell type:code id: tags:
``` python
vocab_size           = 5000
hide_most_frequently = 0

review_len           = 256
dense_vector_size    = 32

epochs               = 30
batch_size           = 512

output_dir           = './data'
fit_verbosity        = 1
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('vocab_size', 'hide_most_frequently', 'review_len', 'dense_vector_size')
fidle.override('batch_size', 'epochs', 'output_dir', 'fit_verbosity')
```
%% Cell type:markdown id: tags:
## Step 2 - Retrieve data
IMDb dataset can be obtained directly from Keras - see [documentation](https://keras.io/api/datasets)
Note : Due to their nature, textual data can be somewhat complex.
For more details about the management of this dataset, see notebook [IMDB1](01-One-hot-encoding.ipynb)
%% Cell type:markdown id: tags:
### 2.1 - Get dataset
%% Cell type:code id: tags:
``` python
# ----- Retrieve x,y
#
start_char = 1      # Start of a sequence (padding is 0)
oov_char   = 2      # Out-of-vocabulary
index_from = 3      # First word id

(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words  = vocab_size,
                                                       skip_top   = hide_most_frequently,
                                                       start_char = start_char,
                                                       oov_char   = oov_char,
                                                       index_from = index_from)

# ---- About
#
print("Max(x_train,x_test)  : ", fidle.utils.rmax([x_train,x_test]) )
print("Min(x_train,x_test)  : ", fidle.utils.rmin([x_train,x_test]) )
print("Len(x_train)         : ", len(x_train))
print("Len(x_test)          : ", len(x_test))
```
%% Cell type:markdown id: tags:
### 2.2 - Load dictionary
Not essential, but nice if you want to take a closer look at our reviews ;-)
%% Cell type:code id: tags:
``` python
# ---- Retrieve dictionary {word:index}, and encode it in ascii
# Shift the dictionary from +3
# Add <pad>, <start> and <unknown> tags
# Create a reverse dictionary : {index:word}
#
word_index = imdb.get_word_index()
word_index = {w:(i+index_from) for w,i in word_index.items()}
word_index.update( {'<pad>':0, '<start>':1, '<unknown>':2, '<undef>':3,} )
index_word = {index:word for word,index in word_index.items()}
# ---- A nice function to transpose :
#
def dataset2text(review):
return ' '.join([index_word.get(i, '?') for i in review])
```
%% Cell type:markdown id: tags:
## Step 3 - Preprocess the data (padding)
In order to be processed by an NN, all entries must have the **same length.**
We chose a review length of **review_len**
We will therefore complete them with a padding (of 0 as \<pad\>)
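%% Cell type:markdown id: tags:
A minimal sketch of what this padding does (the sequences are just illustrative) :
%% Cell type:code id: tags:
``` python
demo = keras.preprocessing.sequence.pad_sequences([[1,8,4], [1,2]], value=0, padding='post', maxlen=5)
print(demo)     # [[1 8 4 0 0]
                #  [1 2 0 0 0]]
```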
%% Cell type:code id: tags:
``` python
x_train = keras.preprocessing.sequence.pad_sequences(x_train,
value = 0,
padding = 'post',
maxlen = review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test,
value = 0 ,
padding = 'post',
maxlen = review_len)
fidle.utils.subtitle('After padding :')
print(x_train[12])
```
%% Cell type:markdown id: tags:
**Save dataset and dictionary (For future use but not mandatory)**
%% Cell type:code id: tags:
``` python
# ---- Write dataset in a h5 file, could be useful
#
fidle.utils.mkdir(output_dir)

with h5py.File(f'{output_dir}/dataset_imdb.h5', 'w') as f:
f.create_dataset("x_train", data=x_train)
f.create_dataset("y_train", data=y_train)
f.create_dataset("x_test", data=x_test)
f.create_dataset("y_test", data=y_test)
print('Dataset h5 file saved.')
with open(f'{output_dir}/word_index.json', 'w') as fp:
json.dump(word_index, fp)
print('Word to index saved.')
```
%% Cell type:markdown id: tags:
## Step 4 - Build the model
A few remarks :
1. We'll choose a dense vector size for the embedding output with **dense_vector_size**
2. **GlobalAveragePooling1D** does a pooling on the last dimension : (None, lx, ly) -> (None, ly)
In other words: we average the set of word vectors of a sentence
3. The Keras embedding is trained in a supervised way. It is a layer from *vocab_size* neurons to *n_neurons*, which maintains a table of vectors (the weights are the vectors). This layer does not compute an output the way normal layers do, but returns the values of the vectors: n words => n vectors (then stacked by the pooling)
See : https://stats.stackexchange.com/questions/324992/how-the-embedding-layer-is-trained-in-keras-embedding-layer
See also : https://www.liip.ch/en/blog/sentiment-detection-with-keras-word-embeddings-and-lstm-deep-learning-networks
### 4.1 - Build
More documentation about these model functions :
- [Embedding](https://keras.io/api/layers/core_layers/embedding/)
- [GlobalAveragePooling1D](https://keras.io/api/layers/pooling_layers/global_average_pooling1d/)
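%% Cell type:markdown id: tags:
To make remark 2 concrete, a minimal numpy sketch of what this pooling computes (shapes are illustrative) :
%% Cell type:code id: tags:
``` python
import numpy as np

batch  = np.random.rand(2, 5, 4)   # (batch, words, vector size) : 5 word vectors per sentence
pooled = batch.mean(axis=1)        # average the word vectors of each sentence
print(pooled.shape)                # (2, 4) : one vector per sentence
```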
%% Cell type:code id: tags:
``` python
model = keras.Sequential(name='Embedding model')

model.add(keras.layers.Input( shape=(review_len,) ))
model.add(keras.layers.Embedding( input_dim  = vocab_size,
                                  output_dim = dense_vector_size))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(dense_vector_size, activation='relu'))
model.add(keras.layers.Dense(1,                 activation='sigmoid'))

model.compile( optimizer = 'adam',
               loss      = 'binary_crossentropy',
               metrics   = ['accuracy'])
model.summary()
```
%% Cell type:markdown id: tags:
## Step 5 - Train the model
### 5.1 - Add callbacks
%% Cell type:code id: tags:
``` python
os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)
save_dir = f'{run_dir}/models/best_model.keras'
savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_accuracy', mode='max', save_best_only=True)
```
%% Cell type:markdown id: tags:
### 5.2 - Train it
%% Cell type:code id: tags:
``` python
%%time
history = model.fit(x_train,
                    y_train,
                    epochs          = epochs,
                    batch_size      = batch_size,
                    validation_data = (x_test, y_test),
                    verbose         = fit_verbosity,
                    callbacks       = [savemodel_callback])
```
%% Cell type:markdown id: tags:
## Step 6 - Evaluate
### 6.1 - Training history
%% Cell type:code id: tags:
``` python
fidle.scrawler.history(history, save_as='02-history')
```
%% Cell type:markdown id: tags:
### 6.2 - Reload and evaluate best model
%% Cell type:code id: tags:
``` python
model = keras.models.load_model(f'{run_dir}/models/best_model.keras')
# ---- Evaluate
score = model.evaluate(x_test, y_test, verbose=0)
print('x_test / loss : {:5.4f}'.format(score[0]))
print('x_test / accuracy : {:5.4f}'.format(score[1]))
values=[score[1], 1-score[1]]
fidle.scrawler.donut(values,["Accuracy","Errors"], title="#### Accuracy donut is :", save_as='03-donut')
# ---- Confusion matrix
y_sigmoid = model.predict(x_test, verbose=fit_verbosity)
y_pred = y_sigmoid.copy()
y_pred[ y_sigmoid< 0.5 ] = 0
y_pred[ y_sigmoid>=0.5 ] = 1
fidle.scrawler.confusion_matrix_txt(y_test,y_pred,labels=range(2))
fidle.scrawler.confusion_matrix(y_test,y_pred,range(2), figsize=(8, 8),normalize=False, save_as='04-confusion-matrix')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3IMDB3] - Reload and reuse a saved model
<!-- DESC --> Retrieving a saved model to perform a sentiment analysis (movie review), using Keras 3 and PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to guess whether our personal film reviews are **positive or negative** based on the analysis of the text.
- For this, we will use our **previously saved model**.
## What we're going to do :
- Prepare our data
- Retrieve our saved model
- Evaluate the result
%% Cell type:markdown id: tags:
## Step 1 - Init python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import json,re
import numpy as np
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3IMDB3')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
The words in the vocabulary are classified from the most frequent to the rarest.
`vocab_size` is the number of words we will remember in our vocabulary (the other words will be considered as unknown).
`review_len` is the review length
`saved_models` is where our models were previously saved
`dictionaries_dir` is where our dictionaries were saved (./data is a good choice)
%% Cell type:code id: tags:
``` python
vocab_size       = 10000
review_len       = 256

saved_models     = './run/K3IMDB2'
dictionaries_dir = './data'
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('vocab_size', 'review_len', 'saved_models', 'dictionaries_dir')
```
%% Cell type:markdown id: tags:
## Step 2 - Preparing the data
### 2.1 - Our reviews :
%% Cell type:code id: tags:
``` python
reviews = [ "This film is particularly nice, a must see.",
"Some films are classics and cannot be ignored.",
"This film is a great classic that cannot be ignored.",
"I don't remember ever having seen such a movie...",
"This movie is just abominable and doesn't deserve to be seen!"]
```
%% Cell type:markdown id: tags:
### 2.2 - Retrieve dictionaries
Note : This dictionary is generated by [02-Embedding-Keras](02-Keras-embedding.ipynb) notebook.
%% Cell type:code id: tags:
``` python
with open(f'{dictionaries_dir}/word_index.json', 'r') as fp:
word_index = json.load(fp)
index_word = { i:w for w,i in word_index.items() }
print('Dictionaries loaded. ', len(word_index), 'entries' )
```
%% Cell type:markdown id: tags:
### 2.3 - Clean, index and pad
Phrases are split into words, punctuation is removed, sentence length is limited and padding is added...
**Note** : 1 is "Start" and 2 is "unknown"
%% Cell type:code id: tags:
``` python
start_char = 1 # Start of a sequence (padding is 0)
oov_char = 2 # Out-of-vocabulary
index_from = 3 # First word id
nb_reviews = len(reviews)
x_data = []
# ---- For all reviews
for review in reviews:
print('Words are : ', end='')
# ---- First index must be <start>
index_review=[start_char]
print(f'{start_char} ', end='')
# ---- For all words
for w in review.split(' '):
# ---- Clean it
w_clean = re.sub(r"[^a-zA-Z0-9]", "", w)
# ---- Not empty ?
if len(w_clean)>0:
# ---- Get the index - must be inside dict or is out of vocab (oov)
w_index = word_index.get(w, oov_char)
if w_index>vocab_size : w_index=oov_char
# ---- Add the index if < vocab_size
index_review.append(w_index)
print(f'{w_index} ', end='')
# ---- Add the indexed review
x_data.append(index_review)
print()
# ---- Padding
x_data = keras.preprocessing.sequence.pad_sequences(x_data, value = 0, padding = 'post', maxlen = review_len)
```
%% Cell type:markdown id: tags:
### 2.4 - Have a look
%% Cell type:code id: tags:
``` python
def translate(x):
return ' '.join( [index_word.get(i,'?') for i in x] )
for i in range(nb_reviews):
imax=np.where(x_data[i]==0)[0][0]+5
print(f'\nText review {i} :', reviews[i])
print(f'tokens vector :', list(x_data[i][:imax]), '(...)')
print('Translation :', translate(x_data[i][:imax]), '(...)')
```
%% Cell type:markdown id: tags:
## Step 3 - Bring back the model
%% Cell type:code id: tags:
``` python
model = keras.models.load_model(f'{saved_models}/models/best_model.keras')
```
%% Cell type:markdown id: tags:
## Step 4 - Predict
%% Cell type:code id: tags:
``` python
y_pred = model.predict(x_data, verbose=0)
```
%% Cell type:markdown id: tags:
#### And the winner is :
%% Cell type:code id: tags:
``` python
for i,review in enumerate(reviews):
rate = y_pred[i][0]
opinion = 'NEGATIVE :-(' if rate<0.5 else 'POSITIVE :-)'
print(f'{review:<70} => {rate:.2f} - {opinion}')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3IMDB4] - Reload embedded vectors
<!-- DESC --> Retrieving embedded vectors from our trained model, using Keras 3 and PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to retrieve and visualize our embedded vectors
- For this, we will use our **previously saved model**.
## What we're going to do :
- Retrieve our saved model
- Extract vectors and play with
%% Cell type:markdown id: tags:
## Step 1 - Init python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import json,re
import numpy as np
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3IMDB4')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
The words in the vocabulary are classified from the most frequent to the rarest.
`vocab_size` is the number of words we will remember in our vocabulary (the other words will be considered as unknown).
`review_len` is the review length
`saved_models` is where our models were previously saved
`dictionaries_dir` is where our dictionaries were saved (./data is a good choice)
%% Cell type:code id: tags:
``` python
vocab_size = 5000
review_len = 256
saved_models = './run/K3IMDB2'
dictionaries_dir = './data'
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('vocab_size', 'review_len', 'saved_models', 'dictionaries_dir')
```
%% Cell type:markdown id: tags:
## Step 2 - Get the embedding vectors !
%% Cell type:markdown id: tags:
### 2.1 - Load model and dictionaries
Note : This dictionary is generated by [02-Embedding-Keras](02-Keras-embedding.ipynb) notebook.
%% Cell type:code id: tags:
``` python
model = keras.models.load_model(f'{saved_models}/models/best_model.keras')
print('Model loaded.')
with open(f'{dictionaries_dir}/word_index.json', 'r') as fp:
word_index = json.load(fp)
index_word = { i:w for w,i in word_index.items() }
print('Dictionaries loaded. ', len(word_index), 'entries' )
```
%% Cell type:markdown id: tags:
### 2.2 - Retrieve embeddings
%% Cell type:code id: tags:
``` python
embeddings = model.layers[0].get_weights()[0]
print('Shape of embeddings : ',embeddings.shape)
```
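%% Cell type:markdown id: tags:
Note : `model.layers[0]` is the Embedding layer of our saved model; its single weight matrix has one row per word of the vocabulary, hence the (vocab_size, dense_vector_size) shape printed above.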
%% Cell type:markdown id: tags:
### 2.3 - Build a nice dictionary
%% Cell type:code id: tags:
``` python
word_embedding = { index_word[i]:embeddings[i] for i in range(vocab_size) }
```
%% Cell type:markdown id: tags:
## Step 3 - Have a look !
#### Show embedding of a word :
%% Cell type:code id: tags:
``` python
word_embedding['nice']
```
%% Cell type:markdown id: tags:
#### A few useful functions to play with
%% Cell type:code id: tags:
``` python
# Return the L2 distance between 2 words
#
def l2w(w1,w2):
v1=word_embedding[w1]
v2=word_embedding[w2]
return np.linalg.norm(v2-v1)
# Show distance between 2 words
#
def show_l2(w1,w2):
print(f'\nL2 between [{w1}] and [{w2}] : ',l2w(w1,w2))
# Display the closest words to a given word (the word itself is skipped)
#
def neighbors(w1):
v1=word_embedding[w1]
dd={}
for i in range(4, 1000):
w2=index_word[i]
dd[w2]=l2w(w1,w2)
dd= {k: v for k, v in sorted(dd.items(), key=lambda item: item[1])}
print(f'\nNeighbors of [{w1}] : ', list(dd.keys())[1:15])
```
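%% Cell type:markdown id: tags:
Note : L2 distance is sensitive to vector norms; cosine similarity is a common alternative for comparing embeddings. A minimal sketch (not used below) :
%% Cell type:code id: tags:
``` python
# Cosine similarity between 2 words - a sketch, assuming word_embedding is defined above
#
def cosine(w1,w2):
    v1=word_embedding[w1]
    v2=word_embedding[w2]
    return np.dot(v1,v2) / (np.linalg.norm(v1)*np.linalg.norm(v2))

print('cosine(nice, pleasant) :', cosine('nice','pleasant'))
```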
%% Cell type:markdown id: tags:
#### Examples
%% Cell type:code id: tags:
``` python
show_l2('nice', 'pleasant')
show_l2('nice', 'horrible')
neighbors('horrible')
neighbors('great')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3IMDB5] - Sentiment analysis with a RNN network
<!-- DESC --> Still the same problem, but with a network combining embedding and RNN, using Keras 3 and PyTorch
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- The objective is to guess whether film reviews are **positive or negative** based on the analysis of the text.
- Use of a model combining embedding and RNN layers
The original dataset can be found **[here](http://ai.stanford.edu/~amaas/data/sentiment/)**
Note that [IMDb.com](https://imdb.com) offers several easy-to-use [datasets](https://www.imdb.com/interfaces/)
For simplicity's sake, we'll use the dataset directly [embedded in Keras](https://keras.io/datasets)
## What we're going to do :
- Retrieve data
- Prepare the data
- Build an Embedding/RNN model
- Train the model
- Evaluate the result
%% Cell type:markdown id: tags:
## Step 1 - Init python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import keras.datasets.imdb as imdb
import json,re
import numpy as np
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3IMDB5')
```
%% Cell type:markdown id: tags:
## Step 2 - Parameters
The words in the vocabulary are classified from the most frequent to the rarest.
`vocab_size` is the number of words we will remember in our vocabulary (the other words will be considered as unknown).
`hide_most_frequently` is the number of ignored words, among the most common ones
`review_len` is the review length
`dense_vector_size` is the size of the generated dense vectors
`fit_verbosity` is the verbosity during training : 0 = silent, 1 = progress bar, 2 = one line per epoch\
`scale` is a dataset scale factor - note that a scale=1 needs a training time > 10'
%% Cell type:code id: tags:
``` python
vocab_size           = 10000
hide_most_frequently = 0

review_len           = 256
dense_vector_size    = 32

epochs               = 10
batch_size           = 128

fit_verbosity        = 1
scale                = 0.2
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('vocab_size', 'hide_most_frequently', 'review_len', 'dense_vector_size')
fidle.override('batch_size', 'epochs', 'fit_verbosity', 'scale')
```
%% Cell type:markdown id: tags:
## Step 3 - Retrieve data
IMDb dataset can be obtained directly from Keras - see [documentation](https://keras.io/api/datasets)
Note : Due to their nature, textual data can be somewhat complex.
%% Cell type:markdown id: tags:
### 3.1 - Get dataset
For simplicity, we will use a pre-formatted dataset - See [documentation](https://keras.io/api/datasets/imdb/)
However, Keras offers some useful tools for formatting textual data - See [documentation](https://keras.io/api/layers/preprocessing_layers/text/text_vectorization/)
**Load dataset :**
%% Cell type:code id: tags:
``` python
# ----- Retrieve x,y
#
start_char = 1      # Start of a sequence (padding is 0)
oov_char   = 2      # Out-of-vocabulary
index_from = 3      # First word id

(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words  = vocab_size,
                                                       skip_top   = hide_most_frequently,
                                                       start_char = start_char,
                                                       oov_char   = oov_char,
                                                       index_from = index_from)

# ---- Rescale
#
n1 = int(scale * len(x_train))
n2 = int(scale * len(x_test))
x_train, y_train = x_train[:n1], y_train[:n1]
x_test,  y_test  = x_test[:n2],  y_test[:n2]
# ---- About
#
print("Max(x_train,x_test) : ", fidle.utils.rmax([x_train,x_test]) )
print("Min(x_train,x_test) : ", fidle.utils.rmin([x_train,x_test]) )
print("Len(x_train) : ", len(x_train))
print("Len(x_test) : ", len(x_test))
```
%% Cell type:markdown id: tags:
### 3.2 - Have a look for humans (optional)
When we loaded the dataset, we asked for \<start\> to be 1 and \<unknown word\> to be 2
So, we shifted the dataset by 3 with the parameter index_from=3
**Load dictionary :**
%% Cell type:code id: tags:
``` python
# ---- Retrieve dictionary {word:index}, and encode it in ascii
# Shift the dictionary from +3
# Add <pad>, <start> and <unknown> tags
# Create a reverse dictionary : {index:word}
#
word_index = imdb.get_word_index()
word_index = {w:(i+index_from) for w,i in word_index.items()}
word_index.update( {'<pad>':0, '<start>':1, '<unknown>':2, '<undef>':3,} )
index_word = {index:word for word,index in word_index.items()}
# ---- A nice function to transpose :
#
def dataset2text(review):
return ' '.join([index_word.get(i, '?') for i in review])
```
%% Cell type:markdown id: tags:
**Have a look :**
%% Cell type:code id: tags:
``` python
print('\nDictionary size : ', len(word_index))
for k in range(440,455):print(f'{k:2d} : {index_word[k]}' )
fidle.utils.subtitle('Review example :')
print(x_train[12])
fidle.utils.subtitle('After translation :')
print(dataset2text(x_train[12]))
```
%% Cell type:markdown id: tags:
## Step 4 - Preprocess the data (padding)
In order to be processed by an NN, all entries must have the **same length.**
We chose a review length of **review_len**
We will therefore complete them with a padding (of 0 as \<pad\>)
%% Cell type:code id: tags:
``` python
x_train = keras.preprocessing.sequence.pad_sequences(x_train,
value = 0,
padding = 'post',
maxlen = review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test,
value = 0 ,
padding = 'post',
maxlen = review_len)
fidle.utils.subtitle('After padding :')
print(x_train[12])
fidle.utils.subtitle('In real words :')
print(dataset2text(x_train[12]))
```
%% Cell type:markdown id: tags:
## Step 5 - Build the model
A few remarks :
1. We'll choose a dense vector size for the embedding output with **dense_vector_size**
2. The Keras embedding is trained in a supervised way. It is a layer from *vocab_size* neurons to *n_neurons*, which maintains a table of vectors (the weights are the vectors). This layer does not compute an output the way normal layers do, but returns the values of the vectors: n words => n vectors
See : https://stats.stackexchange.com/questions/324992/how-the-embedding-layer-is-trained-in-keras-embedding-layer
See also : https://www.liip.ch/en/blog/sentiment-detection-with-keras-word-embeddings-and-lstm-deep-learning-networks
More documentation about these model functions :
- [Embedding](https://keras.io/api/layers/core_layers/embedding/)
- [GRU](https://keras.io/api/layers/recurrent_layers/gru/)
%% Cell type:code id: tags:
``` python
def get_model(dense_vector_size=32):

    model = keras.Sequential()
    model.add(keras.layers.Embedding(input_dim = vocab_size, output_dim = dense_vector_size))
    model.add(keras.layers.GRU(50))
    model.add(keras.layers.Dense(1, activation='sigmoid'))
    return model
```
%% Cell type:markdown id: tags:
### 5.1 - Get it
%% Cell type:code id: tags:
``` python
model = get_model(32)
model.compile(optimizer = 'rmsprop',
loss = 'binary_crossentropy',
metrics = ['accuracy'])
model.summary()
```
%% Cell type:markdown id: tags:
## Step 6 - Train the model
### 6.1 - Add callbacks
%% Cell type:code id: tags:
``` python
os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)
save_dir = f'{run_dir}/models/best_model.keras'
savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_accuracy', mode='max', save_best_only=True)
```
%% Cell type:markdown id: tags:
### 6.2 - Train it
Note : With scale=0.2, batch_size=128 and epochs=10, this needs about 4' on a CPU laptop
%% Cell type:code id: tags:
``` python
%%time
history = model.fit(x_train,
                    y_train,
                    epochs          = epochs,
                    batch_size      = batch_size,
                    validation_data = (x_test, y_test),
                    verbose         = fit_verbosity,
                    callbacks       = [savemodel_callback])
```
%% Cell type:markdown id: tags:
### 6.3 - Training history
%% Cell type:code id: tags:
``` python
fidle.scrawler.history(history, save_as='02-history')
```
%% Cell type:markdown id: tags:
## Step 7 - Evaluation
Reload and evaluate best model
%% Cell type:code id: tags:
``` python
model = keras.models.load_model(f'{run_dir}/models/best_model.keras')
# ---- Evaluate
score = model.evaluate(x_test, y_test, verbose=0)
print('x_test / loss : {:5.4f}'.format(score[0]))
print('x_test / accuracy : {:5.4f}'.format(score[1]))
values=[score[1], 1-score[1]]
fidle.scrawler.donut(values,["Accuracy","Errors"], title="#### Accuracy donut is :", save_as='03-donut')
# ---- Confusion matrix
y_sigmoid = model.predict(x_test, verbose=fit_verbosity)
y_pred = y_sigmoid.copy()
y_pred[ y_sigmoid< 0.5 ] = 0
y_pred[ y_sigmoid>=0.5 ] = 1
fidle.scrawler.confusion_matrix_txt(y_test,y_pred,labels=range(2))
fidle.scrawler.confusion_matrix(y_test,y_pred,range(2), figsize=(8, 8),normalize=False, save_as='04-confusion-matrix')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3GTSRB1] - Dataset analysis and preparation
<!-- DESC --> Episode 1 : Analysis of the GTSRB dataset and creation of an enhanced dataset
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Understand the **complexity associated with data**, even when it is only images
- Learn how to build up a simple and **usable image dataset**
The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
The final aim is to recognise them !
Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
## What we're going to do :
- Understanding the dataset
- Preparing and formatting enhanced data
- Save enhanced datasets in h5 file format
%% Cell type:markdown id: tags:
## Step 1 - Import and init
%% Cell type:code id: tags:
``` python
import os, time, sys
import math, random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import h5py
from skimage.morphology import disk
from skimage.util import img_as_ubyte
from skimage.filters import rank
from skimage import io, color, exposure, transform
import fidle
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3GTSRB1')
```
%% Cell type:markdown id: tags:
## Step 2 - Parameters
The generation of datasets may require some time and space : **10' and 10 GB**.
You can choose to perform tests or generate the whole enhanced dataset by setting the following parameters:
`scale` : 1 means 100% of the dataset - set 0.2 for tests (needs about 2 minutes with scale = 0.2)
`progress_verbosity`: Verbosity of progress bar: 0=silent, 1=progress bar, 2=One line
`output_dir` : where to write enhanced dataset, could be :
- `./data`, for tests purpose
- `<datasets_dir>/GTSRB/enhanced` to add clusters in your datasets dir.
Uncomment the right lines according to what you want :
%% Cell type:code id: tags:
``` python
# ---- For smart tests :
#
scale = 0.2
output_dir = './data'
# ---- For a Full dataset generation :
#
# scale = 1
# output_dir = f'{datasets_dir}/GTSRB/enhanced'
# ---- Verbosity
#
progress_verbosity = 2
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('scale', 'output_dir', 'progress_verbosity')
```
%% Cell type:markdown id: tags:
## Step 3 - Read the dataset
Description is available there : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
- Each directory contains one CSV file with annotations ("GT-<ClassID>.csv") and the training images
- First line is fieldnames: Filename;Width;Height;Roi.X1;Roi.Y1;Roi.X2;Roi.Y2;ClassId
- Each directory contains one CSV file with annotations : `GT-<ClassID>.csv` and the training images
- First line is fieldnames: `Filename ; Width ; Height ; Roi.X1 ; Roi.Y1 ; Roi.X2 ; Roi.Y2 ; ClassId`
### 3.1 - Understanding the dataset
The original dataset is in : **\<dataset_dir\>/GTSRB/origine.**
There are 3 subsets : **Train**, **Test** and **Meta.**
Each subset has a **csv file** and a **subdir** with **images**.
%% Cell type:code id: tags:
``` python
df = pd.read_csv(f'{datasets_dir}/GTSRB/origine/Test.csv', header=0)
display(df.head(10))
```
%% Cell type:markdown id: tags:
### 3.2 - Useful functions
A nice function for reading a dataset from an index.csv file.\
Input: an index.csv file\
Output: an array of images and an array of corresponding labels
%% Cell type:code id: tags:
``` python
def read_csv_dataset(csv_file):
    '''
    Reads traffic sign data from German Traffic Sign Recognition Benchmark dataset.
    Arguments:
        csv filename : Description file, Example /data/GTSRB/Train.csv
    Returns:
        x,y          : np array of images, np array of corresponding labels
    '''
    path = os.path.dirname(csv_file)
    name = os.path.basename(csv_file)

    # ---- Read csv file
    #
    df = pd.read_csv(csv_file, header=0)

    # ---- Get filenames and ClassIds
    #
    filenames = df['Path'].to_list()
    y         = df['ClassId'].to_list()
    x         = []

    # ---- Read images
    #
    for filename in filenames:
        image=io.imread(f'{path}/{filename}')
        x.append(image)
        fidle.utils.update_progress(name,len(x),len(filenames), verbosity=progress_verbosity)

    # ---- Return
    #
    return np.array(x,dtype=object),np.array(y)
```
%% Cell type:markdown id: tags:
### 3.3 - Read the data
We will read the following datasets:
- **Train** subset, for learning data as : `x_train, y_train`
- **Test** subset, for validation data as : `x_test, y_test`
- **Meta** subset, for visualisation as : `x_meta, y_meta`
The learning data will be randomly shuffled and the illustration data (Meta) sorted.
Will take about 1'30s on HPC or 45s on my laptop.
%% Cell type:code id: tags:
``` python
chrono=fidle.Chrono()
chrono.start()
# ---- Read datasets
(x_train,y_train) = read_csv_dataset(f'{datasets_dir}/GTSRB/origine/Train.csv')
(x_test ,y_test) = read_csv_dataset(f'{datasets_dir}/GTSRB/origine/Test.csv')
(x_meta ,y_meta) = read_csv_dataset(f'{datasets_dir}/GTSRB/origine/Meta.csv')
# ---- Shuffle train set
x_train, y_train = fidle.utils.shuffle_np_dataset(x_train, y_train)
# ---- Sort Meta
combined = list(zip(x_meta,y_meta))
combined.sort(key=lambda x: x[1])
x_meta,y_meta = zip(*combined)
chrono.show()
```
%% Cell type:markdown id: tags:
## Step 4 - A few statistics about train dataset
We want to know if our images are homogeneous in terms of size, ratio, width or height.
### 4.1 - Do statistics
%% Cell type:code id: tags:
``` python
train_size = []
train_ratio = []
train_lx = []
train_ly = []
test_size = []
test_ratio = []
test_lx = []
test_ly = []
for image in x_train:
(lx,ly,lz) = image.shape
train_size.append(lx*ly/1024)
train_ratio.append(lx/ly)
train_lx.append(lx)
train_ly.append(ly)
for image in x_test:
(lx,ly,lz) = image.shape
test_size.append(lx*ly/1024)
test_ratio.append(lx/ly)
test_lx.append(lx)
test_ly.append(ly)
```
%% Cell type:markdown id: tags:
### 3.2/ Show statistics
### 4.2 - Show statistics
%% Cell type:code id: tags:
``` python
figsize=(10,4)
# ------ Global stuff
print("x_train size : ",len(x_train))
print("y_train size : ",len(y_train))
print("x_test size : ",len(x_test))
print("y_test size : ",len(y_test))
print("x_train shape : ",x_train.shape)
print("y_train shape : ",y_train.shape)
print("x_test shape : ",x_test.shape)
print("y_test shape : ",y_test.shape)
# ------ Statistics / sizes
plt.figure(figsize=figsize)
plt.hist([train_size,test_size], bins=100)
plt.gca().set(title='Sizes in Kpixels - Train=[{:5.2f}, {:5.2f}]'.format(min(train_size),max(train_size)),
ylabel='Population', xlim=[0,30])
plt.legend(['Train','Test'])
fidle.scrawler.save_fig('01-stats-sizes')
plt.show()
# ------ Statistics / ratio lx/ly
plt.figure(figsize=figsize)
plt.hist([train_ratio,test_ratio], bins=100)
plt.gca().set(title='Ratio lx/ly - Train=[{:5.2f}, {:5.2f}]'.format(min(train_ratio),max(train_ratio)),
ylabel='Population', xlim=[0.8,1.2])
plt.legend(['Train','Test'])
fidle.scrawler.save_fig('02-stats-ratios')
plt.show()
# ------ Statistics / lx
plt.figure(figsize=figsize)
plt.hist([train_lx,test_lx], bins=100)
plt.gca().set(title='Images lx - Train=[{:5.2f}, {:5.2f}]'.format(min(train_lx),max(train_lx)),
ylabel='Population', xlim=[20,150])
plt.legend(['Train','Test'])
fidle.scrawler.save_fig('03-stats-lx')
plt.show()
# ------ Statistics / ly
plt.figure(figsize=figsize)
plt.hist([train_ly,test_ly], bins=100)
plt.gca().set(title='Images ly - Train=[{:5.2f}, {:5.2f}]'.format(min(train_ly),max(train_ly)),
ylabel='Population', xlim=[20,150])
plt.legend(['Train','Test'])
fidle.scrawler.save_fig('04-stats-ly')
plt.show()
# ------ Statistics / classId
plt.figure(figsize=figsize)
plt.hist([y_train,y_test], bins=43)
plt.gca().set(title='ClassesId', ylabel='Population', xlim=[0,43])
plt.legend(['Train','Test'])
fidle.scrawler.save_fig('05-stats-classes')
plt.show()
```
%% Cell type:markdown id: tags:
## Step 5 - List of classes
What are the 43 classes of our images...
%% Cell type:code id: tags:
``` python
fidle.scrawler.images( x_meta,y_meta, range(43), columns=8, x_size=1.4, y_size=1.4,
colorbar=False, y_pred=None, cm='binary', save_as='06-meta-signs')
```
%% Cell type:markdown id: tags:
## Step 6 - What does it really look like
%% Cell type:code id: tags:
``` python
# ---- Get and show few images
samples = [ random.randint(0,len(x_train)-1) for i in range(32)]
fidle.scrawler.images( x_train,y_train, samples, columns=8, x_size=1.5, y_size=1.5,
colorbar=False, y_pred=None, cm='binary', save_as='07-real-signs')
```
%% Cell type:markdown id: tags:
## Step 7 - Dataset cooking...
Images **must** :
- have the **same size** to match the size of the network,
- be **normalized**.
It is possible to work on **rgb** or **monochrome** images and to **equalize** the histograms.
See : [Exposure with scikit-image](https://scikit-image.org/docs/dev/api/skimage.exposure.html)
See : [Local histogram equalization](https://scikit-image.org/docs/dev/api/skimage.filters.rank.html#skimage.filters.rank.equalize)
See : [Histogram equalization](https://scikit-image.org/docs/dev/api/skimage.exposure.html#skimage.exposure.equalize_hist)
### 7.1 - Enhancement cooking
A nice function for preparing our data.
Input: a set of images (numpy array)
Output: enhanced images, resized and reprocessed (numpy array)
%% Cell type:code id: tags:
``` python
def images_enhancement(images, width=25, height=25, proc='RGB'):
    '''
    Resize and convert images - doesn't change originals.
    input images must be RGBA or RGB.
    Note : all outputs are fixed size numpy array of float32
    args:
        images :       images list
        width,height : new images size (25,25)
        proc :         RGB | RGB-HE | L | L-HE | L-LHE | L-CLAHE
    return:
        numpy array of enhanced images
    '''
    lz={ 'RGB':3, 'RGB-HE':3, 'L':1, 'L-HE':1, 'L-LHE':1, 'L-CLAHE':1}[proc]

    out=[]
    for img in images:

        # ---- if RGBA, convert to RGB
        if img.shape[2]==4:
            img=color.rgba2rgb(img)

        # ---- Resize
        img = transform.resize(img, (width,height))

        # ---- RGB / Histogram Equalization
        if proc=='RGB-HE':
            hsv = color.rgb2hsv(img.reshape(width,height,3))
            hsv[:, :, 2] = exposure.equalize_hist(hsv[:, :, 2])
            img = color.hsv2rgb(hsv)

        # ---- Grayscale
        if proc=='L':
            img=color.rgb2gray(img)

        # ---- Grayscale / Histogram Equalization
        if proc=='L-HE':
            img=color.rgb2gray(img)
            img=exposure.equalize_hist(img)

        # ---- Grayscale / Local Histogram Equalization
        if proc=='L-LHE':
            img=color.rgb2gray(img)
            img=img_as_ubyte(img)
            img=rank.equalize(img, disk(10))/255.

        # ---- Grayscale / Contrast Limited Adaptive Histogram Equalization (CLAHE)
        if proc=='L-CLAHE':
            img=color.rgb2gray(img)
            img=exposure.equalize_adapthist(img)

        # ---- Add image to the list
        out.append(img)
        fidle.utils.update_progress('Enhancement: ',len(out),len(images))

    # ---- Reshape images
    #      (-1, width,height,1) for L
    #      (-1, width,height,3) for RGB
    #
    out = np.array(out,dtype='float32')
    out = out.reshape(-1,width,height,lz)
    return out
```
%% Cell type:markdown id: tags:
### 7.2 - To get an idea of the different recipes
%% Cell type:code id: tags:
``` python
i=random.randint(0,len(x_train)-16)
x_samples = x_train[i:i+16]
y_samples = y_train[i:i+16]
datasets = {}
datasets['RGB'] = images_enhancement( x_samples, width=25, height=25, proc='RGB' )
datasets['RGB-HE'] = images_enhancement( x_samples, width=25, height=25, proc='RGB-HE' )
datasets['L'] = images_enhancement( x_samples, width=25, height=25, proc='L' )
datasets['L-HE'] = images_enhancement( x_samples, width=25, height=25, proc='L-HE' )
datasets['L-LHE'] = images_enhancement( x_samples, width=25, height=25, proc='L-LHE' )
datasets['L-CLAHE'] = images_enhancement( x_samples, width=25, height=25, proc='L-CLAHE' )
fidle.utils.subtitle('EXPECTED')
x_expected=[ x_meta[i] for i in y_samples]
fidle.scrawler.images(x_expected, y_samples, range(12), columns=12, x_size=1, y_size=1,
                      colorbar=False, y_pred=None, cm='binary', save_as='08-expected')

fidle.utils.subtitle('ORIGINAL')
fidle.scrawler.images(x_samples, y_samples, range(12), columns=12, x_size=1, y_size=1,
                      colorbar=False, y_pred=None, cm='binary', save_as='09-original')
fidle.utils.subtitle('ENHANCED')
n=10
for k,d in datasets.items():
    print("dataset : {}  min,max=[{:.3f},{:.3f}]  shape={}".format(k,d.min(),d.max(), d.shape))
    fidle.scrawler.images(d, y_samples, range(12), columns=12, x_size=1, y_size=1,
                          colorbar=False, y_pred=None, cm='binary', save_as=f'{n}-enhanced-{k}')
    n+=1
```
%% Cell type:markdown id: tags:
### 7.3 - Cook and save
A function to save a dataset (h5 file)
%% Cell type:code id: tags:
``` python
def save_h5_dataset(x_train, y_train, x_test, y_test, x_meta,y_meta, filename):
# ---- Create h5 file
with h5py.File(filename, "w") as f:
f.create_dataset("x_train", data=x_train)
f.create_dataset("y_train", data=y_train)
f.create_dataset("x_test", data=x_test)
f.create_dataset("y_test", data=y_test)
f.create_dataset("x_meta", data=x_meta)
f.create_dataset("y_meta", data=y_meta)
# ---- done
size=os.path.getsize(filename)/(1024*1024)
print('Dataset : {:24s} shape : {:22s} size : {:6.1f} Mo (saved)'.format(filename, str(x_train.shape),size))
```
%% Cell type:markdown id: tags:
Generate enhanced datasets (this will take about 7-8') :
%% Cell type:code id: tags:
``` python
# ---- Size and processings
#
all_size= [24, 48]
all_proc=['RGB', 'RGB-HE', 'L', 'L-LHE']
# ---- Do it
#
chrono.start()
n_train = int( len(x_train)*scale )
n_test = int( len(x_test)*scale )
fidle.utils.subtitle('Parameters :')
print(f'Scale is : {scale}')
print(f'x_train length is : {n_train}')
print(f'x_test length is : {n_test}')
print(f'output dir is : {output_dir}\n')
fidle.utils.subtitle('Running...')
fidle.utils.mkdir(output_dir)
for s in all_size:
for m in all_proc:
# ---- A nice dataset name
filename = f'{output_dir}/set-{s}x{s}-{m}.h5'
fidle.utils.subtitle(f'Dataset : {filename}')
# ---- Enhancement
# Note : x_train is a numpy array of python objects (images with different sizes),
#        whereas images_enhancement() returns a true numpy array of float32 (images with the same size),
#        so we can save it in nice h5 files
#
x_train_new = images_enhancement( x_train[:n_train], width=s, height=s, proc=m )
x_test_new = images_enhancement( x_test[:n_test], width=s, height=s, proc=m )
x_meta_new = images_enhancement( x_meta, width=s, height=s, proc='RGB' )
# ---- Save
save_h5_dataset( x_train_new, y_train[:n_train], x_test_new, y_test[:n_test], x_meta_new,y_meta, filename)
x_train_new,x_test_new=0,0
chrono.show()
```
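%% Cell type:markdown id: tags:
A quick way to check what has just been produced (a sketch, reusing `output_dir` as defined above) :
%% Cell type:code id: tags:
``` python
# List the generated h5 files, with their sizes
for f in sorted(os.listdir(output_dir)):
    if f.endswith('.h5'):
        size = os.path.getsize(f'{output_dir}/{f}')/(1024*1024)
        print(f'{f:30s} {size:8.1f} Mo')
```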
%% Cell type:markdown id: tags:
## Step 8 - Reload data to be sure ;-)
%% Cell type:code id: tags:
``` python
chrono.start()
dataset='set-24x24-L'
samples=range(24)
with h5py.File(f'{output_dir}/{dataset}.h5','r') as f:
x_tmp = f['x_train'][:]
y_tmp = f['y_train'][:]
print("dataset loaded from h5 file.")
fidle.scrawler.images(x_tmp,y_tmp, samples, columns=8, x_size=1.5, y_size=1.5,
colorbar=False, y_pred=None, cm='binary', save_as='16-enhanced_images')
x_tmp,y_tmp=0,0
chrono.show()
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3GTSRB2] - First convolutions
<!-- DESC --> Episode 2 : First convolutions and first classification of our traffic signs, using Keras3
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- Recognizing traffic signs
- Understand the **principles** and **architecture** of a **convolutional neural network** for image classification
The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
The final aim is to recognise them !
Description is available here : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
**IMPORTANT :** To be able to use this notebook and the following, **you must have generated the enhanced datasets** in <dataset_dir>/enhanced via the notebook **[01-Preparation-of-data.ipynb](01-Preparation-of-data.ipynb)**
## What we're going to do :
Our main steps:
- Read H5 dataset
- Build a model
- Train the model
- Evaluate the model
## Step 1 - Import and init
### 1.1 - Python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import numpy as np
import matplotlib.pyplot as plt
import h5py
import os,time,sys
# Init Fidle environment
import fidle
run_id, run_dir, datasets_dir = fidle.init('K3GTSRB2')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
`scale` is the proportion of the dataset that will be used during the training (1 means 100%).
A 20% 24x24 dataset, with 5 epochs and a scale of 1, needs about **3'30** on a CPU laptop.\
`fit_verbosity` is the verbosity during training : 0 = silent, 1 = progress bar, 2 = one line per epoch
%% Cell type:code id: tags:
``` python
enhanced_dir = './data'
# enhanced_dir = f'{datasets_dir}/GTSRB/enhanced'
dataset_name = 'set-24x24-L'
batch_size = 64
epochs = 5
scale = 1
fit_verbosity = 1
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('enhanced_dir', 'dataset_name', 'batch_size', 'epochs', 'scale', 'fit_verbosity')
```
%% Cell type:markdown id: tags:
## Step 2 - Load dataset
We're going to retrieve a previously recorded dataset.
For example: set-24x24-L
%% Cell type:code id: tags:
``` python
def read_dataset(enhanced_dir, dataset_name, scale=1):
'''
Reads h5 dataset
Args:
enhanced_dir : enhanced dataset directory
dataset_name : dataset name, without .h5
scale        : proportion of the dataset to load (1 means 100%)
Returns:
x_train,y_train, x_test,y_test data, x_meta,y_meta
'''
# ---- Read dataset
#
chrono=fidle.Chrono()
chrono.start()
filename = f'{enhanced_dir}/{dataset_name}.h5'
with h5py.File(filename,'r') as f:
x_train = f['x_train'][:]
y_train = f['y_train'][:]
x_test = f['x_test'][:]
y_test = f['y_test'][:]
x_meta = f['x_meta'][:]
y_meta = f['y_meta'][:]
# ---- Rescale
#
print('Original shape :', x_train.shape, y_train.shape)
x_train,y_train, x_test,y_test = fidle.utils.rescale_dataset(x_train,y_train,x_test,y_test, scale=scale)
print('Rescaled shape :', x_train.shape, y_train.shape)
# ---- Shuffle
#
x_train,y_train=fidle.utils.shuffle_np_dataset(x_train,y_train)
# ---- done
print('Dataset "{}" is loaded. ({:.1f} Mo)\n'.format(name,os.path.getsize(filename)/(1024*1024)))
return x_train,y_train,x_test,y_test
#
duration = chrono.get_delay()
size = fidle.utils.hsize(os.path.getsize(filename))
print(f'\nDataset "{dataset_name}" is loaded and shuffled. ({size} in {duration})')
return x_train,y_train, x_test,y_test, x_meta,y_meta
# ---- Read dataset
#
x_train,y_train,x_test,y_test, x_meta,y_meta = read_dataset(enhanced_dir, dataset_name, scale)
```
%% Cell type:markdown id: tags:
## Step 3 - Have a look at the dataset
We take a quick look as we go by...
%% Cell type:code id: tags:
``` python
print("x_train : ", x_train.shape)
print("y_train : ", y_train.shape)
print("x_test : ", x_test.shape)
print("y_test : ", y_test.shape)
fidle.scrawler.images(x_train, y_train, range(12), columns=6, x_size=2, y_size=2, save_as='01-dataset-medium')
fidle.scrawler.images(x_train, y_train, range(36), columns=12, x_size=1, y_size=1, save_as='02-dataset-small')
```
%% Cell type:markdown id: tags:
## Step 4 - Create model
We will now build a model and train it...
Some models :
%% Cell type:code id: tags:
``` python
# ------------------------------------------------------------------
# -- A simple model, for 24x24 or 48x48 images --
# ------------------------------------------------------------------
#
def get_model_01(lx,ly,lz):
model = keras.models.Sequential()
model.add( keras.layers.Input((lx,ly,lz)) )
model.add( keras.layers.Conv2D(96, (3,3), activation='relu' ))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.2))
model.add( keras.layers.Conv2D(192, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.2))
model.add( keras.layers.Flatten())
model.add( keras.layers.Dense(1500, activation='relu'))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Dense(43, activation='softmax'))
return model
# ------------------------------------------------------------------
# -- A more sophisticated model, for 48x48 images --
# ------------------------------------------------------------------
#
def get_model_02(lx,ly,lz):
model = keras.models.Sequential()
model.add( keras.layers.Input((lx,ly,lz)) )
model.add( keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add( keras.layers.Dropout(0.2))
model.add( keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'))
model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add( keras.layers.Dropout(0.2))
model.add( keras.layers.Conv2D(256, (3, 3), padding='same',activation='relu'))
model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add( keras.layers.Dropout(0.2))
model.add( keras.layers.Flatten())
model.add( keras.layers.Dense(512, activation='relu'))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Dense(43, activation='softmax'))
return model
# My sophisticated model, but small and fast
#
def get_model_v3(lx,ly,lz):
model = keras.models.Sequential()
model.add( keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(lx,ly,lz)))
model.add( keras.layers.Conv2D(32, (3,3), activation='relu'))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Conv2D(256, (3, 3), activation='relu'))
model.add( keras.layers.MaxPooling2D((2, 2)))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Flatten())
model.add( keras.layers.Dense(1152, activation='relu'))
model.add( keras.layers.Dropout(0.5))
model.add( keras.layers.Dense(43, activation='softmax'))
return model
```
%% Cell type:markdown id: tags:
## Step 5 - Train the model
**Get the shape of my data :**
%% Cell type:code id: tags:
``` python
(n,lx,ly,lz) = x_train.shape
print("Images of the dataset have this folowing shape : ",(lx,ly,lz))
```
%% Cell type:markdown id: tags:
**Get and compile a model, with the data shape :**
%% Cell type:code id: tags:
``` python
model = get_model_01(lx,ly,lz)
model.summary()
model.compile(optimizer = 'adam',
loss = 'sparse_categorical_crossentropy',
metrics = ['accuracy'])
```
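%% Cell type:markdown id: tags:
Why `sparse_categorical_crossentropy` ? Our labels are plain integer class ids (0 to 42), not one-hot vectors ; a small sketch of the two equivalent encodings :
%% Cell type:code id: tags:
``` python
# Two encodings of the same label, "class 3 of 43" :
y_sparse = np.array([3])        # integer id -> sparse_categorical_crossentropy
y_onehot = np.zeros((1,43))     # one-hot    -> categorical_crossentropy
y_onehot[0,3] = 1
```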
%% Cell type:markdown id: tags:
**Train it :**
%% Cell type:code id: tags:
``` python
chrono=fidle.Chrono()
chrono.start()
# ---- Shuffle train data
x_train,y_train=fidle.utils.shuffle_np_dataset(x_train,y_train)
# ---- Train
history = model.fit( x_train, y_train,
batch_size = batch_size,
epochs = epochs,
verbose = fit_verbosity,
validation_data = (x_test, y_test))
chrono.show()
```
%% Cell type:markdown id: tags:
## Step 6 - Evaluate
%% Cell type:code id: tags:
``` python
max_val_accuracy = max(history.history["val_accuracy"])
print("Max validation accuracy is : {:.4f}".format(max_val_accuracy))
```
%% Cell type:code id: tags:
``` python
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss : {:5.4f}'.format(score[0]))
print('Test accuracy : {:5.4f}'.format(score[1]))
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
<div class="todo">
What you can do:
<ul>
<li>Try the different models</li>
<li>Try with different datasets</li>
<li>Test different hyperparameters (epochs, batch size, optimization, etc.)</li>
<li>Create your own model</li>
</ul>
</div>
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [K3GTSRB3] - Training monitoring
<!-- DESC --> Episode 3 : Monitoring, analysis and check points during a training session, using Keras3
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
## Objectives :
- **Understand** what happens during the **training** process
- Implement **monitoring**, **backup** and **recovery** solutions
The German Traffic Sign Recognition Benchmark (GTSRB) is a dataset with more than 50,000 photos of road signs from about 40 classes.
The final aim is to recognise them !
Description is available here : http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
## What we're going to do :
Our main steps:
- Monitoring and understanding our model training
- Add recovery points
- Analyze the results
- Restore and run recovery points
## Step 1 - Import and init
### 1.1 - Python stuff
%% Cell type:code id: tags:
``` python
import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
import numpy as np
import h5py
import os, random
import fidle
import matplotlib.pyplot as plt
import modules.my_loader as my_loader
import modules.my_models as my_models
import modules.my_tools as my_tools
from modules.my_TensorboardCallback import TensorboardCallback
# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('K3GTSRB3')
```
%% Cell type:markdown id: tags:
### 1.2 - Parameters
`scale` is the proportion of the dataset that will be used during the training (1 means 100%).
- A 24x24 L dataset, 10 epochs, 20% of the dataset : about 1'30 on a CPU laptop (accuracy = 91.4)\
- A 48x48 RGB dataset, 10 epochs, 20% of the dataset : about 6'30 on a CPU laptop (accuracy = 91.5)
`model_name` is the model name from modules.my_models :
- model_01 for 24x24 or 48x48 images
- model_02 for 48x48 images
`fit_verbosity` is the verbosity during training :
- 0 = silent, 1 = progress bar, 2 = one line per epoch
%% Cell type:code id: tags:
``` python
enhanced_dir = './data'
# enhanced_dir = f'{datasets_dir}/GTSRB/enhanced'
model_name = 'model_01'
dataset_name = 'set-24x24-L'
batch_size = 64
epochs = 10
scale = 1
fit_verbosity = 1
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('enhanced_dir', 'model_name', 'dataset_name', 'batch_size', 'epochs', 'scale', 'fit_verbosity')
```
%% Cell type:markdown id: tags:
## Step 2 - Load dataset
Dataset is one of the previously saved datasets...
%% Cell type:code id: tags:
``` python
x_train,y_train,x_test,y_test, x_meta,y_meta = my_loader.read_dataset(enhanced_dir, dataset_name, scale)
```
%% Cell type:markdown id: tags:
## Step 3 - Have a look at the dataset
%% Cell type:code id: tags:
``` python
print("x_train : ", x_train.shape)
print("y_train : ", y_train.shape)
print("x_test : ", x_test.shape)
print("y_test : ", y_test.shape)
fidle.scrawler.images(x_train, y_train, range(24), columns=8, x_size=1, y_size=1, save_as='02-dataset-small')
```
%% Cell type:markdown id: tags:
## Step 4 - Get a model
%% Cell type:code id: tags:
``` python
(n,lx,ly,lz) = x_train.shape
model = my_models.get_model( model_name, lx,ly,lz )
model.summary()
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
```
%% Cell type:markdown id: tags:
## Step 5 - Prepare callbacks
We will add 2 callbacks :
**TensorBoard**
Training logs, which can be visualised using [Tensorboard tool](https://www.tensorflow.org/tensorboard).
**Model backup**
It is possible to save the model each xx epoch or at each improvement.
The model can be saved completely or partially (weight).
See [Keras documentation](https://keras.io/api/callbacks/)
%% Cell type:code id: tags:
``` python
fidle.utils.mkdir(run_dir + '/models')
fidle.utils.mkdir(run_dir + '/logs')
# ---- Callback for tensorboard (This one is homemade !)
#
tensorboard_callback = TensorboardCallback(
log_dir=run_dir + "/logs/tb_" + fidle.Chrono.tag_now())
# ---- Callback to save best model
#
bestmodel_callback = keras.callbacks.ModelCheckpoint(
filepath= run_dir + "/models/best-model.keras",
monitor='val_accuracy',
mode='max',
save_best_only=True)
# ---- Callback to save the model at each epoch
#
savemodel_callback = keras.callbacks.ModelCheckpoint(
filepath= run_dir + "/models/{epoch:02d}.keras",
save_freq="epoch")
```
%% Cell type:markdown id: tags:
## Step 6 - Train the model
To access logs with tensorboard :
- Under **Docker**, from a terminal launched via the jupyterlab launcher, use the following command:<br>
```tensorboard --logdir <path-to-logs> --host 0.0.0.0```
- If you're not using Docker, from a terminal :<br>
```tensorboard --logdir <path-to-logs>```
**Note:** One tensorboard instance can be used simultaneously.
%% Cell type:markdown id: tags:
**Train it :**
Note: The training curve is visible in real time with Tensorboard (see step 5)
%% Cell type:code id: tags:
``` python
chrono=fidle.Chrono()
chrono.start()
# ---- Shuffle train data
x_train,y_train=fidle.utils.shuffle_np_dataset(x_train,y_train)
# ---- Train
# Note: To be faster in our example, we can take only 2000 values
#
history = model.fit( x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=fit_verbosity,
validation_data=(x_test, y_test),
callbacks=[tensorboard_callback, bestmodel_callback, savemodel_callback] )
model.save(f'{run_dir}/models/last-model.keras')
chrono.show()
```
%% Cell type:markdown id: tags:
**Evaluate it :**
%% Cell type:code id: tags:
``` python
max_val_accuracy = max(history.history["val_accuracy"])
print("Max validation accuracy is : {:.4f}".format(max_val_accuracy))
```
%% Cell type:code id: tags:
``` python
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss : {:5.4f}'.format(score[0]))
print('Test accuracy : {:5.4f}'.format(score[1]))
```
%% Cell type:markdown id: tags:
## Step 7 - History
The `model.fit()` call returns a history object containing the learning history.
%% Cell type:code id: tags:
``` python
fidle.scrawler.history(history, save_as='03-history')
```
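%% Cell type:markdown id: tags:
Concretely, `history.history` is just a dict of per-epoch lists ; a sketch of what to expect with the metrics compiled above :
%% Cell type:code id: tags:
``` python
# One list per compiled metric, plus their val_ counterparts
for name, values in history.history.items():
    print(f'{name:15s} : {len(values)} epochs, last = {values[-1]:.4f}')
```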
%% Cell type:markdown id: tags:
## Step 8 - Evaluation and confusion
%% Cell type:code id: tags:
``` python
y_sigmoid = model.predict(x_test, verbose=fit_verbosity)
y_pred = np.argmax(y_sigmoid, axis=-1)
fidle.scrawler.confusion_matrix(y_test,y_pred,range(43), figsize=(12, 12),normalize=False, save_as='04-confusion-matrix')
```
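%% Cell type:markdown id: tags:
Beyond the global confusion matrix, a per-class accuracy is easy to derive from the same predictions (a sketch, reusing the `y_test` / `y_pred` arrays computed above) :
%% Cell type:code id: tags:
``` python
# Per-class accuracy, from the integer labels above
for c in range(43):
    mask = (y_test == c)
    if mask.any():
        print(f'class {c:2d} : acc = {(y_pred[mask]==c).mean():.2f} ({mask.sum()} samples)')
```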
%% Cell type:markdown id: tags:
## Step 9 - Restore and evaluate
#### List saved models :
%% Cell type:code id: tags:
``` python
!ls -1rt "$run_dir"/models/
```
%% Cell type:markdown id: tags:
#### Restore a model :
%% Cell type:code id: tags:
``` python
loaded_model = keras.models.load_model(f'{run_dir}/models/best-model.keras')
# loaded_model.summary()
print("Loaded.")
```
%% Cell type:markdown id: tags:
#### Evaluate it :
%% Cell type:code id: tags:
``` python
score = loaded_model.evaluate(x_test, y_test, verbose=0)
print('Test loss : {:5.4f}'.format(score[0]))
print('Test accuracy : {:5.4f}'.format(score[1]))
```
%% Cell type:markdown id: tags:
#### Make a prediction :
%% Cell type:code id: tags:
``` python
# ---- Pick a random image
#
i = random.randint(0,len(x_test)-1)
x,y = x_test[i], y_test[i]
# ---- Do prediction
#
prediction = loaded_model.predict( np.array([x]), verbose=fit_verbosity )
# ---- Show result
#
my_tools.show_prediction( prediction, x, y, x_meta )
```
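%% Cell type:markdown id: tags:
To look a bit further than the top class (a sketch ; `prediction` is the (1,43) softmax output computed above) :
%% Cell type:code id: tags:
``` python
# Top-5 classes, from the softmax output above
top5 = np.argsort(prediction[0])[::-1][:5]
for c in top5:
    print(f'class {c:2d} : {prediction[0][c]*100:5.1f} %')
```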
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
## Step 10 - To go further ;-)
What you can do:
- Try different models
- Use a subset of the dataset
- Try different datasets
- Try to recognize exotic signs !
- Test different hyperparameters (epochs, batch size, optimization, etc.)
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>
#!/bin/bash
#OAR -n Full convolutions
#OAR -t gpu
#OAR -l /nodes=1/gpudevice=1,walltime=01:00:00
#OAR --stdout full_convolutions_%jobid%.out
#OAR --stderr full_convolutions_%jobid%.err
#OAR --project fidle
#---- Note for cpu, set :
# OAR -l /nodes=1/core=32,walltime=02:00:00
# and add a 2>/dev/null to ipython xxx
# -----------------------------------------------
# _ _ _
# | |__ __ _| |_ ___| |__
# | '_ \ / _` | __/ __| '_ \
# | |_) | (_| | || (__| | | |
# |_.__/ \__,_|\__\___|_| |_|
# Fidle at GRICAD
# -----------------------------------------------
#
# <!-- TITLE --> [K3GTSRB10] - OAR batch script submission
# <!-- DESC --> Bash script for an OAR batch submission of an ipython code
# <!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
# ==== Notebook parameters =========================================
CONDA_ENV='fidle'
NOTEBOOK_DIR="~/fidle/GTSRB"
SCRIPT_IPY="03-Better-convolutions.py"
# ---- Environment vars used to override notebook/script parameters
# 'enhanced_dir', 'model_name', 'dataset_name', 'batch_size', 'epochs', 'scale', 'fit_verbosity'
export FIDLE_OVERRIDE_GTSRB3_run_dir="./run/GTSRB3"
export FIDLE_OVERRIDE_GTSRB3_enhanced_dir="./data"
export FIDLE_OVERRIDE_GTSRB3_model_name="model_01"
export FIDLE_OVERRIDE_GTSRB3_dataset_name="set-24x24-L"
export FIDLE_OVERRIDE_GTSRB3_batch_size=64
export FIDLE_OVERRIDE_GTSRB3_epochs=5
export FIDLE_OVERRIDE_GTSRB3_scale=1
export FIDLE_OVERRIDE_GTSRB3_fit_verbosity=0
# ==================================================================
echo '------------------------------------------------------------'
echo "Start : $0"
echo '------------------------------------------------------------'
echo "Notebook dir : $NOTEBOOK_DIR"
echo "Script : $SCRIPT_IPY"
echo "Environment : $CONDA_ENV"
echo '------------------------------------------------------------'
env | grep FIDLE_OVERRIDE | awk 'BEGIN { FS = "=" } ; { printf("%-35s : %s\n",$1,$2) }'
echo '------------------------------------------------------------'
source /applis/environments/cuda_env.sh dahu 10.0
source /applis/environments/conda.sh
#
conda activate "$CONDA_ENV"
# ---- Run it...
#
cd $NOTEBOOK_DIR
ipython "$SCRIPT_IPY"
echo 'Done.'
#!/bin/bash
# -----------------------------------------------
# _ _ _
# | |__ __ _| |_ ___| |__
# | '_ \ / _` | __/ __| '_ \
# | |_) | (_| | || (__| | | |
# |_.__/ \__,_|\__\___|_| |_|
# Fidle at IDRIS
# -----------------------------------------------
#
# <!-- TITLE --> [K3GTSRB11] - SLURM batch script
# <!-- DESC --> Bash script for a Slurm batch submission of an ipython code
# <!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
#
# Submission : sbatch /(...)/fidle/GTSRB/batch_slurm.sh
# Monitoring : squeue -u $USER
# ==== Job parameters ==============================================
#SBATCH --job-name="GTSRB"                            # job name
#SBATCH --ntasks=1                                    # number of tasks (a single process here)
#SBATCH --gres=gpu:1                                  # number of GPUs to reserve (a single GPU here)
#SBATCH --cpus-per-task=10                            # number of cores to reserve (a quarter of the node)
#SBATCH --hint=nomultithread                          # reserve physical (not logical) cores
#SBATCH --time=01:00:00                               # maximum execution time requested (HH:MM:SS)
#SBATCH --output="GTSRB_%j.out"                       # output file name
#SBATCH --error="GTSRB_%j.err"                        # error file name (here, merged with the output)
#SBATCH --mail-user=Jean-Luc.Parouty@grenoble-inp.fr
#SBATCH --mail-type=ALL
# ==== Notebook parameters =========================================
MODULE_ENV="tensorflow-gpu/py3/2.4.0"
NOTEBOOK_DIR="$WORK/fidle/GTSRB"
SCRIPT_IPY="03-Better-convolutions.py"
# ---- Environment vars used to override notebook/script parameters
#
export FIDLE_OVERRIDE_GTSRB3_run_dir="./run/GTSRB3"
export FIDLE_OVERRIDE_GTSRB3_enhanced_dir="./data"
export FIDLE_OVERRIDE_GTSRB3_model_name="model_01"
export FIDLE_OVERRIDE_GTSRB3_dataset_name="set-24x24-L"
export FIDLE_OVERRIDE_GTSRB3_batch_size=64
export FIDLE_OVERRIDE_GTSRB3_epochs=5
export FIDLE_OVERRIDE_GTSRB3_scale=1
export FIDLE_OVERRIDE_GTSRB3_fit_verbosity=0
# ==================================================================
echo '------------------------------------------------------------'
echo "Start : $0"
echo '------------------------------------------------------------'
echo "Job id : $SLURM_JOB_ID"
echo "Job name : $SLURM_JOB_NAME"
echo "Job node list : $SLURM_JOB_NODELIST"
echo '------------------------------------------------------------'
echo "Notebook dir : $NOTEBOOK_DIR"
echo "Script : $SCRIPT_IPY"
echo "Environment : $MODULE_ENV"
echo '------------------------------------------------------------'
env | grep FIDLE_OVERRIDE | awk 'BEGIN { FS = "=" } ; { printf("%-35s : %s\n",$1,$2) }'
echo '------------------------------------------------------------'
# ---- Module
module purge
module load "$MODULE_ENV"
# ---- Run it...
cd $NOTEBOOK_DIR
ipython "$SCRIPT_IPY"
echo 'Done.'
{
"0":"tench, Tinca tinca",
"1":"goldfish, Carassius auratus",
"2":"great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias",
"3":"tiger shark, Galeocerdo cuvieri",
"4":"hammerhead, hammerhead shark",
"5":"electric ray, crampfish, numbfish, torpedo",
"6":"stingray",
"7":"cock",
"8":"hen",
"9":"ostrich, Struthio camelus",
"10":"brambling, Fringilla montifringilla",
"11":"goldfinch, Carduelis carduelis",
"12":"house finch, linnet, Carpodacus mexicanus",
"13":"junco, snowbird",
"14":"indigo bunting, indigo finch, indigo bird, Passerina cyanea",
"15":"robin, American robin, Turdus migratorius",
"16":"bulbul",
"17":"jay",
"18":"magpie",
"19":"chickadee",
"20":"water ouzel, dipper",
"21":"kite",
"22":"bald eagle, American eagle, Haliaeetus leucocephalus",
"23":"vulture",
"24":"great grey owl, great gray owl, Strix nebulosa",
"25":"European fire salamander, Salamandra salamandra",
"26":"common newt, Triturus vulgaris",
"27":"eft",
"28":"spotted salamander, Ambystoma maculatum",
"29":"axolotl, mud puppy, Ambystoma mexicanum",
"30":"bullfrog, Rana catesbeiana",
"31":"tree frog, tree-frog",
"32":"tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui",
"33":"loggerhead, loggerhead turtle, Caretta caretta",
"34":"leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea",
"35":"mud turtle",
"36":"terrapin",
"37":"box turtle, box tortoise",
"38":"banded gecko",
"39":"common iguana, iguana, Iguana iguana",
"40":"American chameleon, anole, Anolis carolinensis",
"41":"whiptail, whiptail lizard",
"42":"agama",
"43":"frilled lizard, Chlamydosaurus kingi",
"44":"alligator lizard",
"45":"Gila monster, Heloderma suspectum",
"46":"green lizard, Lacerta viridis",
"47":"African chameleon, Chamaeleo chamaeleon",
"48":"Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis",
"49":"African crocodile, Nile crocodile, Crocodylus niloticus",
"50":"American alligator, Alligator mississipiensis",
"51":"triceratops",
"52":"thunder snake, worm snake, Carphophis amoenus",
"53":"ringneck snake, ring-necked snake, ring snake",
"54":"hognose snake, puff adder, sand viper",
"55":"green snake, grass snake",
"56":"king snake, kingsnake",
"57":"garter snake, grass snake",
"58":"water snake",
"59":"vine snake",
"60":"night snake, Hypsiglena torquata",
"61":"boa constrictor, Constrictor constrictor",
"62":"rock python, rock snake, Python sebae",
"63":"Indian cobra, Naja naja",
"64":"green mamba",
"65":"sea snake",
"66":"horned viper, cerastes, sand viper, horned asp, Cerastes cornutus",
"67":"diamondback, diamondback rattlesnake, Crotalus adamanteus",
"68":"sidewinder, horned rattlesnake, Crotalus cerastes",
"69":"trilobite",
"70":"harvestman, daddy longlegs, Phalangium opilio",
"71":"scorpion",
"72":"black and gold garden spider, Argiope aurantia",
"73":"barn spider, Araneus cavaticus",
"74":"garden spider, Aranea diademata",
"75":"black widow, Latrodectus mactans",
"76":"tarantula",
"77":"wolf spider, hunting spider",
"78":"tick",
"79":"centipede",
"80":"black grouse",
"81":"ptarmigan",
"82":"ruffed grouse, partridge, Bonasa umbellus",
"83":"prairie chicken, prairie grouse, prairie fowl",
"84":"peacock",
"85":"quail",
"86":"partridge",
"87":"African grey, African gray, Psittacus erithacus",
"88":"macaw",
"89":"sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita",
"90":"lorikeet",
"91":"coucal",
"92":"bee eater",
"93":"hornbill",
"94":"hummingbird",
"95":"jacamar",
"96":"toucan",
"97":"drake",
"98":"red-breasted merganser, Mergus serrator",
"99":"goose",
"100":"black swan, Cygnus atratus",
"101":"tusker",
"102":"echidna, spiny anteater, anteater",
"103":"platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus",
"104":"wallaby, brush kangaroo",
"105":"koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus",
"106":"wombat",
"107":"jellyfish",
"108":"sea anemone, anemone",
"109":"brain coral",
"110":"flatworm, platyhelminth",
"111":"nematode, nematode worm, roundworm",
"112":"conch",
"113":"snail",
"114":"slug",
"115":"sea slug, nudibranch",
"116":"chiton, coat-of-mail shell, sea cradle, polyplacophore",
"117":"chambered nautilus, pearly nautilus, nautilus",
"118":"Dungeness crab, Cancer magister",
"119":"rock crab, Cancer irroratus",
"120":"fiddler crab",
"121":"king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica",
"122":"American lobster, Northern lobster, Maine lobster, Homarus americanus",
"123":"spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish",
"124":"crayfish, crawfish, crawdad, crawdaddy",
"125":"hermit crab",
"126":"isopod",
"127":"white stork, Ciconia ciconia",
"128":"black stork, Ciconia nigra",
"129":"spoonbill",
"130":"flamingo",
"131":"little blue heron, Egretta caerulea",
"132":"American egret, great white heron, Egretta albus",
"133":"bittern",
"134":"crane",
"135":"limpkin, Aramus pictus",
"136":"European gallinule, Porphyrio porphyrio",
"137":"American coot, marsh hen, mud hen, water hen, Fulica americana",
"138":"bustard",
"139":"ruddy turnstone, Arenaria interpres",
"140":"red-backed sandpiper, dunlin, Erolia alpina",
"141":"redshank, Tringa totanus",
"142":"dowitcher",
"143":"oystercatcher, oyster catcher",
"144":"pelican",
"145":"king penguin, Aptenodytes patagonica",
"146":"albatross, mollymawk",
"147":"grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus",
"148":"killer whale, killer, orca, grampus, sea wolf, Orcinus orca",
"149":"dugong, Dugong dugon",
"150":"sea lion",
"151":"Chihuahua",
"152":"Japanese spaniel",
"153":"Maltese dog, Maltese terrier, Maltese",
"154":"Pekinese, Pekingese, Peke",
"155":"Shih-Tzu",
"156":"Blenheim spaniel",
"157":"papillon",
"158":"toy terrier",
"159":"Rhodesian ridgeback",
"160":"Afghan hound, Afghan",
"161":"basset, basset hound",
"162":"beagle",
"163":"bloodhound, sleuthhound",
"164":"bluetick",
"165":"black-and-tan coonhound",
"166":"Walker hound, Walker foxhound",
"167":"English foxhound",
"168":"redbone",
"169":"borzoi, Russian wolfhound",
"170":"Irish wolfhound",
"171":"Italian greyhound",
"172":"whippet",
"173":"Ibizan hound, Ibizan Podenco",
"174":"Norwegian elkhound, elkhound",
"175":"otterhound, otter hound",
"176":"Saluki, gazelle hound",
"177":"Scottish deerhound, deerhound",
"178":"Weimaraner",
"179":"Staffordshire bullterrier, Staffordshire bull terrier",
"180":"American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier",
"181":"Bedlington terrier",
"182":"Border terrier",
"183":"Kerry blue terrier",
"184":"Irish terrier",
"185":"Norfolk terrier",
"186":"Norwich terrier",
"187":"Yorkshire terrier",
"188":"wire-haired fox terrier",
"189":"Lakeland terrier",
"190":"Sealyham terrier, Sealyham",
"191":"Airedale, Airedale terrier",
"192":"cairn, cairn terrier",
"193":"Australian terrier",
"194":"Dandie Dinmont, Dandie Dinmont terrier",
"195":"Boston bull, Boston terrier",
"196":"miniature schnauzer",
"197":"giant schnauzer",
"198":"standard schnauzer",
"199":"Scotch terrier, Scottish terrier, Scottie",
"200":"Tibetan terrier, chrysanthemum dog",
"201":"silky terrier, Sydney silky",
"202":"soft-coated wheaten terrier",
"203":"West Highland white terrier",
"204":"Lhasa, Lhasa apso",
"205":"flat-coated retriever",
"206":"curly-coated retriever",
"207":"golden retriever",
"208":"Labrador retriever",
"209":"Chesapeake Bay retriever",
"210":"German short-haired pointer",
"211":"vizsla, Hungarian pointer",
"212":"English setter",
"213":"Irish setter, red setter",
"214":"Gordon setter",
"215":"Brittany spaniel",
"216":"clumber, clumber spaniel",
"217":"English springer, English springer spaniel",
"218":"Welsh springer spaniel",
"219":"cocker spaniel, English cocker spaniel, cocker",
"220":"Sussex spaniel",
"221":"Irish water spaniel",
"222":"kuvasz",
"223":"schipperke",
"224":"groenendael",
"225":"malinois",
"226":"briard",
"227":"kelpie",
"228":"komondor",
"229":"Old English sheepdog, bobtail",
"230":"Shetland sheepdog, Shetland sheep dog, Shetland",
"231":"collie",
"232":"Border collie",
"233":"Bouvier des Flandres, Bouviers des Flandres",
"234":"Rottweiler",
"235":"German shepherd, German shepherd dog, German police dog, alsatian",
"236":"Doberman, Doberman pinscher",
"237":"miniature pinscher",
"238":"Greater Swiss Mountain dog",
"239":"Bernese mountain dog",
"240":"Appenzeller",
"241":"EntleBucher",
"242":"boxer",
"243":"bull mastiff",
"244":"Tibetan mastiff",
"245":"French bulldog",
"246":"Great Dane",
"247":"Saint Bernard, St Bernard",
"248":"Eskimo dog, husky",
"249":"malamute, malemute, Alaskan malamute",
"250":"Siberian husky",
"251":"dalmatian, coach dog, carriage dog",
"252":"affenpinscher, monkey pinscher, monkey dog",
"253":"basenji",
"254":"pug, pug-dog",
"255":"Leonberg",
"256":"Newfoundland, Newfoundland dog",
"257":"Great Pyrenees",
"258":"Samoyed, Samoyede",
"259":"Pomeranian",
"260":"chow, chow chow",
"261":"keeshond",
"262":"Brabancon griffon",
"263":"Pembroke, Pembroke Welsh corgi",
"264":"Cardigan, Cardigan Welsh corgi",
"265":"toy poodle",
"266":"miniature poodle",
"267":"standard poodle",
"268":"Mexican hairless",
"269":"timber wolf, grey wolf, gray wolf, Canis lupus",
"270":"white wolf, Arctic wolf, Canis lupus tundrarum",
"271":"red wolf, maned wolf, Canis rufus, Canis niger",
"272":"coyote, prairie wolf, brush wolf, Canis latrans",
"273":"dingo, warrigal, warragal, Canis dingo",
"274":"dhole, Cuon alpinus",
"275":"African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus",
"276":"hyena, hyaena",
"277":"red fox, Vulpes vulpes",
"278":"kit fox, Vulpes macrotis",
"279":"Arctic fox, white fox, Alopex lagopus",
"280":"grey fox, gray fox, Urocyon cinereoargenteus",
"281":"tabby, tabby cat",
"282":"tiger cat",
"283":"Persian cat",
"284":"Siamese cat, Siamese",
"285":"Egyptian cat",
"286":"cougar, puma, catamount, mountain lion, painter, panther, Felis concolor",
"287":"lynx, catamount",
"288":"leopard, Panthera pardus",
"289":"snow leopard, ounce, Panthera uncia",
"290":"jaguar, panther, Panthera onca, Felis onca",
"291":"lion, king of beasts, Panthera leo",
"292":"tiger, Panthera tigris",
"293":"cheetah, chetah, Acinonyx jubatus",
"294":"brown bear, bruin, Ursus arctos",
"295":"American black bear, black bear, Ursus americanus, Euarctos americanus",
"296":"ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus",
"297":"sloth bear, Melursus ursinus, Ursus ursinus",
"298":"mongoose",
"299":"meerkat, mierkat",
"300":"tiger beetle",
"301":"ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle",
"302":"ground beetle, carabid beetle",
"303":"long-horned beetle, longicorn, longicorn beetle",
"304":"leaf beetle, chrysomelid",
"305":"dung beetle",
"306":"rhinoceros beetle",
"307":"weevil",
"308":"fly",
"309":"bee",
"310":"ant, emmet, pismire",
"311":"grasshopper, hopper",
"312":"cricket",
"313":"walking stick, walkingstick, stick insect",
"314":"cockroach, roach",
"315":"mantis, mantid",
"316":"cicada, cicala",
"317":"leafhopper",
"318":"lacewing, lacewing fly",
"319":"dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
"320":"damselfly",
"321":"admiral",
"322":"ringlet, ringlet butterfly",
"323":"monarch, monarch butterfly, milkweed butterfly, Danaus plexippus",
"324":"cabbage butterfly",
"325":"sulphur butterfly, sulfur butterfly",
"326":"lycaenid, lycaenid butterfly",
"327":"starfish, sea star",
"328":"sea urchin",
"329":"sea cucumber, holothurian",
"330":"wood rabbit, cottontail, cottontail rabbit",
"331":"hare",
"332":"Angora, Angora rabbit",
"333":"hamster",
"334":"porcupine, hedgehog",
"335":"fox squirrel, eastern fox squirrel, Sciurus niger",
"336":"marmot",
"337":"beaver",
"338":"guinea pig, Cavia cobaya",
"339":"sorrel",
"340":"zebra",
"341":"hog, pig, grunter, squealer, Sus scrofa",
"342":"wild boar, boar, Sus scrofa",
"343":"warthog",
"344":"hippopotamus, hippo, river horse, Hippopotamus amphibius",
"345":"ox",
"346":"water buffalo, water ox, Asiatic buffalo, Bubalus bubalis",
"347":"bison",
"348":"ram, tup",
"349":"bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis",
"350":"ibex, Capra ibex",
"351":"hartebeest",
"352":"impala, Aepyceros melampus",
"353":"gazelle",
"354":"Arabian camel, dromedary, Camelus dromedarius",
"355":"llama",
"356":"weasel",
"357":"mink",
"358":"polecat, fitch, foulmart, foumart, Mustela putorius",
"359":"black-footed ferret, ferret, Mustela nigripes",
"360":"otter",
"361":"skunk, polecat, wood pussy",
"362":"badger",
"363":"armadillo",
"364":"three-toed sloth, ai, Bradypus tridactylus",
"365":"orangutan, orang, orangutang, Pongo pygmaeus",
"366":"gorilla, Gorilla gorilla",
"367":"chimpanzee, chimp, Pan troglodytes",
"368":"gibbon, Hylobates lar",
"369":"siamang, Hylobates syndactylus, Symphalangus syndactylus",
"370":"guenon, guenon monkey",
"371":"patas, hussar monkey, Erythrocebus patas",
"372":"baboon",
"373":"macaque",
"374":"langur",
"375":"colobus, colobus monkey",
"376":"proboscis monkey, Nasalis larvatus",
"377":"marmoset",
"378":"capuchin, ringtail, Cebus capucinus",
"379":"howler monkey, howler",
"380":"titi, titi monkey",
"381":"spider monkey, Ateles geoffroyi",
"382":"squirrel monkey, Saimiri sciureus",
"383":"Madagascar cat, ring-tailed lemur, Lemur catta",
"384":"indri, indris, Indri indri, Indri brevicaudatus",
"385":"Indian elephant, Elephas maximus",
"386":"African elephant, Loxodonta africana",
"387":"lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens",
"388":"giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca",
"389":"barracouta, snoek",
"390":"eel",
"391":"coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch",
"392":"rock beauty, Holocanthus tricolor",
"393":"anemone fish",
"394":"sturgeon",
"395":"gar, garfish, garpike, billfish, Lepisosteus osseus",
"396":"lionfish",
"397":"puffer, pufferfish, blowfish, globefish",
"398":"abacus",
"399":"abaya",
"400":"academic gown, academic robe, judge's robe",
"401":"accordion, piano accordion, squeeze box",
"402":"acoustic guitar",
"403":"aircraft carrier, carrier, flattop, attack aircraft carrier",
"404":"airliner",
"405":"airship, dirigible",
"406":"altar",
"407":"ambulance",
"408":"amphibian, amphibious vehicle",
"409":"analog clock",
"410":"apiary, bee house",
"411":"apron",
"412":"ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin",
"413":"assault rifle, assault gun",
"414":"backpack, back pack, knapsack, packsack, rucksack, haversack",
"415":"bakery, bakeshop, bakehouse",
"416":"balance beam, beam",
"417":"balloon",
"418":"ballpoint, ballpoint pen, ballpen, Biro",
"419":"Band Aid",
"420":"banjo",
"421":"bannister, banister, balustrade, balusters, handrail",
"422":"barbell",
"423":"barber chair",
"424":"barbershop",
"425":"barn",
"426":"barometer",
"427":"barrel, cask",
"428":"barrow, garden cart, lawn cart, wheelbarrow",
"429":"baseball",
"430":"basketball",
"431":"bassinet",
"432":"bassoon",
"433":"bathing cap, swimming cap",
"434":"bath towel",
"435":"bathtub, bathing tub, bath, tub",
"436":"beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon",
"437":"beacon, lighthouse, beacon light, pharos",
"438":"beaker",
"439":"bearskin, busby, shako",
"440":"beer bottle",
"441":"beer glass",
"442":"bell cote, bell cot",
"443":"bib",
"444":"bicycle-built-for-two, tandem bicycle, tandem",
"445":"bikini, two-piece",
"446":"binder, ring-binder",
"447":"binoculars, field glasses, opera glasses",
"448":"birdhouse",
"449":"boathouse",
"450":"bobsled, bobsleigh, bob",
"451":"bolo tie, bolo, bola tie, bola",
"452":"bonnet, poke bonnet",
"453":"bookcase",
"454":"bookshop, bookstore, bookstall",
"455":"bottlecap",
"456":"bow",
"457":"bow tie, bow-tie, bowtie",
"458":"brass, memorial tablet, plaque",
"459":"brassiere, bra, bandeau",
"460":"breakwater, groin, groyne, mole, bulwark, seawall, jetty",
"461":"breastplate, aegis, egis",
"462":"broom",
"463":"bucket, pail",
"464":"buckle",
"465":"bulletproof vest",
"466":"bullet train, bullet",
"467":"butcher shop, meat market",
"468":"cab, hack, taxi, taxicab",
"469":"caldron, cauldron",
"470":"candle, taper, wax light",
"471":"cannon",
"472":"canoe",
"473":"can opener, tin opener",
"474":"cardigan",
"475":"car mirror",
"476":"carousel, carrousel, merry-go-round, roundabout, whirligig",
"477":"carpenter's kit, tool kit",
"478":"carton",
"479":"car wheel",
"480":"cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM",
"481":"cassette",
"482":"cassette player",
"483":"castle",
"484":"catamaran",
"485":"CD player",
"486":"cello, violoncello",
"487":"cellular telephone, cellular phone, cellphone, cell, mobile phone",
"488":"chain",
"489":"chainlink fence",
"490":"chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour",
"491":"chain saw, chainsaw",
"492":"chest",
"493":"chiffonier, commode",
"494":"chime, bell, gong",
"495":"china cabinet, china closet",
"496":"Christmas stocking",
"497":"church, church building",
"498":"cinema, movie theater, movie theatre, movie house, picture palace",
"499":"cleaver, meat cleaver, chopper",
"500":"cliff dwelling",
"501":"cloak",
"502":"clog, geta, patten, sabot",
"503":"cocktail shaker",
"504":"coffee mug",
"505":"coffeepot",
"506":"coil, spiral, volute, whorl, helix",
"507":"combination lock",
"508":"computer keyboard, keypad",
"509":"confectionery, confectionary, candy store",
"510":"container ship, containership, container vessel",
"511":"convertible",
"512":"corkscrew, bottle screw",
"513":"cornet, horn, trumpet, trump",
"514":"cowboy boot",
"515":"cowboy hat, ten-gallon hat",
"516":"cradle",
"517":"crane",
"518":"crash helmet",
"519":"crate",
"520":"crib, cot",
"521":"Crock Pot",
"522":"croquet ball",
"523":"crutch",
"524":"cuirass",
"525":"dam, dike, dyke",
"526":"desk",
"527":"desktop computer",
"528":"dial telephone, dial phone",
"529":"diaper, nappy, napkin",
"530":"digital clock",
"531":"digital watch",
"532":"dining table, board",
"533":"dishrag, dishcloth",
"534":"dishwasher, dish washer, dishwashing machine",
"535":"disk brake, disc brake",
"536":"dock, dockage, docking facility",
"537":"dogsled, dog sled, dog sleigh",
"538":"dome",
"539":"doormat, welcome mat",
"540":"drilling platform, offshore rig",
"541":"drum, membranophone, tympan",
"542":"drumstick",
"543":"dumbbell",
"544":"Dutch oven",
"545":"electric fan, blower",
"546":"electric guitar",
"547":"electric locomotive",
"548":"entertainment center",
"549":"envelope",
"550":"espresso maker",
"551":"face powder",
"552":"feather boa, boa",
"553":"file, file cabinet, filing cabinet",
"554":"fireboat",
"555":"fire engine, fire truck",
"556":"fire screen, fireguard",
"557":"flagpole, flagstaff",
"558":"flute, transverse flute",
"559":"folding chair",
"560":"football helmet",
"561":"forklift",
"562":"fountain",
"563":"fountain pen",
"564":"four-poster",
"565":"freight car",
"566":"French horn, horn",
"567":"frying pan, frypan, skillet",
"568":"fur coat",
"569":"garbage truck, dustcart",
"570":"gasmask, respirator, gas helmet",
"571":"gas pump, gasoline pump, petrol pump, island dispenser",
"572":"goblet",
"573":"go-kart",
"574":"golf ball",
"575":"golfcart, golf cart",
"576":"gondola",
"577":"gong, tam-tam",
"578":"gown",
"579":"grand piano, grand",
"580":"greenhouse, nursery, glasshouse",
"581":"grille, radiator grille",
"582":"grocery store, grocery, food market, market",
"583":"guillotine",
"584":"hair slide",
"585":"hair spray",
"586":"half track",
"587":"hammer",
"588":"hamper",
"589":"hand blower, blow dryer, blow drier, hair dryer, hair drier",
"590":"hand-held computer, hand-held microcomputer",
"591":"handkerchief, hankie, hanky, hankey",
"592":"hard disc, hard disk, fixed disk",
"593":"harmonica, mouth organ, harp, mouth harp",
"594":"harp",
"595":"harvester, reaper",
"596":"hatchet",
"597":"holster",
"598":"home theater, home theatre",
"599":"honeycomb",
"600":"hook, claw",
"601":"hoopskirt, crinoline",
"602":"horizontal bar, high bar",
"603":"horse cart, horse-cart",
"604":"hourglass",
"605":"iPod",
"606":"iron, smoothing iron",
"607":"jack-o'-lantern",
"608":"jean, blue jean, denim",
"609":"jeep, landrover",
"610":"jersey, T-shirt, tee shirt",
"611":"jigsaw puzzle",
"612":"jinrikisha, ricksha, rickshaw",
"613":"joystick",
"614":"kimono",
"615":"knee pad",
"616":"knot",
"617":"lab coat, laboratory coat",
"618":"ladle",
"619":"lampshade, lamp shade",
"620":"laptop, laptop computer",
"621":"lawn mower, mower",
"622":"lens cap, lens cover",
"623":"letter opener, paper knife, paperknife",
"624":"library",
"625":"lifeboat",
"626":"lighter, light, igniter, ignitor",
"627":"limousine, limo",
"628":"liner, ocean liner",
"629":"lipstick, lip rouge",
"630":"Loafer",
"631":"lotion",
"632":"loudspeaker, speaker, speaker unit, loudspeaker system, speaker system",
"633":"loupe, jeweler's loupe",
"634":"lumbermill, sawmill",
"635":"magnetic compass",
"636":"mailbag, postbag",
"637":"mailbox, letter box",
"638":"maillot",
"639":"maillot, tank suit",
"640":"manhole cover",
"641":"maraca",
"642":"marimba, xylophone",
"643":"mask",
"644":"matchstick",
"645":"maypole",
"646":"maze, labyrinth",
"647":"measuring cup",
"648":"medicine chest, medicine cabinet",
"649":"megalith, megalithic structure",
"650":"microphone, mike",
"651":"microwave, microwave oven",
"652":"military uniform",
"653":"milk can",
"654":"minibus",
"655":"miniskirt, mini",
"656":"minivan",
"657":"missile",
"658":"mitten",
"659":"mixing bowl",
"660":"mobile home, manufactured home",
"661":"Model T",
"662":"modem",
"663":"monastery",
"664":"monitor",
"665":"moped",
"666":"mortar",
"667":"mortarboard",
"668":"mosque",
"669":"mosquito net",
"670":"motor scooter, scooter",
"671":"mountain bike, all-terrain bike, off-roader",
"672":"mountain tent",
"673":"mouse, computer mouse",
"674":"mousetrap",
"675":"moving van",
"676":"muzzle",
"677":"nail",
"678":"neck brace",
"679":"necklace",
"680":"nipple",
"681":"notebook, notebook computer",
"682":"obelisk",
"683":"oboe, hautboy, hautbois",
"684":"ocarina, sweet potato",
"685":"odometer, hodometer, mileometer, milometer",
"686":"oil filter",
"687":"organ, pipe organ",
"688":"oscilloscope, scope, cathode-ray oscilloscope, CRO",
"689":"overskirt",
"690":"oxcart",
"691":"oxygen mask",
"692":"packet",
"693":"paddle, boat paddle",
"694":"paddlewheel, paddle wheel",
"695":"padlock",
"696":"paintbrush",
"697":"pajama, pyjama, pj's, jammies",
"698":"palace",
"699":"panpipe, pandean pipe, syrinx",
"700":"paper towel",
"701":"parachute, chute",
"702":"parallel bars, bars",
"703":"park bench",
"704":"parking meter",
"705":"passenger car, coach, carriage",
"706":"patio, terrace",
"707":"pay-phone, pay-station",
"708":"pedestal, plinth, footstall",
"709":"pencil box, pencil case",
"710":"pencil sharpener",
"711":"perfume, essence",
"712":"Petri dish",
"713":"photocopier",
"714":"pick, plectrum, plectron",
"715":"pickelhaube",
"716":"picket fence, paling",
"717":"pickup, pickup truck",
"718":"pier",
"719":"piggy bank, penny bank",
"720":"pill bottle",
"721":"pillow",
"722":"ping-pong ball",
"723":"pinwheel",
"724":"pirate, pirate ship",
"725":"pitcher, ewer",
"726":"plane, carpenter's plane, woodworking plane",
"727":"planetarium",
"728":"plastic bag",
"729":"plate rack",
"730":"plow, plough",
"731":"plunger, plumber's helper",
"732":"Polaroid camera, Polaroid Land camera",
"733":"pole",
"734":"police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria",
"735":"poncho",
"736":"pool table, billiard table, snooker table",
"737":"pop bottle, soda bottle",
"738":"pot, flowerpot",
"739":"potter's wheel",
"740":"power drill",
"741":"prayer rug, prayer mat",
"742":"printer",
"743":"prison, prison house",
"744":"projectile, missile",
"745":"projector",
"746":"puck, hockey puck",
"747":"punching bag, punch bag, punching ball, punchball",
"748":"purse",
"749":"quill, quill pen",
"750":"quilt, comforter, comfort, puff",
"751":"racer, race car, racing car",
"752":"racket, racquet",
"753":"radiator",
"754":"radio, wireless",
"755":"radio telescope, radio reflector",
"756":"rain barrel",
"757":"recreational vehicle, RV, R.V.",
"758":"reel",
"759":"reflex camera",
"760":"refrigerator, icebox",
"761":"remote control, remote",
"762":"restaurant, eating house, eating place, eatery",
"763":"revolver, six-gun, six-shooter",
"764":"rifle",
"765":"rocking chair, rocker",
"766":"rotisserie",
"767":"rubber eraser, rubber, pencil eraser",
"768":"rugby ball",
"769":"rule, ruler",
"770":"running shoe",
"771":"safe",
"772":"safety pin",
"773":"saltshaker, salt shaker",
"774":"sandal",
"775":"sarong",
"776":"sax, saxophone",
"777":"scabbard",
"778":"scale, weighing machine",
"779":"school bus",
"780":"schooner",
"781":"scoreboard",
"782":"screen, CRT screen",
"783":"screw",
"784":"screwdriver",
"785":"seat belt, seatbelt",
"786":"sewing machine",
"787":"shield, buckler",
"788":"shoe shop, shoe-shop, shoe store",
"789":"shoji",
"790":"shopping basket",
"791":"shopping cart",
"792":"shovel",
"793":"shower cap",
"794":"shower curtain",
"795":"ski",
"796":"ski mask",
"797":"sleeping bag",
"798":"slide rule, slipstick",
"799":"sliding door",
"800":"slot, one-armed bandit",
"801":"snorkel",
"802":"snowmobile",
"803":"snowplow, snowplough",
"804":"soap dispenser",
"805":"soccer ball",
"806":"sock",
"807":"solar dish, solar collector, solar furnace",
"808":"sombrero",
"809":"soup bowl",
"810":"space bar",
"811":"space heater",
"812":"space shuttle",
"813":"spatula",
"814":"speedboat",
"815":"spider web, spider's web",
"816":"spindle",
"817":"sports car, sport car",
"818":"spotlight, spot",
"819":"stage",
"820":"steam locomotive",
"821":"steel arch bridge",
"822":"steel drum",
"823":"stethoscope",
"824":"stole",
"825":"stone wall",
"826":"stopwatch, stop watch",
"827":"stove",
"828":"strainer",
"829":"streetcar, tram, tramcar, trolley, trolley car",
"830":"stretcher",
"831":"studio couch, day bed",
"832":"stupa, tope",
"833":"submarine, pigboat, sub, U-boat",
"834":"suit, suit of clothes",
"835":"sundial",
"836":"sunglass",
"837":"sunglasses, dark glasses, shades",
"838":"sunscreen, sunblock, sun blocker",
"839":"suspension bridge",
"840":"swab, swob, mop",
"841":"sweatshirt",
"842":"swimming trunks, bathing trunks",
"843":"swing",
"844":"switch, electric switch, electrical switch",
"845":"syringe",
"846":"table lamp",
"847":"tank, army tank, armored combat vehicle, armoured combat vehicle",
"848":"tape player",
"849":"teapot",
"850":"teddy, teddy bear",
"851":"television, television system",
"852":"tennis ball",
"853":"thatch, thatched roof",
"854":"theater curtain, theatre curtain",
"855":"thimble",
"856":"thresher, thrasher, threshing machine",
"857":"throne",
"858":"tile roof",
"859":"toaster",
"860":"tobacco shop, tobacconist shop, tobacconist",
"861":"toilet seat",
"862":"torch",
"863":"totem pole",
"864":"tow truck, tow car, wrecker",
"865":"toyshop",
"866":"tractor",
"867":"trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi",
"868":"tray",
"869":"trench coat",
"870":"tricycle, trike, velocipede",
"871":"trimaran",
"872":"tripod",
"873":"triumphal arch",
"874":"trolleybus, trolley coach, trackless trolley",
"875":"trombone",
"876":"tub, vat",
"877":"turnstile",
"878":"typewriter keyboard",
"879":"umbrella",
"880":"unicycle, monocycle",
"881":"upright, upright piano",
"882":"vacuum, vacuum cleaner",
"883":"vase",
"884":"vault",
"885":"velvet",
"886":"vending machine",
"887":"vestment",
"888":"viaduct",
"889":"violin, fiddle",
"890":"volleyball",
"891":"waffle iron",
"892":"wall clock",
"893":"wallet, billfold, notecase, pocketbook",
"894":"wardrobe, closet, press",
"895":"warplane, military plane",
"896":"washbasin, handbasin, washbowl, lavabo, wash-hand basin",
"897":"washer, automatic washer, washing machine",
"898":"water bottle",
"899":"water jug",
"900":"water tower",
"901":"whiskey jug",
"902":"whistle",
"903":"wig",
"904":"window screen",
"905":"window shade",
"906":"Windsor tie",
"907":"wine bottle",
"908":"wing",
"909":"wok",
"910":"wooden spoon",
"911":"wool, woolen, woollen",
"912":"worm fence, snake fence, snake-rail fence, Virginia fence",
"913":"wreck",
"914":"yawl",
"915":"yurt",
"916":"web site, website, internet site, site",
"917":"comic book",
"918":"crossword puzzle, crossword",
"919":"street sign",
"920":"traffic light, traffic signal, stoplight",
"921":"book jacket, dust cover, dust jacket, dust wrapper",
"922":"menu",
"923":"plate",
"924":"guacamole",
"925":"consomme",
"926":"hot pot, hotpot",
"927":"trifle",
"928":"ice cream, icecream",
"929":"ice lolly, lolly, lollipop, popsicle",
"930":"French loaf",
"931":"bagel, beigel",
"932":"pretzel",
"933":"cheeseburger",
"934":"hotdog, hot dog, red hot",
"935":"mashed potato",
"936":"head cabbage",
"937":"broccoli",
"938":"cauliflower",
"939":"zucchini, courgette",
"940":"spaghetti squash",
"941":"acorn squash",
"942":"butternut squash",
"943":"cucumber, cuke",
"944":"artichoke, globe artichoke",
"945":"bell pepper",
"946":"cardoon",
"947":"mushroom",
"948":"Granny Smith",
"949":"strawberry",
"950":"orange",
"951":"lemon",
"952":"fig",
"953":"pineapple, ananas",
"954":"banana",
"955":"jackfruit, jak, jack",
"956":"custard apple",
"957":"pomegranate",
"958":"hay",
"959":"carbonara",
"960":"chocolate sauce, chocolate syrup",
"961":"dough",
"962":"meat loaf, meatloaf",
"963":"pizza, pizza pie",
"964":"potpie",
"965":"burrito",
"966":"red wine",
"967":"espresso",
"968":"cup",
"969":"eggnog",
"970":"alp",
"971":"bubble",
"972":"cliff, drop, drop-off",
"973":"coral reef",
"974":"geyser",
"975":"lakeside, lakeshore",
"976":"promontory, headland, head, foreland",
"977":"sandbar, sand bar",
"978":"seashore, coast, seacoast, sea-coast",
"979":"valley, vale",
"980":"volcano",
"981":"ballplayer, baseball player",
"982":"groom, bridegroom",
"983":"scuba diver",
"984":"rapeseed",
"985":"daisy",
"986":"yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
"987":"corn",
"988":"acorn",
"989":"hip, rose hip, rosehip",
"990":"buckeye, horse chestnut, conker",
"991":"coral fungus",
"992":"agaric",
"993":"gyromitra",
"994":"stinkhorn, carrion fungus",
"995":"earthstar",
"996":"hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa",
"997":"bolete",
"998":"ear, spike, capitulum",
"999":"toilet tissue, toilet paper, bathroom tissue"
}
# ------------------------------------------------------------------
# _____ _ _ _
# | ___(_) __| | | ___
# | |_ | |/ _` | |/ _ \
# | _| | | (_| | | __/
# |_| |_|\__,_|_|\___| Imagenet Classes
# ------------------------------------------------------------------
# Formation Introduction au Deep Learning (FIDLE) - CNRS/MIAI/UGA
# ------------------------------------------------------------------
# JL Parouty 2024
import os
import json


class ImagenetClassnames:
    """Lookup table for the 1000 ImageNet class names, loaded from a JSON file."""

    classes_file = 'ImagenetClassnames.json'

    def __init__(self):
        # Load the {id: name} mapping from the JSON file located next to this module
        dir_path = os.path.dirname(os.path.abspath(__file__))
        with open(f'{dir_path}/{self.classes_file}') as f:
            self.classes = json.load(f)
        print(f'Imagenet classes loaded ({len(self.classes)} classes)')

    def get(self, classes_id, top_n=2):
        """Return the names of the last top_n ids in classes_id.
        With ids sorted by ascending score (e.g. via np.argsort), these are
        the top_n most probable classes."""
        top_classes = [self.classes[str(i)] for i in classes_id[-top_n:]]
        return top_classes
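# ---- Usage sketch (added for illustration; not part of the original file) --
#
# A minimal example of decoding a classifier's output with ImagenetClassnames.
# It assumes any Keras ImageNet classifier; ResNet50 is used here purely as an
# example, and the random array stands in for a real preprocessed image batch.
#
if __name__ == '__main__':
    import numpy as np
    import keras

    model = keras.applications.ResNet50(weights='imagenet')   # example model
    x = np.random.rand(1, 224, 224, 3).astype('float32')      # dummy image batch

    icn = ImagenetClassnames()
    probs = model.predict(x)[0]          # class probabilities for the single image
    top_ids = np.argsort(probs)          # class ids, ascending probability
    print(icn.get(top_ids, top_n=3))     # names of the 3 most probable classes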
# ------------------------------------------------------------------
# _____ _ _ _
# | ___(_) __| | | ___
# | |_ | |/ _` | |/ _ \
# | _| | | (_| | | __/
# |_| |_|\__,_|_|\___| Tensorboard callback
# ------------------------------------------------------------------
# Formation Introduction au Deep Learning (FIDLE) - CNRS/MIAI/UGA
# ------------------------------------------------------------------
# JL Parouty 2023
#
# See : https://keras.io/api/callbacks/
# See : https://keras.io/guides/writing_your_own_callbacks/
# See : https://pytorch.org/docs/stable/tensorboard.html
import keras
from torch.utils.tensorboard import SummaryWriter


class TensorboardCallback(keras.callbacks.Callback):

    def __init__(self, log_dir=None):
        '''
        Init callback
        Args:
            log_dir : log directory
        '''
        super().__init__()
        self.writer = SummaryWriter(log_dir=log_dir)

    def on_epoch_end(self, epoch, logs=None):
        '''
        Record logs at epoch end
        '''
        # ---- Record all metrics (the very simple way)
        #
        # for k,v in logs.items():
        #     self.writer.add_scalar(k, v, epoch)

        # ---- Record and group specific metrics
        #
        self.writer.add_scalars('Accuracy',
                                {'Train': logs['accuracy'],
                                 'Validation': logs['val_accuracy']},
                                epoch)
        self.writer.add_scalars('Loss',
                                {'Train': logs['loss'],
                                 'Validation': logs['val_loss']},
                                epoch)
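
# ---- Usage sketch (added for illustration; not part of the original file) --
#
# The callback reads logs['accuracy'] and logs['val_accuracy'], so the model
# must be compiled with metrics=['accuracy'] and trained with validation data.
# The dataset and architecture below are minimal placeholders, just enough to
# exercise the callback; the log directory is an arbitrary example path.
#
if __name__ == '__main__':
    import numpy as np

    x = np.random.rand(256, 10).astype('float32')
    y = np.random.randint(0, 2, size=(256,))

    model = keras.Sequential([keras.layers.Dense(8, activation='relu'),
                              keras.layers.Dense(1, activation='sigmoid')])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(x, y,
              validation_split=0.2,
              epochs=3,
              callbacks=[TensorboardCallback(log_dir='./run/logs')])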