Update VAE for 192x160

930f74c0 · Jean-Luc Parouty · e1e1871c · 930f74c0 · 930f74c0 · 930f74c0
Commit 930f74c0 authored 3 years ago by Jean-Luc Parouty
--- a/VAE/06-Prepare-CelebA-datasets.ipynb
+++ b/VAE/06-Prepare-CelebA-datasets.ipynb
@@ -111,6 +111,16 @@
    "# output_dir    = f'{datasets_dir}/celeba/enhanced'\n",
    "# exit_if_exist = True\n",
    "\n",
+    "# ---- Just for tests, large size images (192x160)\n",
+    "#      Save clustered dataset in ./data\n",
+    "#\n",
+    "# scale         = 0.05\n",
+    "# seed          = 123\n",
+    "# cluster_size  = 10000\n",
+    "# image_size    = (192,160)\n",
+    "# output_dir    = './data'\n",
+    "# exit_if_exist = False\n",
+    "\n",
    "# ---- Full clusters generation, large size : 138 GB\n",
    "#      Save clustered dataset in <datasets_dir> \n",
    "#\n",
@@ -357,7 +367,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.9.7"
  }
 },
 "nbformat": 4,

 %% Cell type:markdown id: tags:
 <img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
 # <!-- TITLE --> [VAE6] - Generation of a clustered dataset
 <!-- DESC --> Episode 2 : Analysis of the CelebA dataset and creation of a clustered and usable dataset
 <!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
 ## Objectives :
 - Formatting our dataset in **cluster files**, using batch mode
 - Adapting a notebook for batch use
 The [CelebFaces Attributes Dataset (CelebA)](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) contains about **200,000 images** (202599,218,178,3).
 The size and the number of files of this dataset make it impossible to use it as it is.
 A formatting in the form of clusters of n images is essential.
 ## What we're going to do :
 - Read the images
 - Resize and normalize them
 - Build clusters of images in npy format
 %% Cell type:markdown id: tags:
 ## Step 1 - Import and init
 %% Cell type:code id: tags:
 ``` python
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
 from skimage import io, transform
 import os,pathlib,time,sys,json,glob
 import csv
 import math, random
 from importlib import reload
 sys.path.append('..')
 import fidle.pwk as pwk
 run_dir='./run/VAE6'
 datasets_dir = pwk.init('VAE6', run_dir)
 ```
 %% Cell type:markdown id: tags:
 ## Step 2 - Parameters
 The whole dataset will be used for training
 Reading the 200,000 images can take a long time **(>20 minutes)** and a lot of disk space **(>170 GB)**
 Example :
 Image Sizes: 128x128 : 74 GB
 Image Sizes: 192x160 : 138 GB
 You can define these parameters :
 `scale` : 1 means 100% of the dataset - set 0.05 for tests
 `image_size` : images size in the clusters, should be (128,128) or (192,160) - original size is (218,178)
 `output_dir` : where to write clusters, could be :
 - `./data`, for tests purpose
 - `<datasets_dir>/celeba/enhanced` to add clusters in your datasets dir.
 `cluster_size` : number of images in a cluster, 10000 is fine. (will be adjusted by scale)
 `progress_verbosity`: Verbosity of progress bar: 0=no progress, 1: progress bar, 2: One line
 **Note :** If the target folder is not empty and exit_if_exist is True, the construction is blocked.
 %% Cell type:code id: tags:
 ``` python
 # ---- Parameters you can change -----------------------------------
 #      Only one parameter set below should be active at a time :
 #      uncomment the one you want and comment out the others.
 #
 progress_verbosity = 1          # 0=no progress, 1=progress bar, 2=one line
 # ---- Just for tests, medium size images (128x128)
 #      Save clustered dataset in ./data
 #
 scale         = 0.05            # fraction of the dataset to use (1. means 100%)
 seed          = 123
 cluster_size  = 10000           # images per cluster, before being adjusted by scale
 image_size    = (128,128)
 output_dir    = './data'
 exit_if_exist = False           # if True, construction is blocked when clusters already exist
 # ---- Full clusters generation, medium size : 74 GB
 #      Save clustered dataset in <datasets_dir>
 #
 # scale         = 1.
 # seed          = 123
 # cluster_size  = 10000
 # image_size    = (128,128)
 # output_dir    = f'{datasets_dir}/celeba/enhanced'
 # exit_if_exist = True
+# ---- Just for tests, large size images (192x160)
+#      Save clustered dataset in ./data
+#
+# scale         = 0.05
+# seed          = 123
+# cluster_size  = 10000
+# image_size    = (192,160)
+# output_dir    = './data'
+# exit_if_exist = False
 # ---- Full clusters generation, large size : 138 GB
 #      Save clustered dataset in <datasets_dir>
 #
 # scale         = 1.
 # seed          = 123
 # cluster_size  = 10000
 # image_size    = (192,160)
 # output_dir    = f'{datasets_dir}/celeba/enhanced'
 # exit_if_exist = True
 ```
 %% Cell type:code id: tags:
 ``` python
 # ---- Used for continuous integration - Just forget these lines
 #      NOTE(review): pwk.override presumably replaces the listed notebook
 #      variables with externally supplied values - confirm in fidle.pwk
 #
 pwk.override('progress_verbosity', 'scale', 'seed', )
 pwk.override('cluster_size', 'image_size', 'output_dir', 'exit_if_exist')
 ```
 %% Cell type:markdown id: tags:
 ## Step 3 - Cluster construction
 %% Cell type:markdown id: tags:
 ### 3.1 - Directories and files :
 %% Cell type:code id: tags:
 ``` python
 # ---- Original CelebA files, expected under <datasets_dir>/celeba/origine
 #      dataset_csv : description file, read with pd.read_csv by read_and_save
 #      dataset_img : directory holding the images named in the image_id column
 dataset_csv = f'{datasets_dir}/celeba/origine/list_attr_celeba.csv'
 dataset_img = f'{datasets_dir}/celeba/origine/img_align_celeba'
 ```
 %% Cell type:markdown id: tags:
 ### 3.2 - Cooking function
 %% Cell type:code id: tags:
 ``` python
 def read_and_save( dataset_csv, dataset_img, shuffle=True, seed=None, scale=1,
                   cluster_size=1000, cluster_dir='./dataset_cluster', cluster_name='images',
                   image_size=(128,128), exit_if_exist=True, verbosity=1):
    '''
    Read the images listed in a csv file, resize them and save them as clusters.
    Each cluster is a pair of files written in cluster_dir :
        <cluster_name>-NNN.npy : the resized images of the cluster
        <cluster_name>-NNN.csv : the matching description rows
    Args:
        dataset_csv   : csv file describing the original images (needs an image_id column)
        dataset_img   : directory of the original images
        shuffle       : shuffle the description rows if True (True)
        seed          : random seed value. False means no seeding, None lets numpy seed itself (None)
        scale         : fraction of the dataset to use, 1. means 100%. Also applied to cluster_size (1)
        cluster_size  : number of images per cluster, before rescaling (1000)
        cluster_dir   : directory of generated clusters ('./dataset_cluster')
        cluster_name  : name prefix of generated clusters ('images')
        image_size    : size of generated images (128,128)
        exit_if_exist : do nothing if the first cluster file already exists (True)
        verbosity     : forwarded to pwk.update_progress (1)
    Returns:
        nb_clusters : number of clusters written (0 if clusters already exist)
        duration    : value returned by pwk.chrono_stop() (0 if clusters already exist)
    '''
    def save_cluster(imgs, desc, cols, num):
        # Write one cluster (images + descriptions), then hand back fresh empty
        # buffers together with the number of the next cluster.
        file_img  = f'{cluster_dir}/{cluster_name}-{num:03d}.npy'
        file_desc = f'{cluster_dir}/{cluster_name}-{num:03d}.csv'
        np.save(file_img,  np.array(imgs))
        df = pd.DataFrame(data=desc, columns=cols)
        df.to_csv(file_desc, index=False)
        return [], [], num+1
    pwk.chrono_start()
    # ---- Seed
    #
    if seed is not False:
        np.random.seed(seed)
        print(f'Seeded ({seed})')
    # ---- Read dataset description
    #
    dataset_desc = pd.read_csv(dataset_csv, header=0)
    n = len(dataset_desc)
    print(f'Description loaded ({n} images).')
    # ---- Shuffle
    #
    if shuffle:
        dataset_desc = dataset_desc.reindex(np.random.permutation(dataset_desc.index))
        print('Shuffled.')
    cols = list(dataset_desc.columns)
    # ---- Check if cluster files exist
    #      The test uses cluster_name, so it looks for the very file that
    #      save_cluster would write first.
    #
    if exit_if_exist and os.path.isfile(f'{cluster_dir}/{cluster_name}-000.npy'):
        print('\n*** Oups. There are already clusters in the target folder!\n')
        return 0,0
    pwk.mkdir(cluster_dir)
    # ---- Rescale
    #      A rescaled cluster size below 1 could never be filled and would
    #      divide by zero in the cluster count, so it is clamped to 1.
    #
    n = int(len(dataset_desc)*scale)
    dataset = dataset_desc[:n]
    cluster_size = max(1, int(cluster_size*scale))
    nb_clusters  = (n + cluster_size - 1) // cluster_size     # ceil(n/cluster_size)
    print('Rescaled.')
    pwk.subtitle('Parameters :')
    print(f'Scale is : {scale}')
    print(f'Image size is     : {image_size}')
    print(f'dataset length is : {n}')
    print(f'cluster size is   : {cluster_size}')
    print('clusters nb  is   :', nb_clusters)
    print(f'cluster dir  is   : {cluster_dir}')
    # ---- Read and save clusters
    #
    pwk.subtitle('Running...')
    imgs, desc, cluster_id = [],[],0
    #
    for _,row in dataset.iterrows():
        #
        filename = f'{dataset_img}/{row.image_id}'
        #
        # ---- Read image, resize (and normalize)
        #
        img = io.imread(filename)
        img = transform.resize(img, image_size)
        #
        # ---- Add image and description
        #
        imgs.append( img )
        desc.append( row.values )
        #
        # ---- Progress bar
        #
        pwk.update_progress(f'Cluster {cluster_id:03d} :',len(imgs),
                            cluster_size, verbosity=verbosity)
        #
        # ---- Save cluster if full
        #
        if len(imgs)==cluster_size:
            imgs,desc,cluster_id = save_cluster(imgs,desc,cols,cluster_id)
    # ---- Save the last, incomplete cluster
    if len(imgs)>0:
        imgs,desc,cluster_id = save_cluster(imgs,desc,cols,cluster_id)
    duration = pwk.chrono_stop()
    return cluster_id,duration
 ```
 %% Cell type:markdown id: tags:
 ### 3.3 - Clusters building
 %% Cell type:code id: tags:
 ``` python
 # ---- Build clusters
 #      Clusters go to <output_dir>/clusters-<lx>x<ly>, so each image size
 #      gets its own folder.
 #
 lx,ly        = image_size
 cluster_dir  = f'{output_dir}/clusters-{lx}x{ly}'
 cluster_nb,duration = read_and_save( dataset_csv, dataset_img,
                                     shuffle       = True,
                                     seed          = seed,
                                     scale         = scale,
                                     cluster_size  = cluster_size,
                                     cluster_dir   = cluster_dir,
                                     image_size    = image_size,
                                     exit_if_exist = exit_if_exist,
                                     verbosity     = progress_verbosity )
 # ---- Conclusion...
 #      s is the total size, in bytes, of every file found under cluster_dir.
 #      If read_and_save returned early (0,0) because clusters already exist,
 #      duration is 0 and s is the size of the files already there.
 directory = pathlib.Path(cluster_dir)
 s=sum(f.stat().st_size for f in directory.glob('**/*') if f.is_file())
 pwk.subtitle('Ressources :')
 print('Duration     : ',pwk.hdelay(duration))
 print('Size         : ',pwk.hsize(s))
 # Linear extrapolation of the measured values to the full dataset (scale=1)
 pwk.subtitle('Estimation with scale=1 :')
 print('Duration     : ',pwk.hdelay(duration*(1/scale)))
 print('Size         : ',pwk.hsize(s*(1/scale)))
 ```
 %% Cell type:code id: tags:
 ``` python
 pwk.end()
 ```
 %% Cell type:markdown id: tags:
 ---
 <img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>

--- a/VAE/09-VAE-with-CelebA-192x160.ipynb
+++ b/VAE/09-VAE-with-CelebA-192x160.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<img width=\"800px\" src=\"../fidle/img/00-Fidle-header-01.svg\"></img>\n",
+    "\n",
+    "# <!-- TITLE --> [VAE9] - Training session for our VAE with 192x160 images\n",
+    "<!-- DESC --> Episode 4 : Training with our clustered datasets in notebook or batch mode\n",
+    "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n",
+    "\n",
+    "## Objectives :\n",
+    " - Build and train a VAE model with a large dataset in  **high resolution (192x160, about 140 GB)**\n",
+    " - Understanding a more advanced programming model with **data generator**\n",
+    "\n",
+    "The [CelebFaces Attributes Dataset (CelebA)](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) contains about 200,000 images (202599,218,178,3).  \n",
+    "\n",
+    "## What we're going to do :\n",
+    "\n",
+    " - Defining a VAE model\n",
+    " - Build the model\n",
+    " - Train it\n",
+    " - Follow the learning process with Tensorboard\n",
+    "\n",
+    "## Acknowledgements :\n",
+    "As before, thanks to **François Chollet** who is at the base of this example.  \n",
+    "See : https://keras.io/examples/generative/vae\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 1 - Init python stuff"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import sys\n",
+    "\n",
+    "from tensorflow import keras\n",
+    "from tensorflow.keras import layers\n",
+    "from tensorflow.keras.callbacks import TensorBoard\n",
+    "\n",
+    "from modules.models    import VAE\n",
+    "from modules.layers    import SamplingLayer\n",
+    "from modules.callbacks import ImagesCallback, BestModelCallback\n",
+    "from modules.datagen   import DataGenerator\n",
+    "\n",
+    "sys.path.append('..')\n",
+    "import fidle.pwk as pwk\n",
+    "\n",
+    "run_dir = './run/VAE9'\n",
+    "datasets_dir = pwk.init('VAE9', run_dir)\n",
+    "\n",
+    "VAE.about()\n",
+    "DataGenerator.about()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# To clean run_dir, uncomment and run this next line\n",
+    "# ! rm -r \"$run_dir\"/images-* \"$run_dir\"/logs \"$run_dir\"/figs \"$run_dir\"/models ; rmdir \"$run_dir\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 2 - Parameters\n",
+    "`scale` : With scale=1, we need 1'30s on a GPU V100 ...and >20' on a CPU !  \n",
+    "`latent_dim` : 2 dimensions is small, but useful to draw !  \n",
+    "`fit_verbosity` : verbosity during training : 0 = silent, 1 = progress bar, 2 = one line per epoch  \n",
+    "`loss_weights` : Our **loss function** is the weighted sum of two loss:\n",
+    " - `r_loss` which measures the loss during reconstruction.  \n",
+    " - `kl_loss` which measures the dispersion.  \n",
+    "\n",
+    "The weights are defined by: `loss_weights=[k1,k2]` where : `total_loss = k1*r_loss + k2*kl_loss`  \n",
+    "In practice, a value of \\[.6,.4\\] gives good results here.\n",
+    "\n",
+    "\n",
+    "Uncomment the right lines according to what you want."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fit_verbosity = 1\n",
+    "\n",
+    "# ---- For tests\n",
+    "\n",
+    "scale         = 0.01\n",
+    "image_size    = (192,160)\n",
+    "enhanced_dir  = './data'\n",
+    "latent_dim    = 300\n",
+    "loss_weights  = [.6,.4]\n",
+    "batch_size    = 64\n",
+    "epochs        = 5\n",
+    "\n",
+    "# ---- Training with a full dataset of large images\n",
+    "#\n",
+    "# scale         = 1.\n",
+    "# image_size    = (192,160)\n",
+    "# enhanced_dir  = f'{datasets_dir}/celeba/enhanced'\n",
+    "# latent_dim    = 300\n",
+    "# loss_weights  = [.6,.4]\n",
+    "# batch_size    = 64\n",
+    "# epochs        = 15"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Override parameters (batch mode) - Just forget this cell"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pwk.override('scale', 'image_size', 'enhanced_dir', 'latent_dim', 'loss_weights')\n",
+    "pwk.override('batch_size', 'epochs', 'fit_verbosity')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 3 - Prepare data\n",
+    "Let's instantiate our generator for the entire dataset."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.1 - Finding the right place"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lx,ly      = image_size\n",
+    "train_dir  = f'{enhanced_dir}/clusters-{lx}x{ly}'\n",
+    "\n",
+    "print('Train directory is :',train_dir)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.2 - Get a DataGenerator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_gen = DataGenerator(train_dir, 32, scale=scale)\n",
+    "\n",
+    "print(f'Data generator is ready with : {len(data_gen)} batchs of {data_gen.batch_size} images, or {data_gen.dataset_size} images')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 4 - Build model\n",
+    "Note: We conserve the geometry of our last convolutional output (shape_before_flattening) so that we can adapt the decoder to the encoder."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Encoder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "inputs    = keras.Input(shape=(lx, ly, 3))\n",
+    "x         = layers.Conv2D(32,  4, strides=2, padding=\"same\", activation=\"relu\")(inputs)\n",
+    "x         = layers.BatchNormalization(axis=1)(x)\n",
+    "\n",
+    "x         = layers.Conv2D(64,  4, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+    "x         = layers.BatchNormalization(axis=1)(x)\n",
+    "\n",
+    "x         = layers.Conv2D(128, 4, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+    "x         = layers.BatchNormalization(axis=1)(x)\n",
+    "\n",
+    "x         = layers.Conv2D(256, 4, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+    "x         = layers.BatchNormalization(axis=1)(x)\n",
+    "\n",
+    "x         = layers.Conv2D(512, 4, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+    "x         = layers.BatchNormalization(axis=1)(x)\n",
+    "\n",
+    "x         = layers.Flatten()(x)\n",
+    "\n",
+    "z_mean    = layers.Dense(latent_dim, name=\"z_mean\")(x)\n",
+    "z_log_var = layers.Dense(latent_dim, name=\"z_log_var\")(x)\n",
+    "z         = SamplingLayer()([z_mean, z_log_var])\n",
+    "\n",
+    "encoder = keras.Model(inputs, [z_mean, z_log_var, z], name=\"encoder\")\n",
+    "encoder.compile()\n",
+    "# encoder.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Decoder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "inputs  = keras.Input(shape=(latent_dim,))\n",
+    "\n",
+    "x       = layers.Dense(512*6*5)(inputs)\n",
+    "x       = layers.Reshape((6,5,512))(x)\n",
+    "\n",
+    "x       = layers.UpSampling2D()(x)\n",
+    "x       = layers.Conv2D(512,  kernel_size=3, strides=1, padding='same', activation='relu')(x)\n",
+    "x       = layers.BatchNormalization(axis=1)(x)\n",
+    "\n",
+    "x       = layers.UpSampling2D()(x)\n",
+    "x       = layers.Conv2D(256,  kernel_size=3, strides=1, padding='same', activation='relu')(x)\n",
+    "x       = layers.BatchNormalization(axis=1)(x)\n",
+    "\n",
+    "x       = layers.UpSampling2D()(x)\n",
+    "x       = layers.Conv2D(128,  kernel_size=3, strides=1, padding='same', activation='relu')(x)\n",
+    "x       = layers.BatchNormalization(axis=1)(x)\n",
+    "\n",
+    "x       = layers.UpSampling2D()(x)\n",
+    "x       = layers.Conv2D(64,   kernel_size=3, strides=1, padding='same', activation='relu')(x)\n",
+    "x       = layers.BatchNormalization(axis=1)(x)\n",
+    "\n",
+    "x       = layers.UpSampling2D()(x)\n",
+    "outputs = layers.Conv2D(3,    kernel_size=3, strides=1, padding='same', activation='sigmoid')(x)\n",
+    "\n",
+    "decoder = keras.Model(inputs, outputs, name=\"decoder\")\n",
+    "decoder.compile()\n",
+    "# decoder.summary()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### VAE\n",
+    "Our loss function is the weighted sum of two values.  \n",
+    "`reconstruction_loss` which measures the loss during reconstruction.  \n",
+    "`kl_loss` which measures the dispersion.  \n",
+    "\n",
+    "The weights are defined by `loss_weights=[k1,k2]` :  \n",
+    "`total_loss = k1*reconstruction_loss + k2*kl_loss`\n",
+    "\n",
+    "if `k2 = 0`, the loss function includes only `reconstruction_loss`  \n",
+    "if `k1 = 0`, the loss function includes only `kl_loss`  \n",
+    "In practice, a value of \\[.6,.4\\] gives good results here.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vae = VAE(encoder, decoder, loss_weights)\n",
+    "\n",
+    "vae.compile(optimizer=keras.optimizers.Adam())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 5 - Train\n",
+    "With `scale=1`, need 20' for 10 epochs on a V100 (IDRIS)  \n",
+    "...on a basic CPU, may be >40 hours !"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5.1 - Callbacks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x_draw,_   = data_gen[0]\n",
+    "data_gen.rewind()\n",
+    "\n",
+    "callback_images      = ImagesCallback(x=x_draw, z_dim=latent_dim, nb_images=5, from_z=True, from_random=True, run_dir=run_dir)\n",
+    "callback_bestmodel   = BestModelCallback( run_dir + '/models/best_model.h5' )\n",
+    "callback_tensorboard = TensorBoard(log_dir=run_dir + '/logs', histogram_freq=1)\n",
+    "\n",
+    "callbacks_list = [callback_images, callback_bestmodel]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5.2 - Train it"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pwk.chrono_start()\n",
+    "\n",
+    "history = vae.fit(data_gen, epochs=epochs, batch_size=batch_size, callbacks=callbacks_list, verbose=fit_verbosity)\n",
+    "\n",
+    "pwk.chrono_show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 6 - Training review\n",
+    "### 6.1 - History"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pwk.plot_history(history,  plot={\"Loss\":['loss','r_loss', 'kl_loss']}, save_as='01-history')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6.2 - Reconstruction during training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images_z, images_r = callback_images.get_images( range(0,epochs,2) )\n",
+    "\n",
+    "pwk.subtitle('Original images :')\n",
+    "pwk.plot_images(x_draw[:5], None, indices='all', columns=5, x_size=2,y_size=2, save_as='02-original')\n",
+    "\n",
+    "pwk.subtitle('Encoded/decoded images')\n",
+    "pwk.plot_images(images_z, None, indices='all', columns=5, x_size=2,y_size=2, save_as='03-reconstruct')\n",
+    "\n",
+    "pwk.subtitle('Original images :')\n",
+    "pwk.plot_images(x_draw[:5], None, indices='all', columns=5, x_size=2,y_size=2, save_as=None)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6.3 - Generation (latent -> decoder) during training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pwk.subtitle('Generated images from latent space')\n",
+    "pwk.plot_images(images_r, None, indices='all', columns=5, x_size=2,y_size=2, save_as='04-encoded')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pwk.end()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "<img width=\"80px\" src=\"../fidle/img/00-Fidle-logo-01.svg\"></img>"
+   ]
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "8e38643e33497db9a306e3f311fa98cb1e65371278ca73ee4ea0c76aa5a4f387"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.9.7 64-bit ('fidle-cpu': conda)",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
+%% Cell type:markdown id: tags:
+<img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
+# <!-- TITLE --> [VAE9] - Training session for our VAE with 192x160 images
+<!-- DESC --> Episode 4 : Training with our clustered datasets in notebook or batch mode
+<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
+## Objectives :
+ - Build and train a VAE model with a large dataset in  **high resolution (192x160, about 140 GB)**
+ - Understanding a more advanced programming model with **data generator**
+The [CelebFaces Attributes Dataset (CelebA)](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) contains about 200,000 images (202599,218,178,3).
+## What we're going to do :
+ - Defining a VAE model
+ - Build the model
+ - Train it
+ - Follow the learning process with Tensorboard
+## Acknowledgements :
+As before, thanks to **François Chollet** who is at the base of this example.
+See : https://keras.io/examples/generative/vae
+%% Cell type:markdown id: tags:
+## Step 1 - Init python stuff
+%% Cell type:code id: tags:
+``` 
+import numpy as np
+import matplotlib.pyplot as plt
+import sys
+from tensorflow import keras
+from tensorflow.keras import layers
+from tensorflow.keras.callbacks import TensorBoard
+from modules.models    import VAE
+from modules.layers    import SamplingLayer
+from modules.callbacks import ImagesCallback, BestModelCallback
+from modules.datagen   import DataGenerator
+sys.path.append('..')
+import fidle.pwk as pwk
+run_dir = './run/VAE9'
+datasets_dir = pwk.init('VAE9', run_dir)
+VAE.about()
+DataGenerator.about()
+```
+%% Cell type:code id: tags:
+``` 
+# To clean run_dir, uncomment and run this next line
+# ! rm -r "$run_dir"/images-* "$run_dir"/logs "$run_dir"/figs "$run_dir"/models ; rmdir "$run_dir"
+```
+%% Cell type:markdown id: tags:
+## Step 2 - Parameters
+`scale` : With scale=1, we need 1'30s on a GPU V100 ...and >20' on a CPU !
+`latent_dim` : 2 dimensions is small, but useful to draw !
+`fit_verbosity` : verbosity during training : 0 = silent, 1 = progress bar, 2 = one line per epoch
+`loss_weights` : Our **loss function** is the weighted sum of two loss:
+ - `r_loss` which measures the loss during reconstruction.
+ - `kl_loss` which measures the dispersion.
+The weights are defined by: `loss_weights=[k1,k2]` where : `total_loss = k1*r_loss + k2*kl_loss`
+In practice, a value of \[.6,.4\] gives good results here.
+Uncomment the right lines according to what you want.
+%% Cell type:code id: tags:
+``` 
+fit_verbosity = 1
+# ---- For tests
+scale         = 0.01
+image_size    = (192,160)
+enhanced_dir  = './data'
+latent_dim    = 300
+loss_weights  = [.6,.4]
+batch_size    = 64
+epochs        = 5
+# ---- Training with a full dataset of large images
+#
+# scale         = 1.
+# image_size    = (192,160)
+# enhanced_dir  = f'{datasets_dir}/celeba/enhanced'
+# latent_dim    = 300
+# loss_weights  = [.6,.4]
+# batch_size    = 64
+# epochs        = 15
+```
+%% Cell type:markdown id: tags:
+Override parameters (batch mode) - Just forget this cell
+%% Cell type:code id: tags:
+``` 
+pwk.override('scale', 'image_size', 'enhanced_dir', 'latent_dim', 'loss_weights')
+pwk.override('batch_size', 'epochs', 'fit_verbosity')
+```
+%% Cell type:markdown id: tags:
+## Step 3 - Prepare data
+Let's instantiate our generator for the entire dataset.
+%% Cell type:markdown id: tags:
+### 3.1 - Finding the right place
+%% Cell type:code id: tags:
+``` 
+lx,ly      = image_size
+train_dir  = f'{enhanced_dir}/clusters-{lx}x{ly}'
+print('Train directory is :',train_dir)
+```
+%% Cell type:markdown id: tags:
+### 3.2 - Get a DataGenerator
+%% Cell type:code id: tags:
+``` 
+data_gen = DataGenerator(train_dir, 32, scale=scale)
+print(f'Data generator is ready with : {len(data_gen)} batchs of {data_gen.batch_size} images, or {data_gen.dataset_size} images')
+```
+%% Cell type:markdown id: tags:
+## Step 4 - Build model
+Note: We conserve the geometry of our last convolutional output (shape_before_flattening) so that we can adapt the decoder to the encoder.
+%% Cell type:markdown id: tags:
+#### Encoder
+%% Cell type:code id: tags:
+``` 
+inputs    = keras.Input(shape=(lx, ly, 3))
+x         = layers.Conv2D(32,  4, strides=2, padding="same", activation="relu")(inputs)
+x         = layers.BatchNormalization(axis=1)(x)
+x         = layers.Conv2D(64,  4, strides=2, padding="same", activation="relu")(x)
+x         = layers.BatchNormalization(axis=1)(x)
+x         = layers.Conv2D(128, 4, strides=2, padding="same", activation="relu")(x)
+x         = layers.BatchNormalization(axis=1)(x)
+x         = layers.Conv2D(256, 4, strides=2, padding="same", activation="relu")(x)
+x         = layers.BatchNormalization(axis=1)(x)
+x         = layers.Conv2D(512, 4, strides=2, padding="same", activation="relu")(x)
+x         = layers.BatchNormalization(axis=1)(x)
+x         = layers.Flatten()(x)
+z_mean    = layers.Dense(latent_dim, name="z_mean")(x)
+z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
+z         = SamplingLayer()([z_mean, z_log_var])
+encoder = keras.Model(inputs, [z_mean, z_log_var, z], name="encoder")
+encoder.compile()
+# encoder.summary()
+```
+%% Cell type:markdown id: tags:
+#### Decoder
+%% Cell type:code id: tags:
+``` 
+inputs  = keras.Input(shape=(latent_dim,))
+x       = layers.Dense(512*6*5)(inputs)
+x       = layers.Reshape((6,5,512))(x)
+x       = layers.UpSampling2D()(x)
+x       = layers.Conv2D(512,  kernel_size=3, strides=1, padding='same', activation='relu')(x)
+x       = layers.BatchNormalization(axis=1)(x)
+x       = layers.UpSampling2D()(x)
+x       = layers.Conv2D(256,  kernel_size=3, strides=1, padding='same', activation='relu')(x)
+x       = layers.BatchNormalization(axis=1)(x)
+x       = layers.UpSampling2D()(x)
+x       = layers.Conv2D(128,  kernel_size=3, strides=1, padding='same', activation='relu')(x)
+x       = layers.BatchNormalization(axis=1)(x)
+x       = layers.UpSampling2D()(x)
+x       = layers.Conv2D(64,   kernel_size=3, strides=1, padding='same', activation='relu')(x)
+x       = layers.BatchNormalization(axis=1)(x)
+x       = layers.UpSampling2D()(x)
+outputs = layers.Conv2D(3,    kernel_size=3, strides=1, padding='same', activation='sigmoid')(x)
+decoder = keras.Model(inputs, outputs, name="decoder")
+decoder.compile()
+# decoder.summary()
+```
+%% Cell type:markdown id: tags:
+#### VAE
+Our loss function is the weighted sum of two values.
+`reconstruction_loss` which measures the loss during reconstruction.
+`kl_loss` which measures the dispersion.
+The weights are defined by `loss_weights=[k1,k2]` :
+`total_loss = k1*reconstruction_loss + k2*kl_loss`
+if `k2 = 0`, the loss function includes only `reconstruction_loss`
+if `k1 = 0`, the loss function includes only `kl_loss`
+In practice, a value of \[.6,.4\] gives good results here.
+%% Cell type:code id: tags:
+``` 
+vae = VAE(encoder, decoder, loss_weights)
+vae.compile(optimizer=keras.optimizers.Adam())
+```
+%% Cell type:markdown id: tags:
+## Step 5 - Train
+With `scale=1`, need 20' for 10 epochs on a V100 (IDRIS)
+...on a basic CPU, may be >40 hours !
+%% Cell type:markdown id: tags:
+### 5.1 - Callbacks
+%% Cell type:code id: tags:
+``` 
+x_draw,_   = data_gen[0]
+data_gen.rewind()
+callback_images      = ImagesCallback(x=x_draw, z_dim=latent_dim, nb_images=5, from_z=True, from_random=True, run_dir=run_dir)
+callback_bestmodel   = BestModelCallback( run_dir + '/models/best_model.h5' )
+callback_tensorboard = TensorBoard(log_dir=run_dir + '/logs', histogram_freq=1)
+callbacks_list = [callback_images, callback_bestmodel]
+```
+%% Cell type:markdown id: tags:
+### 5.2 - Train it
+%% Cell type:code id: tags:
+``` 
+pwk.chrono_start()
+history = vae.fit(data_gen, epochs=epochs, batch_size=batch_size, callbacks=callbacks_list, verbose=fit_verbosity)
+pwk.chrono_show()
+```
+%% Cell type:markdown id: tags:
+## Step 6 - Training review
+### 6.1 - History
+%% Cell type:code id: tags:
+``` 
+pwk.plot_history(history,  plot={"Loss":['loss','r_loss', 'kl_loss']}, save_as='01-history')
+```
+%% Cell type:markdown id: tags:
+### 6.2 - Reconstruction during training
+%% Cell type:code id: tags:
+``` 
+images_z, images_r = callback_images.get_images( range(0,epochs,2) )
+pwk.subtitle('Original images :')
+pwk.plot_images(x_draw[:5], None, indices='all', columns=5, x_size=2,y_size=2, save_as='02-original')
+pwk.subtitle('Encoded/decoded images')
+pwk.plot_images(images_z, None, indices='all', columns=5, x_size=2,y_size=2, save_as='03-reconstruct')
+pwk.subtitle('Original images :')
+pwk.plot_images(x_draw[:5], None, indices='all', columns=5, x_size=2,y_size=2, save_as=None)
+```
+%% Cell type:markdown id: tags:
+### 6.3 - Generation (latent -> decoder) during training
+%% Cell type:code id: tags:
+``` 
+pwk.subtitle('Generated images from latent space')
+pwk.plot_images(images_r, None, indices='all', columns=5, x_size=2,y_size=2, save_as='04-encoded')
+```
+%% Cell type:code id: tags:
+``` 
+pwk.end()
+```
+%% Cell type:markdown id: tags:
+---
+<img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>
--- a/VAE/09-VAE-with-CelebA-post.ipynb
+++ b/VAE/09-VAE-with-CelebA-post.ipynb