From 930f74c05ad7ad6fee971bfd67e14cba90bc2399 Mon Sep 17 00:00:00 2001
From: Jean-Luc Parouty <Jean-Luc.Parouty@simap.grenoble-inp.fr>
Date: Wed, 22 Dec 2021 12:33:00 +0100
Subject: [PATCH] Update VAE for 192x160

---
 VAE/06-Prepare-CelebA-datasets.ipynb          |  12 +-
 VAE/09-VAE-with-CelebA-192x160.ipynb          | 457 ++++++++++++++++++
 ...st.ipynb => 10-VAE-with-CelebA-post.ipynb} |   0
 3 files changed, 468 insertions(+), 1 deletion(-)
 create mode 100644 VAE/09-VAE-with-CelebA-192x160.ipynb
 rename VAE/{09-VAE-with-CelebA-post.ipynb => 10-VAE-with-CelebA-post.ipynb} (100%)

diff --git a/VAE/06-Prepare-CelebA-datasets.ipynb b/VAE/06-Prepare-CelebA-datasets.ipynb
index c3a5185..29a9bbf 100644
--- a/VAE/06-Prepare-CelebA-datasets.ipynb
+++ b/VAE/06-Prepare-CelebA-datasets.ipynb
@@ -111,6 +111,16 @@
 "# output_dir = f'{datasets_dir}/celeba/enhanced'\n",
 "# exit_if_exist = True\n",
 "\n",
+ "# ---- Just for tests\n",
+ "# Save clustered dataset in ./data\n",
+ "#\n",
+ "# scale = 0.05\n",
+ "# seed = 123\n",
+ "# cluster_size = 10000\n",
+ "# image_size = (192,160)\n",
+ "# output_dir = './data'\n",
+ "# exit_if_exist = False\n",
+ "\n",
 "# ---- Full clusters generation, large size : 138 GB\n",
 "# Save clustered dataset in <datasets_dir> \n",
 "#\n",
@@ -357,7 +367,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
- "version": "3.8.5"
+ "version": "3.9.7"
 }
 },
 "nbformat": 4,
diff --git a/VAE/09-VAE-with-CelebA-192x160.ipynb b/VAE/09-VAE-with-CelebA-192x160.ipynb
new file mode 100644
index 0000000..2ac969a
--- /dev/null
+++ b/VAE/09-VAE-with-CelebA-192x160.ipynb
@@ -0,0 +1,457 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "<img width=\"800px\" src=\"../fidle/img/00-Fidle-header-01.svg\"></img>\n",
+ "\n",
+ "# <!-- TITLE --> [VAE9] - Training session for our VAE with 192x160 images\n",
+ "<!-- DESC --> Episode 4 : Training with our clustered datasets in notebook or batch mode\n",
+ "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n",
+ "\n",
+ "## Objectives :\n",
+ " - Build and train a VAE model with a large dataset in **medium resolution (140 GB)**\n",
+ " - Understand a more advanced programming model with a **data generator**\n",
+ "\n",
+ "The [CelebFaces Attributes Dataset (CelebA)](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) contains about 200,000 images (202599,218,178,3). \n",
+ "\n",
+ "## What we're going to do :\n",
+ "\n",
+ " - Define a VAE model\n",
+ " - Build the model\n",
+ " - Train it\n",
+ " - Follow the learning process with Tensorboard\n",
+ "\n",
+ "## Acknowledgements :\n",
+ "As before, thanks to **François Chollet**, whose work is the basis of this example. \n",
+ "See : https://keras.io/examples/generative/vae\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 1 - Init python stuff"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import sys\n",
+ "\n",
+ "from tensorflow import keras\n",
+ "from tensorflow.keras import layers\n",
+ "from tensorflow.keras.callbacks import TensorBoard\n",
+ "\n",
+ "from modules.models import VAE\n",
+ "from modules.layers import SamplingLayer\n",
+ "from modules.callbacks import ImagesCallback, BestModelCallback\n",
+ "from modules.datagen import DataGenerator\n",
+ "\n",
+ "sys.path.append('..')\n",
+ "import fidle.pwk as pwk\n",
+ "\n",
+ "run_dir = './run/VAE9'\n",
+ "datasets_dir = pwk.init('VAE9', run_dir)\n",
+ "\n",
+ "VAE.about()\n",
+ "DataGenerator.about()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To clean run_dir, uncomment and run this next line\n",
+ "# ! rm -r \"$run_dir\"/images-* \"$run_dir\"/logs \"$run_dir\"/figs \"$run_dir\"/models ; rmdir \"$run_dir\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 2 - Parameters\n",
+ "`scale` : With scale=1, we need 1'30s on a GPU V100 ...and >20' on a CPU ! \n",
+ "`latent_dim` : 2 dimensions is small, but useful for drawing ! \n",
+ "`fit_verbosity` : verbosity during training : 0 = silent, 1 = progress bar, 2 = one line per epoch \n",
+ "`loss_weights` : Our **loss function** is the weighted sum of two losses:\n",
+ " - `r_loss` which measures the loss during reconstruction. \n",
+ " - `kl_loss` which measures the dispersion. \n",
+ "\n",
+ "The weights are defined by: `loss_weights=[k1,k2]` where : `total_loss = k1*r_loss + k2*kl_loss` \n",
+ "In practice, a value of \[.6,.4\] gives good results here.\n",
+ "\n",
+ "\n",
+ "Uncomment the right lines according to what you want."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fit_verbosity = 1\n",
+ "\n",
+ "# ---- For tests\n",
+ "\n",
+ "scale = 0.01\n",
+ "image_size = (192,160)\n",
+ "enhanced_dir = './data'\n",
+ "latent_dim = 300\n",
+ "loss_weights = [.6,.4]\n",
+ "batch_size = 64\n",
+ "epochs = 5\n",
+ "\n",
+ "# ---- Training with a full dataset of large images\n",
+ "#\n",
+ "# scale = 1.\n",
+ "# image_size = (192,160)\n",
+ "# enhanced_dir = f'{datasets_dir}/celeba/enhanced'\n",
+ "# latent_dim = 300\n",
+ "# loss_weights = [.6,.4]\n",
+ "# batch_size = 64\n",
+ "# epochs = 15"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Override parameters (batch mode) - Just ignore this cell"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pwk.override('scale', 'image_size', 'enhanced_dir', 'latent_dim', 'loss_weights')\n",
+ "pwk.override('batch_size', 'epochs', 'fit_verbosity')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 3 - Prepare data\n",
+ "Let's instantiate our generator for the entire dataset."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.1 - Finding the right place"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lx,ly = image_size\n",
+ "train_dir = f'{enhanced_dir}/clusters-{lx}x{ly}'\n",
+ "\n",
+ "print('Train directory is :',train_dir)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.2 - Get a DataGenerator"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_gen = DataGenerator(train_dir, 32, scale=scale)\n",
+ "\n",
+ "print(f'Data generator is ready with : {len(data_gen)} batches of {data_gen.batch_size} images, or {data_gen.dataset_size} images')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 4 - Build model\n",
+ "Note: We preserve the geometry of the encoder's last convolutional output (here 6x5x512 for 192x160 images) so that the decoder can be built as a mirror of the encoder."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Encoder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "inputs = keras.Input(shape=(lx, ly, 3))\n",
+ "x = layers.Conv2D(32, 4, strides=2, padding=\"same\", activation=\"relu\")(inputs)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.Conv2D(64, 4, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.Conv2D(128, 4, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.Conv2D(256, 4, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.Conv2D(512, 4, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.Flatten()(x)\n",
+ "\n",
+ "z_mean = layers.Dense(latent_dim, name=\"z_mean\")(x)\n",
+ "z_log_var = layers.Dense(latent_dim, name=\"z_log_var\")(x)\n",
+ "z = SamplingLayer()([z_mean, z_log_var])\n",
+ "\n",
+ "encoder = keras.Model(inputs, [z_mean, z_log_var, z], name=\"encoder\")\n",
+ "encoder.compile()\n",
+ "# encoder.summary()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Decoder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "inputs = keras.Input(shape=(latent_dim,))\n",
+ "\n",
+ "x = layers.Dense(512*6*5)(inputs)\n",
+ "x = layers.Reshape((6,5,512))(x)\n",
+ "\n",
+ "x = layers.UpSampling2D()(x)\n",
+ "x = layers.Conv2D(512, kernel_size=3, strides=1, padding='same', activation='relu')(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.UpSampling2D()(x)\n",
+ "x = layers.Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu')(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.UpSampling2D()(x)\n",
+ "x = layers.Conv2D(128, kernel_size=3, strides=1, padding='same', activation='relu')(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.UpSampling2D()(x)\n",
+ "x = layers.Conv2D(64, kernel_size=3, strides=1, padding='same', activation='relu')(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.UpSampling2D()(x)\n",
+ "outputs = layers.Conv2D(3, kernel_size=3, strides=1, padding='same', activation='sigmoid')(x)\n",
+ "\n",
+ "decoder = keras.Model(inputs, outputs, name=\"decoder\")\n",
+ "decoder.compile()\n",
+ "# decoder.summary()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### VAE\n",
+ "Our loss function is the weighted sum of two values. \n",
+ "`r_loss` which measures the loss during reconstruction. \n",
+ "`kl_loss` which measures the dispersion. \n",
+ "\n",
+ "The weights are defined by `loss_weights=[k1,k2]` : \n",
+ "`total_loss = k1*r_loss + k2*kl_loss`\n",
+ "\n",
+ "If `k2 = 0`, the loss function includes only `r_loss` \n",
+ "If `k1 = 0`, the loss function includes only `kl_loss` \n",
+ "In practice, a value of \[.6,.4\] gives good results here.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vae = VAE(encoder, decoder, loss_weights)\n",
+ "\n",
+ "vae.compile(optimizer=keras.optimizers.Adam())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 5 - Train\n",
+ "With `scale=1`, we need about 20' for 10 epochs on a V100 (IDRIS) \n",
+ "...on a basic CPU, it may take more than 40 hours !"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.1 - Callbacks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_draw,_ = data_gen[0]\n",
+ "data_gen.rewind()\n",
+ "\n",
+ "callback_images = ImagesCallback(x=x_draw, z_dim=latent_dim, nb_images=5, from_z=True, from_random=True, run_dir=run_dir)\n",
+ "callback_bestmodel = BestModelCallback( run_dir + '/models/best_model.h5' )\n",
+ "callback_tensorboard = TensorBoard(log_dir=run_dir + '/logs', histogram_freq=1)\n",
+ "\n",
+ "callbacks_list = [callback_images, callback_bestmodel]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.2 - Train it"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pwk.chrono_start()\n",
+ "\n",
+ "history = vae.fit(data_gen, epochs=epochs, batch_size=batch_size, callbacks=callbacks_list, verbose=fit_verbosity)\n",
+ "\n",
+ "pwk.chrono_show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 6 - Training review\n",
+ "### 6.1 - History"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pwk.plot_history(history, plot={\"Loss\":['loss','r_loss', 'kl_loss']}, save_as='01-history')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 6.2 - Reconstruction during training"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "images_z, images_r = callback_images.get_images( range(0,epochs,2) )\n",
+ "\n",
+ "pwk.subtitle('Original images :')\n",
+ "pwk.plot_images(x_draw[:5], None, indices='all', columns=5, x_size=2,y_size=2, save_as='02-original')\n",
+ "\n",
+ "pwk.subtitle('Encoded/decoded images')\n",
+ "pwk.plot_images(images_z, None, indices='all', columns=5, x_size=2,y_size=2, save_as='03-reconstruct')\n",
+ "\n",
+ "pwk.subtitle('Original images :')\n",
+ "pwk.plot_images(x_draw[:5], None, indices='all', columns=5, x_size=2,y_size=2, save_as=None)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 6.3 - Generation (latent -> decoder) during training"
+ ]
+ },
+ {
+ "cell_type": "code",
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pwk.subtitle('Generated images from latent space')\n", + "pwk.plot_images(images_r, None, indices='all', columns=5, x_size=2,y_size=2, save_as='04-encoded')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pwk.end()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "<img width=\"80px\" src=\"../fidle/img/00-Fidle-logo-01.svg\"></img>" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "8e38643e33497db9a306e3f311fa98cb1e65371278ca73ee4ea0c76aa5a4f387" + }, + "kernelspec": { + "display_name": "Python 3.9.7 64-bit ('fidle-cpu': conda)", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/VAE/09-VAE-with-CelebA-post.ipynb b/VAE/10-VAE-with-CelebA-post.ipynb similarity index 100% rename from VAE/09-VAE-with-CelebA-post.ipynb rename to VAE/10-VAE-with-CelebA-post.ipynb -- GitLab