From 930f74c05ad7ad6fee971bfd67e14cba90bc2399 Mon Sep 17 00:00:00 2001
From: Jean-Luc Parouty <Jean-Luc.Parouty@simap.grenoble-inp.fr>
Date: Wed, 22 Dec 2021 12:33:00 +0100
Subject: [PATCH] Update VAE for 192x160

---
 VAE/06-Prepare-CelebA-datasets.ipynb          |  12 +-
 VAE/09-VAE-with-CelebA-192x160.ipynb          | 457 ++++++++++++++++++
 ...st.ipynb => 10-VAE-with-CelebA-post.ipynb} |   0
 3 files changed, 468 insertions(+), 1 deletion(-)
 create mode 100644 VAE/09-VAE-with-CelebA-192x160.ipynb
 rename VAE/{09-VAE-with-CelebA-post.ipynb => 10-VAE-with-CelebA-post.ipynb} (100%)

diff --git a/VAE/06-Prepare-CelebA-datasets.ipynb b/VAE/06-Prepare-CelebA-datasets.ipynb
index c3a5185..29a9bbf 100644
--- a/VAE/06-Prepare-CelebA-datasets.ipynb
+++ b/VAE/06-Prepare-CelebA-datasets.ipynb
@@ -111,6 +111,16 @@
 "# output_dir = f'{datasets_dir}/celeba/enhanced'\n",
 "# exit_if_exist = True\n",
 "\n",
+ "# ---- Just for tests\n",
+ "# Save clustered dataset in ./data\n",
+ "#\n",
+ "# scale = 0.05\n",
+ "# seed = 123\n",
+ "# cluster_size = 10000\n",
+ "# image_size = (192,160)\n",
+ "# output_dir = './data'\n",
+ "# exit_if_exist = False\n",
+ "\n",
 "# ---- Full clusters generation, large size : 138 GB\n",
 "# Save clustered dataset in <datasets_dir> \n",
 "#\n",
@@ -357,7 +367,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
- "version": "3.8.5"
+ "version": "3.9.7"
 }
 },
 "nbformat": 4,
diff --git a/VAE/09-VAE-with-CelebA-192x160.ipynb b/VAE/09-VAE-with-CelebA-192x160.ipynb
new file mode 100644
index 0000000..2ac969a
--- /dev/null
+++ b/VAE/09-VAE-with-CelebA-192x160.ipynb
@@ -0,0 +1,457 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "<img width=\"800px\" src=\"../fidle/img/00-Fidle-header-01.svg\"></img>\n",
+ "\n",
+ "# <!-- TITLE --> [VAE9] - Training session for our VAE with 192x160 images\n",
+ "<!-- DESC --> Episode 4 : Training with our clustered datasets in notebook or batch mode\n",
+ "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n",
+ "\n",
+ "## Objectives :\n",
+ " - Build and train a VAE model with a large dataset in **medium resolution (140 GB)**\n",
+ " - Understand a more advanced programming model with a **data generator**\n",
+ "\n",
+ "The [CelebFaces Attributes Dataset (CelebA)](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) contains about 200,000 images (202599,218,178,3). \n",
+ "\n",
+ "## What we're going to do :\n",
+ "\n",
+ " - Define a VAE model\n",
+ " - Build the model\n",
+ " - Train it\n",
+ " - Follow the learning process with Tensorboard\n",
+ "\n",
+ "## Acknowledgements :\n",
+ "As before, thanks to **François Chollet**, whose work is the basis of this example. \n",
+ "See : https://keras.io/examples/generative/vae\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 1 - Init python stuff"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import sys\n",
+ "\n",
+ "from tensorflow import keras\n",
+ "from tensorflow.keras import layers\n",
+ "from tensorflow.keras.callbacks import TensorBoard\n",
+ "\n",
+ "from modules.models import VAE\n",
+ "from modules.layers import SamplingLayer\n",
+ "from modules.callbacks import ImagesCallback, BestModelCallback\n",
+ "from modules.datagen import DataGenerator\n",
+ "\n",
+ "sys.path.append('..')\n",
+ "import fidle.pwk as pwk\n",
+ "\n",
+ "run_dir = './run/VAE9'\n",
+ "datasets_dir = pwk.init('VAE9', run_dir)\n",
+ "\n",
+ "VAE.about()\n",
+ "DataGenerator.about()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# To clean run_dir, uncomment and run this next line\n",
+ "# ! rm -r \"$run_dir\"/images-* \"$run_dir\"/logs \"$run_dir\"/figs \"$run_dir\"/models ; rmdir \"$run_dir\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 2 - Parameters\n",
+ "`scale` : With scale=1, we need 1'30s on a GPU V100 ...and >20' on a CPU ! \n",
+ "`latent_dim` : 2 dimensions is small, but useful for drawing ! \n",
+ "`fit_verbosity` : verbosity during training : 0 = silent, 1 = progress bar, 2 = one line per epoch \n",
+ "`loss_weights` : Our **loss function** is the weighted sum of two losses:\n",
+ " - `r_loss` which measures the loss during reconstruction. \n",
+ " - `kl_loss` which measures the dispersion. \n",
+ "\n",
+ "The weights are defined by: `loss_weights=[k1,k2]` where : `total_loss = k1*r_loss + k2*kl_loss` \n",
+ "In practice, a value of \[.6,.4\] gives good results here.\n",
+ "\n",
+ "\n",
+ "Uncomment the right lines according to what you want."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fit_verbosity = 1\n",
+ "\n",
+ "# ---- For tests\n",
+ "\n",
+ "scale = 0.01\n",
+ "image_size = (192,160)\n",
+ "enhanced_dir = './data'\n",
+ "latent_dim = 300\n",
+ "loss_weights = [.6,.4]\n",
+ "batch_size = 64\n",
+ "epochs = 5\n",
+ "\n",
+ "# ---- Training with a full dataset of large images\n",
+ "#\n",
+ "# scale = 1.\n",
+ "# image_size = (192,160)\n",
+ "# enhanced_dir = f'{datasets_dir}/celeba/enhanced'\n",
+ "# latent_dim = 300\n",
+ "# loss_weights = [.6,.4]\n",
+ "# batch_size = 64\n",
+ "# epochs = 15"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Override parameters (batch mode) - Just ignore this cell"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pwk.override('scale', 'image_size', 'enhanced_dir', 'latent_dim', 'loss_weights')\n",
+ "pwk.override('batch_size', 'epochs', 'fit_verbosity')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 3 - Prepare data\n",
+ "Let's instantiate our generator for the entire dataset."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.1 - Finding the right place"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lx,ly = image_size\n",
+ "train_dir = f'{enhanced_dir}/clusters-{lx}x{ly}'\n",
+ "\n",
+ "print('Train directory is :',train_dir)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.2 - Get a DataGenerator"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_gen = DataGenerator(train_dir, 32, scale=scale)\n",
+ "\n",
+ "print(f'Data generator is ready with : {len(data_gen)} batches of {data_gen.batch_size} images, or {data_gen.dataset_size} images')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 4 - Build model\n",
+ "Note: We preserve the geometry of the encoder's last convolutional output (here 6x5x512 for 192x160 images) so that the decoder can be built as a mirror of the encoder."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Encoder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "inputs = keras.Input(shape=(lx, ly, 3))\n",
+ "x = layers.Conv2D(32, 4, strides=2, padding=\"same\", activation=\"relu\")(inputs)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.Conv2D(64, 4, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.Conv2D(128, 4, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.Conv2D(256, 4, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.Conv2D(512, 4, strides=2, padding=\"same\", activation=\"relu\")(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.Flatten()(x)\n",
+ "\n",
+ "z_mean = layers.Dense(latent_dim, name=\"z_mean\")(x)\n",
+ "z_log_var = layers.Dense(latent_dim, name=\"z_log_var\")(x)\n",
+ "z = SamplingLayer()([z_mean, z_log_var])\n",
+ "\n",
+ "encoder = keras.Model(inputs, [z_mean, z_log_var, z], name=\"encoder\")\n",
+ "encoder.compile()\n",
+ "# encoder.summary()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Decoder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "inputs = keras.Input(shape=(latent_dim,))\n",
+ "\n",
+ "x = layers.Dense(512*6*5)(inputs)\n",
+ "x = layers.Reshape((6,5,512))(x)\n",
+ "\n",
+ "x = layers.UpSampling2D()(x)\n",
+ "x = layers.Conv2D(512, kernel_size=3, strides=1, padding='same', activation='relu')(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.UpSampling2D()(x)\n",
+ "x = layers.Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu')(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.UpSampling2D()(x)\n",
+ "x = layers.Conv2D(128, kernel_size=3, strides=1, padding='same', activation='relu')(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.UpSampling2D()(x)\n",
+ "x = layers.Conv2D(64, kernel_size=3, strides=1, padding='same', activation='relu')(x)\n",
+ "x = layers.BatchNormalization(axis=1)(x)\n",
+ "\n",
+ "x = layers.UpSampling2D()(x)\n",
+ "outputs = layers.Conv2D(3, kernel_size=3, strides=1, padding='same', activation='sigmoid')(x)\n",
+ "\n",
+ "decoder = keras.Model(inputs, outputs, name=\"decoder\")\n",
+ "decoder.compile()\n",
+ "# decoder.summary()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### VAE\n",
+ "Our loss function is the weighted sum of two values. \n",
+ "`r_loss` which measures the loss during reconstruction. \n",
+ "`kl_loss` which measures the dispersion. \n",
+ "\n",
+ "The weights are defined by `loss_weights=[k1,k2]` : \n",
+ "`total_loss = k1*r_loss + k2*kl_loss`\n",
+ "\n",
+ "If `k2 = 0`, the loss function includes only `r_loss` \n",
+ "If `k1 = 0`, the loss function includes only `kl_loss` \n",
+ "In practice, a value of \[.6,.4\] gives good results here.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vae = VAE(encoder, decoder, loss_weights)\n",
+ "\n",
+ "vae.compile(optimizer=keras.optimizers.Adam())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 5 - Train\n",
+ "With `scale=1`, we need about 20' for 10 epochs on a V100 (IDRIS) \n",
+ "...on a basic CPU, it may take more than 40 hours !"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.1 - Callbacks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_draw,_ = data_gen[0]\n",
+ "data_gen.rewind()\n",
+ "\n",
+ "callback_images = ImagesCallback(x=x_draw, z_dim=latent_dim, nb_images=5, from_z=True, from_random=True, run_dir=run_dir)\n",
+ "callback_bestmodel = BestModelCallback( run_dir + '/models/best_model.h5' )\n",
+ "callback_tensorboard = TensorBoard(log_dir=run_dir + '/logs', histogram_freq=1)\n",
+ "\n",
+ "callbacks_list = [callback_images, callback_bestmodel]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.2 - Train it"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pwk.chrono_start()\n",
+ "\n",
+ "history = vae.fit(data_gen, epochs=epochs, batch_size=batch_size, callbacks=callbacks_list, verbose=fit_verbosity)\n",
+ "\n",
+ "pwk.chrono_show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 6 - Training review\n",
+ "### 6.1 - History"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pwk.plot_history(history, plot={\"Loss\":['loss','r_loss', 'kl_loss']}, save_as='01-history')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 6.2 - Reconstruction during training"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "images_z, images_r = callback_images.get_images( range(0,epochs,2) )\n",
+ "\n",
+ "pwk.subtitle('Original images :')\n",
+ "pwk.plot_images(x_draw[:5], None, indices='all', columns=5, x_size=2,y_size=2, save_as='02-original')\n",
+ "\n",
+ "pwk.subtitle('Encoded/decoded images')\n",
+ "pwk.plot_images(images_z, None, indices='all', columns=5, x_size=2,y_size=2, save_as='03-reconstruct')\n",
+ "\n",
+ "pwk.subtitle('Original images :')\n",
+ "pwk.plot_images(x_draw[:5], None, indices='all', columns=5, x_size=2,y_size=2, save_as=None)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 6.3 - Generation (latent -> decoder) during training"
+ ]
+ },
+ {
+ "cell_type": "code",
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pwk.subtitle('Generated images from latent space')\n", + "pwk.plot_images(images_r, None, indices='all', columns=5, x_size=2,y_size=2, save_as='04-encoded')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pwk.end()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "<img width=\"80px\" src=\"../fidle/img/00-Fidle-logo-01.svg\"></img>" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "8e38643e33497db9a306e3f311fa98cb1e65371278ca73ee4ea0c76aa5a4f387" + }, + "kernelspec": { + "display_name": "Python 3.9.7 64-bit ('fidle-cpu': conda)", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/VAE/09-VAE-with-CelebA-post.ipynb b/VAE/10-VAE-with-CelebA-post.ipynb similarity index 100% rename from VAE/09-VAE-with-CelebA-post.ipynb rename to VAE/10-VAE-with-CelebA-post.ipynb -- GitLab