Correction on VAE10

20dabf20 · Jean-Luc Parouty · c3cf7448 · 20dabf20
Commit 20dabf20 authored 3 years ago by Jean-Luc Parouty
--- a/VAE/10-VAE-with-CelebA-post.ipynb
+++ b/VAE/10-VAE-with-CelebA-post.ipynb
@@ -6,7 +6,7 @@
   "source": [
    "<img width=\"800px\" src=\"../fidle/img/00-Fidle-header-01.svg\"></img>\n",
    "\n",
-    "# <!-- TITLE --> [VAE9] - Data generation from latent space\n",
+    "# <!-- TITLE --> [VAE10] - Data generation from latent space\n",
    "<!-- DESC --> Episode 5 : Exploring latent space to generate new data\n",
    "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n",
    "\n",
@@ -61,7 +61,7 @@
    "import fidle.pwk as pwk\n",
    "\n",
    "run_dir = './run/VAE8.001'\n",
-    "datasets_dir = pwk.init('VAE9', run_dir)\n",
+    "datasets_dir = pwk.init('VAE10', run_dir)\n",
    "\n",
    "VAE.about()"
   ]

 %% Cell type:markdown id: tags:

 <img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>

-# <!-- TITLE --> [VAE9] - Data generation from latent space
+# <!-- TITLE --> [VAE10] - Data generation from latent space
 <!-- DESC --> Episode 5 : Exploring latent space to generate new data
 <!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->

 ## Objectives :
 - New data generation from **latent space**
 - Understanding of underlying principles
 - Guided image generation, **latent morphing**
 - Model management

 Here again, we don't consume data anymore, but we generate them ! ;-)


 The [CelebFaces Attributes Dataset (CelebA)](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) contains about 200,000 images (202599,218,178,3)...
 ...But our data is now in the imagination of our network!

 ## What we're going to do :
 - Load a saved model
 - Reconstruct some images from latent space
 - Matrix of generated images

 %% Cell type:markdown id: tags:

 ## Step 1 - Init python stuff

 %% Cell type:code id: tags:

 ``` 
 import numpy as np
 import pandas as pd
 import scipy.stats
 from skimage import io, transform
 import os,sys,importlib
 import math
 from importlib import reload

 import matplotlib
 import matplotlib.pyplot as plt

 from scipy.stats import norm

 from modules.datagen import DataGenerator
 from modules.models  import VAE

 sys.path.append('..')
 import fidle.pwk as pwk

 run_dir = './run/VAE8.001'
-datasets_dir = pwk.init('VAE9', run_dir)
+datasets_dir = pwk.init('VAE10', run_dir)

 VAE.about()
 ```

 %% Cell type:markdown id: tags:

 ## Step 2 - Parameters
 **Note :** We only have one set of data, used for training.
 We did not separate our data between learning and testing because our goal is to generate data.

 Define these parameters according to the clustered dataset you wish to use...

 %% Cell type:code id: tags:

 ``` 
 # image_size   : size (lx,ly) of the images, it also selects the 'clusters-{lx}x{ly}' directory
 # enhanced_dir : directory that contains the clusters directories
 # Keep one (and only one) of the configurations below uncommented.

 # --- Tests
 #
 image_size   = (128,128)
 enhanced_dir = './data'

 # --- Full clusters (128,128)
 #
 # image_size   = (128,128)
 # enhanced_dir = f'{datasets_dir}/celeba/enhanced'

 # ---- Full clusters (192,160)
 #
 # image_size   = (192,160)
 # enhanced_dir = f'{datasets_dir}/celeba/enhanced'
 ```

 %% Cell type:code id: tags:

 ``` 
 # ---- Used for continuous integration - Just forget this line
 #
 pwk.override('image_size', 'enhanced_dir')
 ```

 %% Cell type:markdown id: tags:

 ## Step 3 - Get some data

 %% Cell type:code id: tags:

 ``` 
 # ---- the place of the clusters files
 #      train_dir is built from enhanced_dir and from the image size (lx,ly)

 lx,ly        = image_size
 train_dir    = f'{enhanced_dir}/clusters-{lx}x{ly}'
 dataset_csv  = f'{datasets_dir}/celeba/origine/list_attr_celeba.csv'
 dataset_img  = f'{datasets_dir}/celeba/origine/img_align_celeba'

 # ---- Get images (one cluster)
 #      Only one cluster file (images-000.npy) is loaded

 x_data       = np.load(f'{train_dir}/images-000.npy')

 # ---- Get descriptions
 #      header=0 : the first line of the csv file gives the column names

 dataset_desc = pd.read_csv(dataset_csv, header=0)

 print('Data directory is :',train_dir)
 print('Images retrieved  :',len(x_data))
 print('Descriptions      :',len(dataset_desc))
 ```

 %% Cell type:markdown id: tags:

 ## Step 4 - Reload best model

 %% Cell type:code id: tags:

 ``` 
 # ---- Instantiate a VAE, then reload the model stored under run_dir
 #      (presumably the best model saved during training - to be confirmed)
 vae=VAE()
 vae.reload(f'{run_dir}/models/best_model')
 ```

 %% Cell type:markdown id: tags:

 ## Step 5 - Image reconstruction

 %% Cell type:code id: tags:

 ``` 
 # ---- Number of images to show, used for both rows of images below

 n_show = 10

 # ---- Shuffle images (in place) before picking the ones to show

 np.random.shuffle(x_data)

 # ---- Get latent points and reconstructed images
 #      The encoder gives (z_mean, z_log_var, z) and the decoder rebuilds images from z

 # y_reconst = vae.predict(x_data)

 z_mean, z_log_var, z_data    = vae.encoder.predict(x_data)
 y_reconst                    = vae.decoder.predict(z_data)

 # ---- Just show it : originals first, then their reconstructions

 pwk.plot_images(x_data[:n_show],    None, columns=n_show, x_size=1.5,y_size=1.5, spines_alpha=0.1, save_as='01-original')
 pwk.plot_images(y_reconst[:n_show], None, columns=n_show, x_size=1.5,y_size=1.5, spines_alpha=0.1, save_as='02-reconstruct')
 ```

 %% Cell type:markdown id: tags:

 ## Step 6 - Latent space distribution

 %% Cell type:code id: tags:

 ``` 
 # ---- Dimension of the latent space, and abscissas of the reference curve

 z_dim = z_data.shape[1]
 x = np.linspace(-3, 3, 100)

 # ---- One histogram per latent dimension, on a 4x10 grid
 #      No more than z_dim plots, to stay inside z_data when z_dim < 40

 n_rows, n_cols = 4, 10
 n_plots        = min(n_rows*n_cols, z_dim)

 fig = plt.figure(figsize=(12, 10))
 fig.subplots_adjust(hspace=0.3, wspace=0.2)

 for i in range(n_plots):
    ax = fig.add_subplot(n_rows, n_cols, i+1)
    # Distribution of the i-th latent coordinate, as a density
    ax.hist(z_data[:,i], density=True, bins = 20)
    ax.axis('off')
    ax.set_xlim(-3,3)
    # Index of the dimension, written under the plot
    ax.text(0.5, -0.2, str(i), fontsize=14, ha='center', transform=ax.transAxes)
    # Standard normal density, as a reference
    ax.plot(x,norm.pdf(x))

 pwk.save_fig('03-latent-space')
 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## Step 7 - Generation of new faces

 %% Cell type:code id: tags:

 ``` 
 # ---- Number of faces to generate
 n_new = 48

 # ---- Draw n_new random points in latent space (normal law, mean 0, std 0.7)
 #      then decode them into images
 z_new = np.random.normal( loc=0,scale=0.7,size=(n_new,z_dim) )
 x_new = vae.decoder.predict(z_new)

 # ---- Show them, 6 images per row
 pwk.plot_images(x_new, None, columns=6, x_size=2,y_size=2.4, spines_alpha=0,y_padding=0, save_as='04-new-faces')
 ```

 %% Cell type:markdown id: tags:

 ## Step 8 - Playing with latent space
 ### 8.1 - The attributes of our images

 %% Cell type:code id: tags:

 ``` 
 # ---- First rows of the description file
 pwk.subtitle('Dataset description file (csv) :')
 display(dataset_desc.head())

 # ---- Names of the attributes, 4 per line, each one padded to 24 characters
 pwk.subtitle('Defined attributes :')
 for rank, attribute in enumerate(dataset_desc.columns, start=1):
    line_end = '\n' if rank % 4 == 0 else ''
    print(f'{attribute:24}', end=line_end)
 ```

 %% Cell type:markdown id: tags:

 ### 8.2 Let's find some predictable images

 %% Cell type:code id: tags:

 ``` 
 def get_latent_vector(images_desc, vector_size=50):
    """
    Give a set of images to the encoder and return the mean of their latent vectors.
    Only the first `vector_size` descriptions are used; each image is read
    from `dataset_img` and resized to `image_size` before being encoded.
    args:
        images_desc : Images description, with an 'image_id' column (filenames)
        vector_size : Maximum number of images to use (50)
    return:
        mean(z) : mean, over the images, of the latent vectors z
    raises:
        ValueError : if images_desc describes no image
    """

    # ---- Get filenames of given images descriptions

    filenames = images_desc['image_id'][:vector_size]

    # An empty selection would otherwise fail later, inside the encoder
    if len(filenames) == 0:
        raise ValueError('No image in the given images description')

    # ---- Retrieve images
    #      The real number of images is reported, it can be smaller than vector_size

    print(f'Read {len(filenames)} images...', end='')
    imgs = [ transform.resize(io.imread(f'{dataset_img}/{filename}'), image_size)
             for filename in filenames ]
    print('done.')

    # ---- Get latent space vectors
    #      The encoder gives (z_mean, z_log_var, z), only z is used here

    x_images = np.array(imgs)
    _, _, z  = vae.encoder.predict(x_images)

    # ---- return mean vector

    return z.mean(axis=0)
 ```

 %% Cell type:code id: tags:

 ``` 
 df = dataset_desc

 # ---- Mean latent vector of 3 families of faces
 #      Attributes are tested against 1 / -1 (Male == -1 is labelled 'Woman' below)

 z11 = get_latent_vector( df.loc[ (df['Male'] == -1)  & (df['Smiling']== 1) & (df['Blond_Hair']== 1)] )
 z12 = get_latent_vector( df.loc[ (df['Male'] == -1)  & (df['Smiling']== 1) & (df['Black_Hair']== 1)] )
 z21 = get_latent_vector( df.loc[ (df['Male'] ==  1)  & (df['Smiling']==-1) & (df['Black_Hair']== 1)] )

 # ---- Labels, in the same order as the vectors (z11, z12, z21)

 labels=['Woman\nBlond hair\nSmiling','Woman\nBlack hair\nSmiling','Man\nBlack Hair\nNot smiling']


 # ---- Decode the 3 mean vectors and show the resulting faces

 z_images = np.array( [z11,z12,z21] )
 x_images = vae.decoder.predict( z_images, verbose=0 )
 pwk.plot_images(x_images,labels,columns=3,x_size=3,y_size=3,spines_alpha=0, save_as='05-predictable')
 ```

 %% Cell type:markdown id: tags:

 ### 8.3 - And do some latent morphing !

 %% Cell type:code id: tags:

 ``` 

 # ---- Size of the grid (n x n images) and elementary steps in latent space
 #      dj : 1/n of the way from z11 to z12
 #      di : 1/n of the way from z11 to z21

 n  = 6
 dj = (z12-z11)/n
 di = (z21-z11)/n

 # ---- Latent points of the grid (i : outer index, j : inner index)

 z = [ z11+di*i+dj*j for i in range(n) for j in range(n) ]

 # ---- Decode the grid and show it, n images per row

 x_images = vae.decoder.predict( np.array(z) )
 pwk.plot_images(x_images,columns=n,x_size=2,y_size=2.4,y_padding=0,spines_alpha=0, save_as='06-morphing')
 ```

 %% Cell type:code id: tags:

 ``` 
 pwk.end()
 ```

 %% Cell type:markdown id: tags:

 ---
 <img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>