Remove seaborn import and remove obsolete vae module

7c2ca0c7 · Jean-Luc Parouty · 88aa19d9 · 7c2ca0c7 · 88aa19d9
Commit 7c2ca0c7 authored 4 years ago by Jean-Luc Parouty
--- a/IMDB/02-Prediction.ipynb
+++ b/IMDB/02-Prediction.ipynb
@@ -134,7 +134,6 @@
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib\n",
-    "import seaborn as sns\n",
    "import pandas as pd\n",
    "\n",
    "import os,sys,h5py,json,re\n",
@@ -375,7 +374,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.9"
+   "version": "3.8.5"
  }
 },
 "nbformat": 4,

 %% Cell type:markdown id: tags:
 <img width="800px" src="../fidle/img/00-Fidle-header-01.svg"></img>
 # <!-- TITLE --> [IMDB2] - Reload and reuse a saved model
 <!-- DESC --> Retrieving a saved model to perform a sentiment analysis (movie review)
 <!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->
 ## Objectives :
 - The objective is to guess whether film reviews are **positive or negative** based on the analysis of the text.
 - For this, we will use our **previously saved model**.
 The original dataset can be found **[there](http://ai.stanford.edu/~amaas/data/sentiment/)**
 Note that [IMDb.com](https://imdb.com) offers several easy-to-use [datasets](https://www.imdb.com/interfaces/)
 For simplicity's sake, we'll use the dataset directly [embedded in Keras](https://www.tensorflow.org/api_docs/python/tf/keras/datasets)
 ## What we're going to do :
 - Preparing the data
 - Retrieve our saved model
 - Evaluate the result
 %% Cell type:markdown id: tags:
 ## Step 1 - Init python stuff
 %% Cell type:code id: tags:
 ``` python
 import numpy as np
 import tensorflow as tf
 import tensorflow.keras as keras
 import tensorflow.keras.datasets.imdb as imdb
 import matplotlib.pyplot as plt
 import matplotlib
-import seaborn as sns
 import pandas as pd
 import os,sys,h5py,json,re
 from importlib import reload
 sys.path.append('..')
 import fidle.pwk as pwk
 datasets_dir = pwk.init('IMDB2')
 ```
 %% Output
    **FIDLE 2020 - Practical Work Module**
    Version              : 0.6.1 DEV
    Notebook id          : IMDB2
    Run time             : Friday 18 December 2020, 18:21:49
    TensorFlow version   : 2.0.0
    Keras version        : 2.2.4-tf
    Datasets dir         : /home/pjluc/datasets/fidle
    Running mode         : full
    Update keras cache   : False
    Save figs            : True
    Path figs            : ./run/figs
 %% Cell type:markdown id: tags:
 ## Step 2 : Preparing the data
 ### 2.1 - Our reviews :
 %% Cell type:code id: tags:
 ``` python
 reviews = [ "This film is particularly nice, a must see.",
             "Some films are great classics and cannot be ignored.",
             "This movie is just abominable and doesn't deserve to be seen!"]
 ```
 %% Cell type:markdown id: tags:
 ### 2.2 - Retrieve dictionaries
 Note : This dictionary is generated by [01-Embedding-Keras](01-Embedding-Keras.ipynb) notebook.
 %% Cell type:code id: tags:
 ``` python
 # ---- Read the word -> index dictionary saved on disk
 with open('./data/word_index.json', 'r') as fp:
     word_index = json.load(fp)
 # ---- Build the reverse (index -> word) dictionary from it
 index_word = {idx: token for token, idx in word_index.items()}
 ```
 %% Cell type:markdown id: tags:
 ### 2.3 - Clean, index and pad
 %% Cell type:code id: tags:
 ``` python
 # Convert each text review into a list of word indices, then pad all of
 # them to a common length so they can be stacked into one array (x_data).
 #
 # max_len    : target length given to pad_sequences (maxlen)
 # vocab_size : indices strictly greater than this are replaced by 2
 # Index conventions visible in the recorded output of the next cell:
 # 0 -> <pad>, 1 -> <start>, 2 -> <unknown>.
 max_len    = 256
 vocab_size = 10000
 nb_reviews = len(reviews)
 x_data     = []
 # ---- For all reviews
 for review in reviews:
    # ---- First index must be <start>
    index_review=[1]
    # ---- For all words
    for w in review.split(' '):
        # ---- Clean it
        # Strips every character that is not an ASCII letter or digit.
        w_clean = re.sub(r"[^a-zA-Z0-9]", "", w)
        # ---- Not empty ?
        if len(w_clean)>0:
            # ---- Get the index
            # NOTE(review): the lookup uses the raw token `w`, not `w_clean`;
            # `w_clean` only serves the emptiness test above. Tokens such as
            # "This", "nice," or "see." therefore fall back to 2 (<unknown>),
            # as the recorded output shows. Switching to `w_clean` would also
            # turn "doesn't" into "doesnt" - confirm the intended behavior.
            w_index = word_index.get(w,2)
            # NOTE(review): `>` lets an index equal to vocab_size through;
            # presumably `>=` is needed if valid indices are 0..vocab_size-1
            # - verify against the model's embedding size.
            if w_index>vocab_size : w_index=2
            # ---- Add the index (already replaced by 2 when above vocab_size)
            index_review.append(w_index)
    # ---- Add the indexed review
    x_data.append(index_review)
 # ---- Padding
 # Zeros (value=0) are appended after the words (padding='post') up to max_len.
 x_data = keras.preprocessing.sequence.pad_sequences(x_data, value   = 0, padding = 'post', maxlen  = max_len)
 ```
 %% Cell type:markdown id: tags:
 ### 2.4 - Have a look
 %% Cell type:code id: tags:
 ``` python
 def translate(x):
     """Turn a sequence of word indices back into a space-separated string.

     Indices that are missing from index_word are rendered as '?'.
     """
     words = []
     for idx in x:
         words.append(index_word.get(idx, '?'))
     return ' '.join(words)

 # ---- Show each review: original text, indexed form and back-translation
 for i in range(nb_reviews):
     # Keep everything up to the first 0 (padding) plus 5 more positions.
     # Assumes every row holds at least one 0; otherwise [0][0] raises IndexError.
     first_pad = np.where(x_data[i] == 0)[0][0]
     imax = first_pad + 5
     head = x_data[i][:imax]
     print('\nText review      :', reviews[i])
     print(f'x_train[{i}]       :', list(head), '(...)')
     print('Translation      :', translate(head), '(...)')
 %% Output
    Text review      : This film is particularly nice, a must see.
    x_train[0]       : [1, 2, 22, 9, 572, 2, 6, 215, 2, 0, 0, 0, 0, 0] (...)
    Translation      : <start> <unknown> film is particularly <unknown> a must <unknown> <pad> <pad> <pad> <pad> <pad> (...)
    Text review      : Some films are great classics and cannot be ignored.
    x_train[1]       : [1, 2, 108, 26, 87, 2239, 5, 566, 30, 2, 0, 0, 0, 0, 0] (...)
    Translation      : <start> <unknown> films are great classics and cannot be <unknown> <pad> <pad> <pad> <pad> <pad> (...)
    Text review      : This movie is just abominable and doesn't deserve to be seen!
    x_train[2]       : [1, 2, 20, 9, 43, 2, 5, 152, 1833, 8, 30, 2, 0, 0, 0, 0, 0] (...)
    Translation      : <start> <unknown> movie is just <unknown> and doesn't deserve to be <unknown> <pad> <pad> <pad> <pad> <pad> (...)
 %% Cell type:markdown id: tags:
 ## Step 3 - Bring back the model
 %% Cell type:code id: tags:
 ``` python
 model = keras.models.load_model('./run/models/best_model.h5')
 ```
 %% Cell type:markdown id: tags:
 ## Step 4 - Predict
 %% Cell type:code id: tags:
 ``` python
 y_pred   = model.predict(x_data)
 ```
 %% Cell type:markdown id: tags:
 #### And the winner is :
 %% Cell type:code id: tags:
 ``` python
 # ---- One line per review: text, predicted label and raw score
 for i in range(nb_reviews):
     score = y_pred[i][0]
     # A score below 0.5 is reported as NEGATIVE, anything else as POSITIVE
     if score < 0.5:
         label = 'NEGATIVE'
     else:
         label = 'POSITIVE'
     print(f'\n{reviews[i]:<70} =>', label, f'({score:.2f})')
 ```
 %% Output
    This film is particularly nice, a must see.                            => POSITIVE (0.56)
    Some films are great classics and cannot be ignored.                   => POSITIVE (0.63)
    This movie is just abominable and doesn't deserve to be seen!          => NEGATIVE (0.35)
 %% Cell type:code id: tags:
 ``` python
 pwk.end()
 ```
 %% Output
    End time is : Friday 18 December 2020, 18:21:50
    Duration is : 00:00:01 555ms
    This notebook ends here
 %% Cell type:markdown id: tags:
 ---
 <img width="80px" src="../fidle/img/00-Fidle-logo-01.svg"></img>

--- a/VAE/modules/vae-obsolete.py
+++ b/VAE/modules/vae-obsolete.py
-# ------------------------------------------------------------------
-#     _____ _     _ _
-#    |  ___(_) __| | | ___
-#    | |_  | |/ _` | |/ _ \
-#    |  _| | | (_| | |  __/
-#    |_|   |_|\__,_|_|\___|
-# ------------------------------------------------------------------
-# Formation Introduction au Deep Learning  (FIDLE)
-# CNRS/SARI/DEVLOG 2020 - S. Arias, E. Maldonado, JL. Parouty
-# ------------------------------------------------------------------
-# by JL Parouty (feb 2020), based on David Foster examples.
-import numpy as np
-import math
-import tensorflow as tf
-import tensorflow.keras as keras
-from tensorflow.keras import backend as K
-from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Conv2DTranspose, Reshape, Lambda
-from tensorflow.keras.layers import Activation, BatchNormalization, LeakyReLU, Dropout
-from tensorflow.keras.models import Model
-from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
-from tensorflow.keras.optimizers import Adam
-from tensorflow.keras.utils import plot_model
-from modules.callbacks      import ImagesCallback
-from modules.data_generator import DataGenerator
-import os, json, time, datetime
-from IPython.display import display,Image,Markdown,HTML
-class VariationalAutoencoder():
-    version = '1.28'
-    def __init__(self, input_shape=None, encoder_layers=None, decoder_layers=None, z_dim=None, run_tag='000', verbose=0):
-        self.name           = 'Variational AutoEncoder'
-        self.input_shape    = list(input_shape)
-        self.encoder_layers = encoder_layers
-        self.decoder_layers = decoder_layers
-        self.z_dim          = z_dim
-        self.run_tag        = str(run_tag)
-        self.verbose        = verbose
-        self.run_directory  = f'./run/{run_tag}'
-        # ---- Create run directories
-        for d in ('','/models','/figs','/logs','/images'):
-            os.makedirs(self.run_directory+d, mode=0o750, exist_ok=True)
-        # ==== Encoder ================================================================
-        # ---- Input layer
-        encoder_input = Input(shape=self.input_shape, name='encoder_input')
-        x = encoder_input
-        # ---- Add next layers
-        i=1
-        for l_config in encoder_layers:
-            l_type   = l_config['type']
-            l_params = l_config.copy()
-            l_params.pop('type')
-            if l_type=='Conv2D':
-                layer = Conv2D(**l_params)
-            if l_type=='Dropout':
-                layer = Dropout(**l_params)
-            x = layer(x)
-            i+=1
-        # ---- Flatten
-        shape_before_flattening = K.int_shape(x)[1:]
-        x = Flatten()(x)
-        # ---- mu <-> log_var
-        self.mu      = Dense(self.z_dim, name='mu')(x)
-        self.log_var = Dense(self.z_dim, name='log_var')(x)
-        self.encoder_mu_log_var = Model(encoder_input, (self.mu, self.log_var))
-        # ---- output layer
-        def sampling(args):
-            mu, log_var = args
-            epsilon = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
-            return mu + K.exp(log_var / 2) * epsilon
-        encoder_output = Lambda(sampling, name='encoder_output')([self.mu, self.log_var])
-        self.encoder = Model(encoder_input, encoder_output)
-        # ==== Decoder ================================================================
-        # ---- Input layer
-        decoder_input = Input(shape=(self.z_dim,), name='decoder_input')
-        # ---- First dense layer
-        x = Dense(np.prod(shape_before_flattening))(decoder_input)
-        x = Reshape(shape_before_flattening)(x)
-        # ---- Add next layers
-        i=1
-        for l_config in decoder_layers:
-            l_type   = l_config['type']
-            l_params = l_config.copy()
-            l_params.pop('type')
-            if l_type=='Conv2DTranspose':
-                layer = Conv2DTranspose(**l_params)
-            if l_type=='Dropout':
-                layer = Dropout(**l_params)
-            x = layer(x)
-            i+=1
-        decoder_output = x
-        self.decoder = Model(decoder_input, decoder_output)
-        # ==== Encoder-Decoder ========================================================
-        model_input = encoder_input
-        model_output = self.decoder(encoder_output)
-        self.model = Model(model_input, model_output)
-        # ==== Verbosity ==============================================================
-        self.subtitle('Model initialized.')
-        print(f'Outputs will be in  : {self.run_directory}')
-        if verbose>0 :
-            self.subtitle('Encoder :')
-            self.encoder.summary()
-            self.subtitle('Decoder :')
-            self.decoder.summary()
-#             self.plot_model()
-    def compile(self, optimizer='adam', r_loss_factor='1000'):
-        self.r_loss_factor = r_loss_factor
-        def vae_r_loss(y_true, y_pred):
-            r_loss = K.mean(K.square(y_true - y_pred), axis = [1,2,3])
-            return r_loss_factor * r_loss
-        def vae_kl_loss(y_true, y_pred):
-            kl_loss =  -0.5 * K.sum(1 + self.log_var - K.square(self.mu) - K.exp(self.log_var), axis = 1)
-            return kl_loss
-        def vae_loss(y_true, y_pred):
-            r_loss = vae_r_loss(y_true, y_pred)
-            kl_loss = vae_kl_loss(y_true, y_pred)
-            return  r_loss + kl_loss
-        # See : https://github.com/tensorflow/tensorflow/issues/34944
-        # See : https://github.com/tensorflow/probability/issues/519
-        #
-        # Uncomment :
-        # tf.config.experimental_run_functions_eagerly(True)
-        #
-        # Works fine in versions 2.2, 2.3 but with horible perf. (7s -> 1'50s)
-        #
-        self.model.compile(optimizer=optimizer, 
-                           loss = vae_loss,
-                           metrics = [vae_r_loss, vae_kl_loss],
-                           experimental_run_tf_function=False)
-        print('Compiled.')
-    def train(self, 
-              x_train=None,
-              x_test=None,
-              data_generator=None,
-              batch_size=32, 
-              epochs=20,
-              initial_epoch=0,
-              k_size=1
-             ):
-        # ---- Data given or via generator
-        mode_data = (data_generator is None)
-        # ---- Size of the dataset we are going to use
-        #      k_size ==1 : mean 100%
-        #      Unused with data generator
-        #
-        if mode_data:
-            n_train = int(x_train.shape[0] * k_size)
-            n_test  = int(x_test.shape[0]  * k_size)
-        # ---- Callback : Images
-        filename = self.run_directory+"/images/image-{epoch:03d}-{i:02d}.jpg"
-        callbacks_images = ImagesCallback(filename, z_dim=self.z_dim, decoder=self.decoder)
-        # ---- Callback : Checkpoint
-        filename = self.run_directory+"/models/model-{epoch:03d}.h5"
-        callback_chkpts = ModelCheckpoint(filename, save_freq='epoch' ,verbose=0)
-        # ---- Callback : Best model
-        filename = self.run_directory+"/models/best_model.h5"
-        callback_bestmodel = ModelCheckpoint(filename, save_best_only=True, mode='min',monitor='val_loss',verbose=0)
-        # ---- Callback tensorboard
-        dirname = self.run_directory+"/logs"
-        callback_tensorboard = TensorBoard(log_dir=dirname, histogram_freq=1)
-        callbacks_list = [callbacks_images, callback_chkpts, callback_bestmodel, callback_tensorboard]
-#         callbacks_list = [callback_chkpts, callback_bestmodel, callback_tensorboard]
-        # ---- Let's go...
-        start_time   = time.time()
-        if mode_data:
-            #
-            # ---- With pure data (x_train) -----------------------------------------
-            #                             
-            self.history = self.model.fit(x_train[:n_train], x_train[:n_train],
-                                          batch_size = batch_size,
-                                          shuffle = True,
-                                          epochs = epochs,
-                                          initial_epoch = initial_epoch,
-                                          callbacks = callbacks_list,
-                                          validation_data = (x_test[:n_test], x_test[:n_test])
-                                          )
-            #
-        else:
-            # ---- With Data Generator ----------------------------------------------
-            #
-            self.history = self.model.fit(data_generator,
-                                          shuffle = True,
-                                          epochs = epochs,
-                                          initial_epoch = initial_epoch,
-                                          callbacks = callbacks_list,
-                                          validation_data = (x_test, x_test)
-                                         )
-        end_time  = time.time()
-        dt  = end_time-start_time
-        dth = str(datetime.timedelta(seconds=int(dt)))
-        self.duration = dt
-        print(f'\nTrain duration : {dt:.2f} sec. - {dth:}')
-    def plot_model(self):
-        d=self.run_directory+'/figs'
-        plot_model(self.model,   to_file=f'{d}/model.png',   show_shapes = True, show_layer_names = True, expand_nested=True)
-        plot_model(self.encoder, to_file=f'{d}/encoder.png', show_shapes = True, show_layer_names = True)
-        plot_model(self.decoder, to_file=f'{d}/decoder.png', show_shapes = True, show_layer_names = True)
-    def save(self,config='vae_config.json', model='model.h5', force=False):
-        # ---- Check if the place is still used
-        if os.path.isfile(self.run_directory+'/models/best_model.h5') and not force:
-            print('\n*** Oops. There are already stuff in the target folder !\n')
-            assert False, f'Tag directory {self.run_directory} is not empty...'
-        # ---- Save config in json
-        if config!=None:
-            to_save  = ['input_shape', 'encoder_layers', 'decoder_layers', 'z_dim', 'run_tag', 'verbose']
-            data     = { i:self.__dict__[i] for i in to_save }
-            filename = self.run_directory+'/models/'+config
-            with open(filename, 'w') as outfile:
-                json.dump(data, outfile)
-            print(f'\nConfig saved in     : {filename}')
-        # ---- Save model
-        if model!=None:
-            filename = self.run_directory+'/models/'+model
-            self.model.save(filename)
-            print(f'\nModel saved in      : {filename}')
-    def load_weights(self,model='model.h5'):
-        filename = self.run_directory+'/models/'+model
-        self.model.load_weights(filename)
-        print(f'Weights loaded from : {filename}')
-    @classmethod
-    def load(cls, run_tag='000', config='vae_config.json', weights='model.h5'):
-        # ---- Instantiate a new vae
-        filename = f'./run/{run_tag}/models/{config}'
-        with open(filename, 'r') as infile:
-            params=json.load(infile)
-            vae=cls( **params)
-        # ---- weights==None, just return it
-        if weights==None: return vae
-        # ---- weights!=None, get weights
-        vae.load_weights(weights)
-        return vae
-    @classmethod
-    def about(cls):
-        cls.subtitle('FIDLE 2020 - Variational AutoEncoder (VAE)')
-        print('TensorFlow version   :',tf.__version__)
-        print('VAE version          :', cls.version)
-    @classmethod
-    def subtitle(cls,text):
-        display(Markdown(f'<br>**{text}**'))