diff --git a/BHPD.Keras3/02-DNN-Regression-Premium.ipynb b/BHPD.Keras3/02-DNN-Regression-Premium.ipynb
index d3a23c48895fe7ad95dea25c8dec4712a53877ce..65cdd8bd312dffde3374e99ee2fa463427ad70fa 100644
--- a/BHPD.Keras3/02-DNN-Regression-Premium.ipynb
+++ b/BHPD.Keras3/02-DNN-Regression-Premium.ipynb
@@ -284,8 +284,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "os.makedirs('./run/models', mode=0o750, exist_ok=True)\n",
-    "save_dir = \"./run/models/best_model.keras\"\n",
+    "os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)\n",
+    "save_dir = f'{run_dir}/models/best_model.keras'\n",
    "\n",
    "savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_mae', mode='max', save_best_only=True)"
   ]
@@ -383,7 +383,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "loaded_model = keras.models.load_model('./run/models/best_model.keras')\n",
+    "loaded_model = keras.models.load_model(f'{run_dir}/models/best_model.keras')\n",
    "loaded_model.summary()\n",
    "print(\"Loaded.\")"
   ]
diff --git a/Embedding.Keras2/01-One-hot-encoding.ipynb b/Embedding.Keras3/01-One-hot-encoding.ipynb
similarity index 87%
rename from Embedding.Keras2/01-One-hot-encoding.ipynb
rename to Embedding.Keras3/01-One-hot-encoding.ipynb
index e4e689f847225906dce47c80089d8af93708ed55..7c7451c72d7da7a9a014506a04ec7c8cfeb07fdf 100644
--- a/Embedding.Keras2/01-One-hot-encoding.ipynb
+++ b/Embedding.Keras3/01-One-hot-encoding.ipynb
@@ -6,8 +6,8 @@
   "source": [
    "<img width=\"800px\" src=\"../fidle/img/header.svg\"></img>\n",
    "\n",
-    "# <!-- TITLE --> [K2IMDB1] - Sentiment analysis with hot-one encoding\n",
-    "<!-- DESC --> A basic example of sentiment analysis with sparse encoding, using a dataset from Internet Movie Database (IMDB), using Keras 2 and Tensorflow (obsolete)\n",
+    "# <!-- TITLE --> [K3IMDB1] - Sentiment analysis with one-hot encoding\n",
+    "<!-- DESC --> A basic example of sentiment analysis with sparse encoding, using a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch\n",
    "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n",
    "\n",
    "## Objectives :\n",
@@ -41,24 +41,20 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "import numpy as np\n",
-    "\n",
-    "import tensorflow as tf\n",
-    "import tensorflow.keras as keras\n",
-    "import tensorflow.keras.datasets.imdb as imdb\n",
+    "import os\n",
+    "os.environ['KERAS_BACKEND'] = 'torch'\n",
    "\n",
-    "import matplotlib.pyplot as plt\n",
-    "import matplotlib\n",
+    "import keras\n",
+    "import keras.datasets.imdb as imdb\n",
    "\n",
+    "import numpy as np\n",
    "import pandas as pd\n",
-    "\n",
-    "import os,sys,h5py,json\n",
-    "from importlib import reload\n",
+    "import matplotlib.pyplot as plt\n",
    "\n",
    "import fidle\n",
    "\n",
    "# Init Fidle environment\n",
-    "run_id, run_dir, datasets_dir = fidle.init('K2IMDB1')"
+    "run_id, run_dir, datasets_dir = fidle.init('K3IMDB1')"
   ]
  },
 {
@@ -232,17 +228,22 @@
   "source": [
    "# ----- Retrieve x,y\n",
    "#\n",
-    "(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words=vocab_size, skip_top=hide_most_frequently)\n",
+    "start_char = 1      # Start of a sequence (padding is 0)\n",
+    "oov_char   = 2      # Out-of-vocabulary\n",
+    "index_from = 3      # First word id\n",
    "\n",
-    "y_train = np.asarray(y_train).astype('float32')\n",
-    "y_test  = np.asarray(y_test ).astype('float32')\n",
+    "(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words  = vocab_size, \n",
+    "                                                        skip_top   = hide_most_frequently,\n",
+    "                                                        start_char = start_char, \n",
+    "                                                        oov_char   = oov_char, \n",
+    "                                                        index_from = index_from)\n",
    "\n",
    "# ---- About\n",
"#\n", "print(\"Max(x_train,x_test) : \", fidle.utils.rmax([x_train,x_test]) )\n", "print(\"Min(x_train,x_test) : \", fidle.utils.rmin([x_train,x_test]) )\n", - "print(\"x_train : {} y_train : {}\".format(x_train.shape, y_train.shape))\n", - "print(\"x_test : {} y_test : {}\".format(x_test.shape, y_test.shape))" + "print(\"Len(x_train) : \", len(x_train))\n", + "print(\"Len(x_test) : \", len(x_test))\n" ] }, { @@ -283,9 +284,9 @@ "#\n", "word_index = imdb.get_word_index()\n", "\n", - "# ---- Shift the dictionary from +3\n", + "# ---- Shift the dictionary from <index_from>\n", "#\n", - "word_index = {w:(i+3) for w,i in word_index.items()}\n", + "word_index = {w:(i+index_from) for w,i in word_index.items()}\n", "\n", "# ---- Add <pad>, <start> and <unknown> tags\n", "#\n", @@ -340,7 +341,7 @@ "outputs": [], "source": [ "sizes=[len(i) for i in x_train]\n", - "plt.figure(figsize=(16,6))\n", + "plt.figure(figsize=(12,4))\n", "plt.hist(sizes, bins=400)\n", "plt.gca().set(title='Distribution of reviews by size - [{:5.2f}, {:5.2f}]'.format(min(sizes),max(sizes)), \n", " xlabel='Size', ylabel='Density', xlim=[0,1500])\n", @@ -354,8 +355,8 @@ "metadata": {}, "outputs": [], "source": [ - "unk=[ 100*(s.count(2)/len(s)) for s in x_train]\n", - "plt.figure(figsize=(16,6))\n", + "unk=[ 100*(s.count(oov_char)/len(s)) for s in x_train]\n", + "plt.figure(figsize=(12,4))\n", "plt.hist(unk, bins=100)\n", "plt.gca().set(title='Percent of unknown words - [{:5.2f}, {:5.2f}]'.format(min(unk),max(unk)), \n", " xlabel='# unknown', ylabel='Density', xlim=[0,30])\n", @@ -384,7 +385,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 5.1 - Our one-hot encoder" + "### 5.1 - Our one-hot encoder function" ] }, { @@ -394,7 +395,7 @@ "outputs": [], "source": [ "def one_hot_encoder(x, vector_size=10000):\n", - " \n", + "\n", " # ---- Set all to 0\n", " #\n", " x_encoded = np.zeros((len(x), vector_size))\n", @@ -431,7 +432,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Step 6 - Build the model" + "## Step 6 - Build a nice model" ] }, { @@ -440,35 +441,16 @@ "metadata": {}, "outputs": [], "source": [ - "def get_model(vector_size=10000):\n", - " \n", - " model = keras.Sequential()\n", - " model.add(keras.layers.Input( shape=(vector_size,) ))\n", - " model.add(keras.layers.Dense( 32, activation='relu'))\n", - " model.add(keras.layers.Dense( 32, activation='relu'))\n", - " model.add(keras.layers.Dense( 1, activation='sigmoid'))\n", + "model = keras.Sequential(name='My IMDB classifier')\n", + "\n", + "model.add(keras.layers.Input( shape=(vocab_size,) ))\n", + "model.add(keras.layers.Dense( 32, activation='relu'))\n", + "model.add(keras.layers.Dense( 32, activation='relu'))\n", + "model.add(keras.layers.Dense( 1, activation='sigmoid'))\n", " \n", - " model.compile(optimizer = 'rmsprop',\n", + "model.compile(optimizer = 'rmsprop',\n", " loss = 'binary_crossentropy',\n", " metrics = ['accuracy'])\n", - " return model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 7 - Train the model\n", - "### 7.1 - Get it" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = get_model(vector_size=vocab_size)\n", "\n", "model.summary()" ] @@ -477,7 +459,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 7.2 - Add callback" + "## Step 7 - Train the model\n", + "### 7.1 - Add callback" ] }, { @@ -487,15 +470,16 @@ "outputs": [], "source": [ "os.makedirs(f'{run_dir}/models', mode=0o750, 
exist_ok=True)\n", - "save_dir = f'{run_dir}/models/best_model.h5'\n", - "savemodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_best_only=True)" + "save_dir = f'{run_dir}/models/best_model.keras'\n", + "\n", + "savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_accuracy', mode='max', save_best_only=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 7.3 - Train it" + "### 7.2 - Train it" ] }, { @@ -545,13 +529,14 @@ "metadata": {}, "outputs": [], "source": [ - "model = keras.models.load_model(f'{run_dir}/models/best_model.h5')\n", + "model = keras.models.load_model(f'{run_dir}/models/best_model.keras')\n", "\n", "# ---- Evaluate\n", "score = model.evaluate(x_test, y_test, verbose=0)\n", "\n", - "print('x_test / loss : {:5.4f}'.format(score[0]))\n", - "print('x_test / accuracy : {:5.4f}'.format(score[1]))\n", + "print('\\n\\nModel evaluation :\\n')\n", + "print(' x_test / loss : {:5.4f}'.format(score[0]))\n", + "print(' x_test / accuracy : {:5.4f}'.format(score[1]))\n", "\n", "values=[score[1], 1-score[1]]\n", "fidle.scrawler.donut(values,[\"Accuracy\",\"Errors\"], title=\"#### Accuracy donut is :\", save_as='03-donut')\n", diff --git a/Embedding.Keras2/02-Keras-embedding.ipynb b/Embedding.Keras3/02-Keras-embedding.ipynb similarity index 81% rename from Embedding.Keras2/02-Keras-embedding.ipynb rename to Embedding.Keras3/02-Keras-embedding.ipynb index b70f5a33d36864c01a32d101f0bccb4aab0ff3a8..21f5fa001b4a4e3d4ea75141a6170580644aaa16 100644 --- a/Embedding.Keras2/02-Keras-embedding.ipynb +++ b/Embedding.Keras3/02-Keras-embedding.ipynb @@ -6,8 +6,8 @@ "source": [ "<img width=\"800px\" src=\"../fidle/img/header.svg\"></img>\n", "\n", - "# <!-- TITLE --> [K2IMDB2] - Sentiment analysis with text embedding\n", - "<!-- DESC --> A very classical example of word embedding with a dataset from Internet Movie Database (IMDB), using Keras 2 and Tensorflow (obsolete)\n", + "# <!-- TITLE --> [K3IMDB2] - Sentiment analysis with text embedding\n", + "<!-- DESC --> A very classical example of word embedding with a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch\n", "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n", "\n", "## Objectives :\n", @@ -41,22 +41,21 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", + "import os\n", + "os.environ['KERAS_BACKEND'] = 'torch'\n", "\n", - "import tensorflow as tf\n", - "import tensorflow.keras as keras\n", - "import tensorflow.keras.datasets.imdb as imdb\n", + "import keras\n", + "import keras.datasets.imdb as imdb\n", "\n", + "import h5py,json\n", + "import numpy as np\n", + "import pandas as pd\n", "import matplotlib.pyplot as plt\n", - "import matplotlib\n", - "\n", - "import os,sys,h5py,json\n", - "from importlib import reload\n", "\n", "import fidle\n", "\n", "# Init Fidle environment\n", - "run_id, run_dir, datasets_dir = fidle.init('K2IMDB2')" + "run_id, run_dir, datasets_dir = fidle.init('K3IMDB2')" ] }, { @@ -134,17 +133,24 @@ "metadata": {}, "outputs": [], "source": [ - "(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words=vocab_size, skip_top=hide_most_frequently, seed= 42,)\n", + "# ----- Retrieve x,y\n", + "#\n", + "start_char = 1 # Start of a sequence (padding is 0)\n", + "oov_char = 2 # Out-of-vocabulary\n", + "index_from = 3 # First word id\n", "\n", - "y_train = np.asarray(y_train).astype('float32')\n", - "y_test = np.asarray(y_test ).astype('float32')\n", + "(x_train, y_train), (x_test, y_test) = 
imdb.load_data( num_words = vocab_size, \n", + " skip_top = hide_most_frequently,\n", + " start_char = start_char, \n", + " oov_char = oov_char, \n", + " index_from = index_from)\n", "\n", "# ---- About\n", "#\n", "print(\"Max(x_train,x_test) : \", fidle.utils.rmax([x_train,x_test]) )\n", "print(\"Min(x_train,x_test) : \", fidle.utils.rmin([x_train,x_test]) )\n", - "print(\"x_train : {} y_train : {}\".format(x_train.shape, y_train.shape))\n", - "print(\"x_test : {} y_test : {}\".format(x_test.shape, y_test.shape))" + "print(\"Len(x_train) : \", len(x_train))\n", + "print(\"Len(x_test) : \", len(x_test))" ] }, { @@ -167,7 +173,7 @@ "# Create a reverse dictionary : {index:word}\n", "#\n", "word_index = imdb.get_word_index()\n", - "word_index = {w:(i+3) for w,i in word_index.items()}\n", + "word_index = {w:(i+index_from) for w,i in word_index.items()}\n", "word_index.update( {'<pad>':0, '<start>':1, '<unknown>':2, '<undef>':3,} )\n", "index_word = {index:word for word,index in word_index.items()} \n", "\n", @@ -184,7 +190,7 @@ "## Step 3 - Preprocess the data (padding)\n", "In order to be processed by an NN, all entries must have the **same length.** \n", "We chose a review length of **review_len** \n", - "We will therefore complete them with a padding (of \\<pad\\>\\) " + "We will therefore complete them with a padding (of 0 as \\<pad\\>\\) " ] }, { @@ -229,14 +235,11 @@ " f.create_dataset(\"y_train\", data=y_train)\n", " f.create_dataset(\"x_test\", data=x_test)\n", " f.create_dataset(\"y_test\", data=y_test)\n", + " print('Dataset h5 file saved.')\n", "\n", "with open(f'{output_dir}/word_index.json', 'w') as fp:\n", " json.dump(word_index, fp)\n", - "\n", - "with open(f'{output_dir}/index_word.json', 'w') as fp:\n", - " json.dump(index_word, fp)\n", - "\n", - "print('Saved.')" + " print('Word to index saved.')" ] }, { @@ -256,38 +259,18 @@ "metadata": {}, "outputs": [], "source": [ - "def get_model(vocab_size=10000, dense_vector_size=32, review_len=256):\n", - " \n", - " model = keras.Sequential()\n", - " model.add(keras.layers.Input( shape=(review_len,) ))\n", - " model.add(keras.layers.Embedding(input_dim = vocab_size, \n", - " output_dim = dense_vector_size, \n", - " input_length = review_len))\n", - " model.add(keras.layers.GlobalAveragePooling1D())\n", - " model.add(keras.layers.Dense(dense_vector_size, activation='relu'))\n", - " model.add(keras.layers.Dense(1, activation='sigmoid'))\n", - "\n", - " model.compile(optimizer = 'adam',\n", - " loss = 'binary_crossentropy',\n", - " metrics = ['accuracy'])\n", - " return model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 5 - Train the model\n", - "### 5.1 - Get it" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = get_model(vocab_size, dense_vector_size, review_len)\n", + "model = keras.Sequential(name='Embedding model')\n", + "\n", + "model.add(keras.layers.Input( shape=(review_len,) ))\n", + "model.add(keras.layers.Embedding( input_dim = vocab_size,\n", + " output_dim = dense_vector_size))\n", + "model.add(keras.layers.GlobalAveragePooling1D())\n", + "model.add(keras.layers.Dense(dense_vector_size, activation='relu'))\n", + "model.add(keras.layers.Dense(1, activation='sigmoid'))\n", + "\n", + "model.compile( optimizer = 'adam',\n", + " loss = 'binary_crossentropy',\n", + " metrics = ['accuracy'])\n", "\n", "model.summary()" ] @@ -296,7 +279,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 5.2 - Add callback" + 
"## Step 5 - Train the model\n", + "### 5.1 Add Callbacks" ] }, { @@ -306,15 +290,16 @@ "outputs": [], "source": [ "os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)\n", - "save_dir = f'{run_dir}/models/best_model.h5'\n", - "savemodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_best_only=True)" + "save_dir = f'{run_dir}/models/best_model.keras'\n", + "\n", + "savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_accuracy', mode='max', save_best_only=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 5.1 - Train it" + "### 5.2 - Train it" ] }, { @@ -364,7 +349,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = keras.models.load_model(f'{run_dir}/models/best_model.h5')\n", + "model = keras.models.load_model(f'{run_dir}/models/best_model.keras')\n", "\n", "# ---- Evaluate\n", "score = model.evaluate(x_test, y_test, verbose=0)\n", diff --git a/Embedding.Keras2/03-Prediction.ipynb b/Embedding.Keras3/03-Prediction.ipynb similarity index 84% rename from Embedding.Keras2/03-Prediction.ipynb rename to Embedding.Keras3/03-Prediction.ipynb index f058e29a885f4dcbf8e50c3618fbfed646e4db14..ed1642084b1b904f8f3e88fd53b795b110654a7c 100644 --- a/Embedding.Keras2/03-Prediction.ipynb +++ b/Embedding.Keras3/03-Prediction.ipynb @@ -6,8 +6,8 @@ "source": [ "<img width=\"800px\" src=\"../fidle/img/header.svg\"></img>\n", "\n", - "# <!-- TITLE --> [K2IMDB3] - Reload and reuse a saved model\n", - "<!-- DESC --> Retrieving a saved model to perform a sentiment analysis (movie review), using Keras 2 and Tensorflow (obsolete)\n", + "# <!-- TITLE --> [K3IMDB3] - Reload and reuse a saved model\n", + "<!-- DESC --> Retrieving a saved model to perform a sentiment analysis (movie review), using Keras 3 and PyTorch\n", "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n", "\n", "## Objectives :\n", @@ -34,24 +34,18 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", - "\n", - "import tensorflow as tf\n", - "import tensorflow.keras as keras\n", - "import tensorflow.keras.datasets.imdb as imdb\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import matplotlib\n", - "import pandas as pd\n", + "import os\n", + "os.environ['KERAS_BACKEND'] = 'torch'\n", "\n", - "import os,sys,h5py,json,re\n", + "import keras\n", "\n", - "from importlib import reload\n", + "import json,re\n", + "import numpy as np\n", "\n", "import fidle\n", "\n", "# Init Fidle environment\n", - "run_id, run_dir, datasets_dir = fidle.init('K2IMDB3')" + "run_id, run_dir, datasets_dir = fidle.init('K3IMDB3')" ] }, { @@ -75,7 +69,7 @@ "vocab_size = 10000\n", "review_len = 256\n", "\n", - "saved_models = './run/IMDB2'\n", + "saved_models = './run/K3IMDB2'\n", "dictionaries_dir = './data'" ] }, @@ -131,10 +125,8 @@ "source": [ "with open(f'{dictionaries_dir}/word_index.json', 'r') as fp:\n", " word_index = json.load(fp)\n", - " word_index = { w:int(i) for w,i in word_index.items() }\n", - " print('Loaded. ', len(word_index), 'entries in word_index' )\n", " index_word = { i:w for w,i in word_index.items() }\n", - " print('Loaded. ', len(index_word), 'entries in index_word' )" + " print('Dictionaries loaded. 
', len(word_index), 'entries' )" ] }, { @@ -152,6 +144,10 @@ "metadata": {}, "outputs": [], "source": [ + "start_char = 1 # Start of a sequence (padding is 0)\n", + "oov_char = 2 # Out-of-vocabulary\n", + "index_from = 3 # First word id\n", + "\n", "nb_reviews = len(reviews)\n", "x_data = []\n", "\n", @@ -159,17 +155,17 @@ "for review in reviews:\n", " print('Words are : ', end='')\n", " # ---- First index must be <start>\n", - " index_review=[1]\n", - " print('1 ', end='')\n", + " index_review=[start_char]\n", + " print(f'{start_char} ', end='')\n", " # ---- For all words\n", " for w in review.split(' '):\n", " # ---- Clean it\n", " w_clean = re.sub(r\"[^a-zA-Z0-9]\", \"\", w)\n", " # ---- Not empty ?\n", " if len(w_clean)>0:\n", - " # ---- Get the index\n", - " w_index = word_index.get(w,2)\n", - " if w_index>vocab_size : w_index=2\n", + " # ---- Get the index - must be inside dict or is out of vocab (oov)\n", + " w_index = word_index.get(w, oov_char)\n", + " if w_index>vocab_size : w_index=oov_char\n", " # ---- Add the index if < vocab_size\n", " index_review.append(w_index)\n", " print(f'{w_index} ', end='')\n", @@ -199,9 +195,9 @@ "\n", "for i in range(nb_reviews):\n", " imax=np.where(x_data[i]==0)[0][0]+5\n", - " print(f'\\nText review :', reviews[i])\n", - " print( f'x_train[{i:}] :', list(x_data[i][:imax]), '(...)')\n", - " print( 'Translation :', translate(x_data[i][:imax]), '(...)')" + " print(f'\\nText review {i} :', reviews[i])\n", + " print(f'tokens vector :', list(x_data[i][:imax]), '(...)')\n", + " print('Translation :', translate(x_data[i][:imax]), '(...)')" ] }, { @@ -217,7 +213,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = keras.models.load_model(f'{saved_models}/models/best_model.h5')" + "model = keras.models.load_model(f'{saved_models}/models/best_model.keras')" ] }, { diff --git a/Embedding.Keras2/04-Show-vectors.ipynb b/Embedding.Keras3/04-Show-vectors.ipynb similarity index 87% rename from Embedding.Keras2/04-Show-vectors.ipynb rename to Embedding.Keras3/04-Show-vectors.ipynb index 3ccd03089d85ba849730ff41d341f51cdbdf0ccb..862242e73586938969830771feb99ff824b8f6b3 100644 --- a/Embedding.Keras2/04-Show-vectors.ipynb +++ b/Embedding.Keras3/04-Show-vectors.ipynb @@ -6,8 +6,8 @@ "source": [ "<img width=\"800px\" src=\"../fidle/img/header.svg\"></img>\n", "\n", - "# <!-- TITLE --> [K2IMDB4] - Reload embedded vectors\n", - "<!-- DESC --> Retrieving embedded vectors from our trained model, using Keras 2 and Tensorflow (obsolete)\n", + "# <!-- TITLE --> [K3IMDB4] - Reload embedded vectors\n", + "<!-- DESC --> Retrieving embedded vectors from our trained model, using Keras 3 and PyTorch\n", "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n", "\n", "## Objectives :\n", @@ -33,24 +33,18 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", - "\n", - "import tensorflow as tf\n", - "import tensorflow.keras as keras\n", - "import tensorflow.keras.datasets.imdb as imdb\n", + "import os\n", + "os.environ['KERAS_BACKEND'] = 'torch'\n", "\n", - "import matplotlib.pyplot as plt\n", - "import matplotlib\n", - "import pandas as pd\n", + "import keras\n", "\n", - "import os,sys,h5py,json,re\n", - "\n", - "from importlib import reload\n", + "import json,re\n", + "import numpy as np\n", "\n", "import fidle\n", "\n", "# Init Fidle environment\n", - "run_id, run_dir, datasets_dir = fidle.init('K2IMDB4')" + "run_id, run_dir, datasets_dir = fidle.init('K3IMDB4')" ] }, { @@ -74,7 +68,7 @@ "vocab_size = 5000\n", "review_len = 256\n", "\n", - "saved_models = 
'./run/IMDB2'\n", + "saved_models = './run/K3IMDB2'\n", "dictionaries_dir = './data'" ] }, @@ -114,14 +108,13 @@ "metadata": {}, "outputs": [], "source": [ - "model = keras.models.load_model(f'{saved_models}/models/best_model.h5')\n", + "model = keras.models.load_model(f'{saved_models}/models/best_model.keras')\n", "print('Model loaded.')\n", "\n", - "with open(f'{dictionaries_dir}/index_word.json', 'r') as fp:\n", - " index_word = json.load(fp)\n", - " index_word = { int(i):w for i,w in index_word.items() }\n", - " word_index = { w:int(i) for i,w in index_word.items() }\n", - " print('Dictionary loaded.')" + "with open(f'{dictionaries_dir}/word_index.json', 'r') as fp:\n", + " word_index = json.load(fp)\n", + " index_word = { i:w for w,i in word_index.items() }\n", + " print('Dictionaries loaded. ', len(word_index), 'entries' )" ] }, { diff --git a/Embedding.Keras2/05-LSTM-Keras.ipynb b/Embedding.Keras3/05-LSTM-Keras.ipynb similarity index 78% rename from Embedding.Keras2/05-LSTM-Keras.ipynb rename to Embedding.Keras3/05-LSTM-Keras.ipynb index 0b5fbef7e2aae25475a529b7e6a9cffc09892101..620c3c86335fd89a1a3a76669f4cdd7dea63b586 100644 --- a/Embedding.Keras2/05-LSTM-Keras.ipynb +++ b/Embedding.Keras3/05-LSTM-Keras.ipynb @@ -6,8 +6,8 @@ "source": [ "<img width=\"800px\" src=\"../fidle/img/header.svg\"></img>\n", "\n", - "# <!-- TITLE --> [K2IMDB5] - Sentiment analysis with a RNN network\n", - "<!-- DESC --> Still the same problem, but with a network combining embedding and RNN, using Keras 2 and Tensorflow (obsolete)\n", + "# <!-- TITLE --> [K3IMDB5] - Sentiment analysis with a RNN network\n", + "<!-- DESC --> Still the same problem, but with a network combining embedding and RNN, using Keras 3 and PyTorch\n", "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n", "\n", "## Objectives :\n", @@ -40,22 +40,19 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", - "\n", - "import tensorflow as tf\n", - "import tensorflow.keras as keras\n", - "import tensorflow.keras.datasets.imdb as imdb\n", + "import os\n", + "os.environ['KERAS_BACKEND'] = 'torch'\n", "\n", - "import matplotlib.pyplot as plt\n", - "import matplotlib\n", + "import keras\n", + "import keras.datasets.imdb as imdb\n", "\n", - "import os,sys,h5py,json\n", - "from importlib import reload\n", + "import json,re\n", + "import numpy as np\n", "\n", "import fidle\n", "\n", "# Init Fidle environment\n", - "run_id, run_dir, datasets_dir = fidle.init('K2IMDB5')" + "run_id, run_dir, datasets_dir = fidle.init('K3IMDB5')" ] }, { @@ -135,10 +132,17 @@ "metadata": {}, "outputs": [], "source": [ - "(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words=vocab_size, skip_top=hide_most_frequently, seed= 42,)\n", + "# ----- Retrieve x,y\n", + "#\n", + "start_char = 1 # Start of a sequence (padding is 0)\n", + "oov_char = 2 # Out-of-vocabulary\n", + "index_from = 3 # First word id\n", "\n", - "y_train = np.asarray(y_train).astype('float32')\n", - "y_test = np.asarray(y_test ).astype('float32')\n", + "(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words = vocab_size, \n", + " skip_top = hide_most_frequently,\n", + " start_char = start_char, \n", + " oov_char = oov_char, \n", + " index_from = index_from)\n", "\n", "# ---- Rescale\n", "#\n", @@ -151,28 +155,8 @@ "#\n", "print(\"Max(x_train,x_test) : \", fidle.utils.rmax([x_train,x_test]) )\n", "print(\"Min(x_train,x_test) : \", fidle.utils.rmin([x_train,x_test]) )\n", - "print(\"x_train : {} y_train : {}\".format(x_train.shape, y_train.shape))\n", - "print(\"x_test : 
{} y_test : {}\".format(x_test.shape, y_test.shape))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**About this dataset :**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\" Max(x_train,x_test) : \", fidle.utils.rmax([x_train,x_test]) )\n", - "print(\" x_train : {} y_train : {}\".format(x_train.shape, y_train.shape))\n", - "print(\" x_test : {} y_test : {}\".format(x_test.shape, y_test.shape))\n", - "\n", - "print('\\nReview example (x_train[12]) :\\n\\n',x_train[12])" + "print(\"Len(x_train) : \", len(x_train))\n", + "print(\"Len(x_test) : \", len(x_test))" ] }, { @@ -193,22 +177,16 @@ "outputs": [], "source": [ "# ---- Retrieve dictionary {word:index}, and encode it in ascii\n", + "# Shift the dictionary from +3\n", + "# Add <pad>, <start> and <unknown> tags\n", + "# Create a reverse dictionary : {index:word}\n", "#\n", "word_index = imdb.get_word_index()\n", - "\n", - "# ---- Shift the dictionary from +3\n", - "#\n", - "word_index = {w:(i+3) for w,i in word_index.items()}\n", - "\n", - "# ---- Add <pad>, <start> and unknown tags\n", - "#\n", - "word_index.update( {'<pad>':0, '<start>':1, '<unknown>':2} )\n", - "\n", - "# ---- Create a reverse dictionary : {index:word}\n", - "#\n", + "word_index = {w:(i+index_from) for w,i in word_index.items()}\n", + "word_index.update( {'<pad>':0, '<start>':1, '<unknown>':2, '<undef>':3,} )\n", "index_word = {index:word for word,index in word_index.items()} \n", "\n", - "# ---- Add a nice function to transpose :\n", + "# ---- A nice function to transpose :\n", "#\n", "def dataset2text(review):\n", " return ' '.join([index_word.get(i, '?') for i in review])" @@ -284,34 +262,14 @@ "metadata": {}, "outputs": [], "source": [ - "def get_model(dense_vector_size=128):\n", - " \n", - " model = keras.Sequential()\n", - " model.add(keras.layers.Embedding(input_dim = vocab_size, output_dim = dense_vector_size))\n", - " model.add(keras.layers.GRU(50))\n", - " model.add(keras.layers.Dense(1, activation='sigmoid'))\n", - "\n", - " model.compile(optimizer = 'rmsprop',\n", - " loss = 'binary_crossentropy',\n", - " metrics = ['accuracy'])\n", - " return model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 6 - Train the model\n", - "### 6.1 - Get it" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = get_model(32)\n", + "model = keras.Sequential()\n", + "model.add(keras.layers.Embedding(input_dim = vocab_size, output_dim = dense_vector_size))\n", + "model.add(keras.layers.GRU(50))\n", + "model.add(keras.layers.Dense(1, activation='sigmoid'))\n", + "\n", + "model.compile(optimizer = 'rmsprop',\n", + " loss = 'binary_crossentropy',\n", + " metrics = ['accuracy'])\n", "\n", "model.summary()" ] @@ -320,7 +278,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 6.2 - Add callback" + "## Step 6 - Train the model\n", + "### 6.1 - Add Callbacks" ] }, { @@ -330,16 +289,17 @@ "outputs": [], "source": [ "os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)\n", - "save_dir = f'{run_dir}/models/best_model.h5'\n", - "savemodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_best_only=True)" + "save_dir = f'{run_dir}/models/best_model.keras'\n", + "\n", + "savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_accuracy', mode='max', save_best_only=True)" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ - "### 6.3 - Train it\n", - "CPU : batch_size=128, epochs=10 : Need 9'30 (CPU, laptop)" + "### 6.2 - Train it\n", + "Note : With a scale=0.2, batch_size=128, epochs=10 => Need 4' on a cpu laptop" ] }, { @@ -348,18 +308,13 @@ "metadata": {}, "outputs": [], "source": [ - "chrono = fidle.Chrono()\n", - "chrono.start()\n", - "\n", "history = model.fit(x_train,\n", " y_train,\n", " epochs = epochs,\n", " batch_size = batch_size,\n", " validation_data = (x_test, y_test),\n", " verbose = fit_verbosity,\n", - " callbacks = [savemodel_callback])\n", - "\n", - "chrono.show()" + " callbacks = [savemodel_callback])" ] }, { @@ -392,7 +347,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = keras.models.load_model(f'{run_dir}/models/best_model.h5')\n", + "model = keras.models.load_model(f'{run_dir}/models/best_model.keras')\n", "\n", "# ---- Evaluate\n", "score = model.evaluate(x_test, y_test, verbose=0)\n", diff --git a/GTSRB.Keras3/04-Keras-cv.ipynb b/GTSRB.Keras3/04-Keras-cv.ipynb index ad3e9e0047188b820ebceb28729a92c354fa43d6..6c88dc768d1356d4c945d2b18e8516d5519990e4 100644 --- a/GTSRB.Keras3/04-Keras-cv.ipynb +++ b/GTSRB.Keras3/04-Keras-cv.ipynb @@ -6,8 +6,8 @@ "source": [ "<img width=\"800px\" src=\"../fidle/img/header.svg\"></img>\n", "\n", - "# <!-- TITLE --> [K3GTSRB4] - Hight level example\n", - "<!-- DESC --> Episode 4 : An example of using a pre-trained model (Keras-cv)\n", + "# <!-- TITLE --> [K3GTSRB4] - Hight level example (Keras-cv)\n", + "<!-- DESC --> An example of using a pre-trained model with Keras-cv\n", "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n", "\n", "## Objectives :\n", @@ -22,7 +22,8 @@ "\n", "## Step 1 - Import and init\n", "\n", - "The python environment required for this notebook is :\n", + "**ATTENTION :** A specific environment is required for this example (Which may require 6 GB). 
\n", + "This python environment required for this notebook is :\n", "```\n", "python3 -m venv fidle-kcv\n", "pip install --upgrade keras-cv tensorflow torch torchvision torchaudio Matplotlib Jupyterlab\n", diff --git a/MNIST.Keras3/02-CNN-MNIST.ipynb b/MNIST.Keras3/02-CNN-MNIST.ipynb index ae6a3d0283fe06b236ca2c4a0fbb0a5f4697a65e..19c6b220d842a795297699eccda77e4373495a51 100644 --- a/MNIST.Keras3/02-CNN-MNIST.ipynb +++ b/MNIST.Keras3/02-CNN-MNIST.ipynb @@ -55,7 +55,7 @@ "# Init Fidle environment\n", "import fidle\n", "\n", - "run_id, run_dir, datasets_dir = fidle.init('K3MNIST1')" + "run_id, run_dir, datasets_dir = fidle.init('K3MNIST2')" ] }, { diff --git a/README.ipynb b/README.ipynb index 3b01dedb91291280fccca33c03bbd782e2899c50..7f16f1a9afd93eccf0d8e3e5edfb884f8ffa6beb 100644 --- a/README.ipynb +++ b/README.ipynb @@ -3,13 +3,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "9fccea2b", + "id": "1f828036", "metadata": { "execution": { - "iopub.execute_input": "2024-01-16T21:04:27.072205Z", - "iopub.status.busy": "2024-01-16T21:04:27.071380Z", - "iopub.status.idle": "2024-01-16T21:04:27.081582Z", - "shell.execute_reply": "2024-01-16T21:04:27.080749Z" + "iopub.execute_input": "2024-01-21T16:21:09.860108Z", + "iopub.status.busy": "2024-01-21T16:21:09.859792Z", + "iopub.status.idle": "2024-01-21T16:21:09.870962Z", + "shell.execute_reply": "2024-01-21T16:21:09.870075Z" }, "jupyter": { "source_hidden": true @@ -52,7 +52,7 @@ "For more information, you can contact us at : \n", "[<img width=\"200px\" style=\"vertical-align:middle\" src=\"fidle/img/00-Mail_contact.svg\"></img>](#top)\n", "\n", - "Current Version : <!-- VERSION_BEGIN -->2.5.1<!-- VERSION_END -->\n", + "Current Version : <!-- VERSION_BEGIN -->2.5.4<!-- VERSION_END -->\n", "\n", "\n", "## Course materials\n", @@ -73,7 +73,7 @@ "The world of Deep Learning is changing very fast !\n", "\n", "<!-- TOC_BEGIN -->\n", - "<!-- Automatically generated on : 16/01/24 22:04:25 -->\n", + "<!-- Automatically generated on : 21/01/24 17:21:08 -->\n", "\n", "### Linear and logistic regression\n", "- **[LINR1](LinearReg/01-Linear-Regression.ipynb)** - [Linear regression with direct resolution](LinearReg/01-Linear-Regression.ipynb) \n", @@ -117,7 +117,7 @@ "- **[PMNIST1](MNIST.PyTorch/01-DNN-MNIST_PyTorch.ipynb)** - [Simple classification with DNN](MNIST.PyTorch/01-DNN-MNIST_PyTorch.ipynb) \n", "Example of classification with a fully connected neural network, using Pytorch\n", "\n", - "### MNIST classification (DNN,CNN), using Lightning\n", + "### MNIST classification (DNN,CNN), using PyTorch/Lightning\n", "- **[LMNIST2](MNIST.Lightning/01-DNN-MNIST_Lightning.ipynb)** - [Simple classification with DNN](MNIST.Lightning/01-DNN-MNIST_Lightning.ipynb) \n", "An example of classification using a dense neural network for the famous MNIST dataset, using PyTorch Lightning\n", "- **[LMNIST2](MNIST.Lightning/02-CNN-MNIST_Lightning.ipynb)** - [Simple classification with CNN](MNIST.Lightning/02-CNN-MNIST_Lightning.ipynb) \n", @@ -130,28 +130,28 @@ "Episode 2 : First convolutions and first classification of our traffic signs, using Keras3\n", "- **[K3GTSRB3](GTSRB.Keras3/03-Better-convolutions.ipynb)** - [Training monitoring](GTSRB.Keras3/03-Better-convolutions.ipynb) \n", "Episode 3 : Monitoring, analysis and check points during a training session, using Keras3\n", - "- **[K3GTSRB4](GTSRB.Keras3/04-Keras-cv.ipynb)** - [Hight level example](GTSRB.Keras3/04-Keras-cv.ipynb) \n", - "Episode 4 : An example of using a pre-trained model (Keras-cv)\n", + "- 
+    "- **[K3GTSRB4](GTSRB.Keras3/04-Keras-cv.ipynb)** - [High level example (Keras-cv)](GTSRB.Keras3/04-Keras-cv.ipynb)  \n",
+    "An example of using a pre-trained model with Keras-cv\n",
    "- **[K3GTSRB10](GTSRB.Keras3/batch_oar.sh)** - [OAR batch script submission](GTSRB.Keras3/batch_oar.sh)  \n",
    "Bash script for an OAR batch submission of an ipython code\n",
    "- **[K3GTSRB11](GTSRB.Keras3/batch_slurm.sh)** - [SLURM batch script](GTSRB.Keras3/batch_slurm.sh)  \n",
    "Bash script for a Slurm batch submission of an ipython code\n",
    "\n",
-    "### Sentiment analysis with word embedding, using Keras2 (obsolete)\n",
-    "- **[K2IMDB1](Embedding.Keras2/01-One-hot-encoding.ipynb)** - [Sentiment analysis with hot-one encoding](Embedding.Keras2/01-One-hot-encoding.ipynb)  \n",
-    "A basic example of sentiment analysis with sparse encoding, using a dataset from Internet Movie Database (IMDB), using Keras 2 and Tensorflow (obsolete)\n",
-    "- **[K2IMDB2](Embedding.Keras2/02-Keras-embedding.ipynb)** - [Sentiment analysis with text embedding](Embedding.Keras2/02-Keras-embedding.ipynb)  \n",
-    "A very classical example of word embedding with a dataset from Internet Movie Database (IMDB), using Keras 2 and Tensorflow (obsolete)\n",
-    "- **[K2IMDB3](Embedding.Keras2/03-Prediction.ipynb)** - [Reload and reuse a saved model](Embedding.Keras2/03-Prediction.ipynb)  \n",
-    "Retrieving a saved model to perform a sentiment analysis (movie review), using Keras 2 and Tensorflow (obsolete)\n",
-    "- **[K2IMDB4](Embedding.Keras2/04-Show-vectors.ipynb)** - [Reload embedded vectors](Embedding.Keras2/04-Show-vectors.ipynb)  \n",
-    "Retrieving embedded vectors from our trained model, using Keras 2 and Tensorflow (obsolete)\n",
-    "- **[K2IMDB5](Embedding.Keras2/05-LSTM-Keras.ipynb)** - [Sentiment analysis with a RNN network](Embedding.Keras2/05-LSTM-Keras.ipynb)  \n",
-    "Still the same problem, but with a network combining embedding and RNN, using Keras 2 and Tensorflow (obsolete)\n",
-    "\n",
-    "### Time series with Recurrent Neural Network (RNN), using Keras2 (obsolete)\n",
-    "- **[K2LADYB1](RNN.Keras2/LADYB1-Ladybug.ipynb)** - [Prediction of a 2D trajectory via RNN](RNN.Keras2/LADYB1-Ladybug.ipynb)  \n",
-    "Artificial dataset generation and prediction attempt via a recurrent network, using Keras 2 and Tensorflow (obsolete)\n",
+    "### Sentiment analysis with word embedding, using Keras3/PyTorch\n",
+    "- **[K3IMDB1](Embedding.Keras3/01-One-hot-encoding.ipynb)** - [Sentiment analysis with one-hot encoding](Embedding.Keras3/01-One-hot-encoding.ipynb)  \n",
+    "A basic example of sentiment analysis with sparse encoding, using a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch\n",
+    "- **[K3IMDB2](Embedding.Keras3/02-Keras-embedding.ipynb)** - [Sentiment analysis with text embedding](Embedding.Keras3/02-Keras-embedding.ipynb)  \n",
+    "A very classical example of word embedding with a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch\n",
+    "- **[K3IMDB3](Embedding.Keras3/03-Prediction.ipynb)** - [Reload and reuse a saved model](Embedding.Keras3/03-Prediction.ipynb)  \n",
+    "Retrieving a saved model to perform a sentiment analysis (movie review), using Keras 3 and PyTorch\n",
+    "- **[K3IMDB4](Embedding.Keras3/04-Show-vectors.ipynb)** - [Reload embedded vectors](Embedding.Keras3/04-Show-vectors.ipynb)  \n",
+    "Retrieving embedded vectors from our trained model, using Keras 3 and PyTorch\n",
+    "- **[K3IMDB5](Embedding.Keras3/05-LSTM-Keras.ipynb)** - [Sentiment analysis with a RNN network](Embedding.Keras3/05-LSTM-Keras.ipynb)  \n",
+    "Still the same problem, but with a network combining embedding and RNN, using Keras 3 and PyTorch\n",
+    "\n",
+    "### Time series with Recurrent Neural Network (RNN), using Keras3/PyTorch\n",
+    "- **[K3LADYB1](RNN.Keras3/01-Ladybug.ipynb)** - [Prediction of a 2D trajectory via RNN](RNN.Keras3/01-Ladybug.ipynb)  \n",
+    "Artificial dataset generation and prediction attempt via a recurrent network, using Keras 3 and PyTorch\n",
    "\n",
    "### Sentiment analysis with transformer, using PyTorch\n",
    "- **[TRANS1](Transformers.PyTorch/01-Distilbert.ipynb)** - [IMDB, Sentiment analysis with Transformers ](Transformers.PyTorch/01-Distilbert.ipynb)  \n",
@@ -256,7 +256,7 @@
    "from IPython.display import display,Markdown\n",
    "display(Markdown(open('README.md', 'r').read()))\n",
    "#\n",
-    "# This README is visible under Jupiter Lab ;-)# Automatically generated on : 16/01/24 22:04:26"
+    "# This README is visible under Jupyter Lab ;-)# Automatically generated on : 21/01/24 17:21:08"
   ]
  }
 ],
diff --git a/README.md b/README.md
index 5393036c2568974352d402c3189935c60562818e..9295f816a0521507969df52c9b933e9f919f58c8 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ For more information, see **https://fidle.cnrs.fr** :
 For more information, you can contact us at :
 [<img width="200px" style="vertical-align:middle" src="fidle/img/00-Mail_contact.svg"></img>](#top)

-Current Version : <!-- VERSION_BEGIN -->2.5.1<!-- VERSION_END -->
+Current Version : <!-- VERSION_BEGIN -->2.5.4<!-- VERSION_END -->

 ## Course materials
@@ -52,7 +52,7 @@ For these reason, they are kept as examples, while we develop the Keras3/PyTorch
 The world of Deep Learning is changing very fast !

 <!-- TOC_BEGIN -->
-<!-- Automatically generated on : 16/01/24 22:04:25 -->
+<!-- Automatically generated on : 21/01/24 17:21:08 -->

 ### Linear and logistic regression
 - **[LINR1](LinearReg/01-Linear-Regression.ipynb)** - [Linear regression with direct resolution](LinearReg/01-Linear-Regression.ipynb)
@@ -96,7 +96,7 @@ An example of classification using a convolutional neural network for the famous
 - **[PMNIST1](MNIST.PyTorch/01-DNN-MNIST_PyTorch.ipynb)** - [Simple classification with DNN](MNIST.PyTorch/01-DNN-MNIST_PyTorch.ipynb)
 Example of classification with a fully connected neural network, using Pytorch

-### MNIST classification (DNN,CNN), using Lightning
+### MNIST classification (DNN,CNN), using PyTorch/Lightning
 - **[LMNIST2](MNIST.Lightning/01-DNN-MNIST_Lightning.ipynb)** - [Simple classification with DNN](MNIST.Lightning/01-DNN-MNIST_Lightning.ipynb)
 An example of classification using a dense neural network for the famous MNIST dataset, using PyTorch Lightning
 - **[LMNIST2](MNIST.Lightning/02-CNN-MNIST_Lightning.ipynb)** - [Simple classification with CNN](MNIST.Lightning/02-CNN-MNIST_Lightning.ipynb)
@@ -109,28 +109,28 @@ Episode 1 : Analysis of the GTSRB dataset and creation of an enhanced dataset
 Episode 2 : First convolutions and first classification of our traffic signs, using Keras3
 - **[K3GTSRB3](GTSRB.Keras3/03-Better-convolutions.ipynb)** - [Training monitoring](GTSRB.Keras3/03-Better-convolutions.ipynb)
 Episode 3 : Monitoring, analysis and check points during a training session, using Keras3
-- **[K3GTSRB4](GTSRB.Keras3/04-Keras-cv.ipynb)** - [Hight level example](GTSRB.Keras3/04-Keras-cv.ipynb)
-Episode 4 : An example of using a pre-trained model (Keras-cv)
+- **[K3GTSRB4](GTSRB.Keras3/04-Keras-cv.ipynb)** - [High level example (Keras-cv)](GTSRB.Keras3/04-Keras-cv.ipynb)
+An example of using a pre-trained model with Keras-cv
 - **[K3GTSRB10](GTSRB.Keras3/batch_oar.sh)** - [OAR batch script submission](GTSRB.Keras3/batch_oar.sh)
 Bash script for an OAR batch submission of an ipython code
 - **[K3GTSRB11](GTSRB.Keras3/batch_slurm.sh)** - [SLURM batch script](GTSRB.Keras3/batch_slurm.sh)
 Bash script for a Slurm batch submission of an ipython code

-### Sentiment analysis with word embedding, using Keras2 (obsolete)
-- **[K2IMDB1](Embedding.Keras2/01-One-hot-encoding.ipynb)** - [Sentiment analysis with hot-one encoding](Embedding.Keras2/01-One-hot-encoding.ipynb)
-A basic example of sentiment analysis with sparse encoding, using a dataset from Internet Movie Database (IMDB), using Keras 2 and Tensorflow (obsolete)
-- **[K2IMDB2](Embedding.Keras2/02-Keras-embedding.ipynb)** - [Sentiment analysis with text embedding](Embedding.Keras2/02-Keras-embedding.ipynb)
-A very classical example of word embedding with a dataset from Internet Movie Database (IMDB), using Keras 2 and Tensorflow (obsolete)
-- **[K2IMDB3](Embedding.Keras2/03-Prediction.ipynb)** - [Reload and reuse a saved model](Embedding.Keras2/03-Prediction.ipynb)
-Retrieving a saved model to perform a sentiment analysis (movie review), using Keras 2 and Tensorflow (obsolete)
-- **[K2IMDB4](Embedding.Keras2/04-Show-vectors.ipynb)** - [Reload embedded vectors](Embedding.Keras2/04-Show-vectors.ipynb)
-Retrieving embedded vectors from our trained model, using Keras 2 and Tensorflow (obsolete)
-- **[K2IMDB5](Embedding.Keras2/05-LSTM-Keras.ipynb)** - [Sentiment analysis with a RNN network](Embedding.Keras2/05-LSTM-Keras.ipynb)
-Still the same problem, but with a network combining embedding and RNN, using Keras 2 and Tensorflow (obsolete)
-
-### Time series with Recurrent Neural Network (RNN), using Keras2 (obsolete)
-- **[K2LADYB1](RNN.Keras2/LADYB1-Ladybug.ipynb)** - [Prediction of a 2D trajectory via RNN](RNN.Keras2/LADYB1-Ladybug.ipynb)
-Artificial dataset generation and prediction attempt via a recurrent network, using Keras 2 and Tensorflow (obsolete)
+### Sentiment analysis with word embedding, using Keras3/PyTorch
+- **[K3IMDB1](Embedding.Keras3/01-One-hot-encoding.ipynb)** - [Sentiment analysis with one-hot encoding](Embedding.Keras3/01-One-hot-encoding.ipynb)
+A basic example of sentiment analysis with sparse encoding, using a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch
+- **[K3IMDB2](Embedding.Keras3/02-Keras-embedding.ipynb)** - [Sentiment analysis with text embedding](Embedding.Keras3/02-Keras-embedding.ipynb)
+A very classical example of word embedding with a dataset from Internet Movie Database (IMDB), using Keras 3 on PyTorch
+- **[K3IMDB3](Embedding.Keras3/03-Prediction.ipynb)** - [Reload and reuse a saved model](Embedding.Keras3/03-Prediction.ipynb)
+Retrieving a saved model to perform a sentiment analysis (movie review), using Keras 3 and PyTorch
+- **[K3IMDB4](Embedding.Keras3/04-Show-vectors.ipynb)** - [Reload embedded vectors](Embedding.Keras3/04-Show-vectors.ipynb)
+Retrieving embedded vectors from our trained model, using Keras 3 and PyTorch
+- **[K3IMDB5](Embedding.Keras3/05-LSTM-Keras.ipynb)** - [Sentiment analysis with a RNN network](Embedding.Keras3/05-LSTM-Keras.ipynb)
+Still the same problem, but with a network combining embedding and RNN, using Keras 3 and PyTorch
+
+### Time series with Recurrent Neural Network (RNN), using Keras3/PyTorch
+- **[K3LADYB1](RNN.Keras3/01-Ladybug.ipynb)** - [Prediction of a 2D trajectory via RNN](RNN.Keras3/01-Ladybug.ipynb) 
+Artificial dataset generation and prediction attempt via a recurrent network, using Keras 3 and PyTorch ### Sentiment analysis with transformer, using PyTorch - **[TRANS1](Transformers.PyTorch/01-Distilbert.ipynb)** - [IMDB, Sentiment analysis with Transformers ](Transformers.PyTorch/01-Distilbert.ipynb) diff --git a/RNN.Keras2/LADYB1-Ladybug.ipynb b/RNN.Keras3/01-Ladybug.ipynb similarity index 82% rename from RNN.Keras2/LADYB1-Ladybug.ipynb rename to RNN.Keras3/01-Ladybug.ipynb index 806ae1b1f31fe98d0cc7d4361887fb08bbb39ab0..43ed94d101bd6f3ef8c3792c9086ca45da546d61 100644 --- a/RNN.Keras2/LADYB1-Ladybug.ipynb +++ b/RNN.Keras3/01-Ladybug.ipynb @@ -6,8 +6,8 @@ "source": [ "<img width=\"800px\" src=\"../fidle/img/header.svg\"></img>\n", "\n", - "# <!-- TITLE --> [K2LADYB1] - Prediction of a 2D trajectory via RNN\n", - "<!-- DESC --> Artificial dataset generation and prediction attempt via a recurrent network, using Keras 2 and Tensorflow (obsolete)\n", + "# <!-- TITLE --> [K3LADYB1] - Prediction of a 2D trajectory via RNN\n", + "<!-- DESC --> Artificial dataset generation and prediction attempt via a recurrent network, using Keras 3 and PyTorch\n", "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n", "\n", "## Objectives :\n", @@ -17,7 +17,7 @@ "\n", " - Generate an artificial dataset\n", " - dataset preparation\n", - " - Doing our training\n", + " - Doing our testing\n", " - Making predictions\n", "\n", "## Step 1 - Import and init\n", @@ -30,26 +30,18 @@ "metadata": {}, "outputs": [], "source": [ - "import tensorflow as tf\n", - "from tensorflow import keras\n", - "from tensorflow.keras.callbacks import TensorBoard\n", - "from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator\n", + "import os\n", + "os.environ['KERAS_BACKEND'] = 'torch'\n", "\n", + "import keras\n", "import numpy as np\n", - "import math, random\n", - "from math import sin,cos,pi\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import pandas as pd\n", - "import h5py, json\n", - "import os,time,sys\n", - "\n", - "from importlib import reload\n", + "from math import cos, sin\n", + "import random\n", "\n", "import fidle\n", "\n", "# Init Fidle environment\n", - "run_id, run_dir, datasets_dir = fidle.init('K2LADYB1')" + "run_id, run_dir, datasets_dir = fidle.init('K3LADYB1')" ] }, { @@ -68,7 +60,7 @@ "# ---- About dataset\n", "#\n", "max_t = 1000\n", - "delta_t = 0.02\n", + "delta_t = 0.01\n", "features_len = 2\n", "\n", "\n", @@ -121,7 +113,7 @@ " ladybug_init.params_y = [ random.gauss(0.,1.) 
for u in range(8)]\n",
    "    \n",
    "def ladybug_move(t):\n",
-    "    k=0.5\n",
+    "\n",
    "    [ax1, ax2, ax3, ax4, kx1, kx2, kx3, kx4] = ladybug_init.params_x\n",
    "    [ay1, ay2, ay3, ay4, ky1, ky2, ky3, ky4] = ladybug_init.params_y\n",
    "    \n",
@@ -209,7 +201,7 @@
  "cell_type": "markdown",
  "metadata": {},
  "source": [
-    "### 2.4 - Prepare some nice data generator"
+    "### 2.4 - Prepare sequences from datasets"
  ]
 },
 {
@@ -218,32 +210,26 @@
  "metadata": {},
  "outputs": [],
  "source": [
-    "# ---- Train generator\n",
-    "#\n",
-    "train_generator = TimeseriesGenerator(x_train, x_train, length=sequence_len, batch_size=batch_size)\n",
-    "test_generator  = TimeseriesGenerator(x_test,  x_test,  length=sequence_len, batch_size=batch_size)\n",
-    "\n",
-    "# ---- About\n",
+    "# ---- Create sequences and labels for train and test\n",
    "#\n",
-    "fidle.utils.subtitle('About the splitting of our dataset :')\n",
+    "xs_train, ys_train=[],[]\n",
+    "all_i = np.random.permutation( len(x_train) - sequence_len - 1 )\n",
    "\n",
-    "x,y=train_generator[0]\n",
-    "print(f'Number of batch trains available : ', len(train_generator))\n",
-    "print('batch x shape : ',x.shape)\n",
-    "print('batch y shape : ',y.shape)\n",
+    "for i in all_i:\n",
+    "    xs_train.append( x_train[ i : i+sequence_len ] )\n",
+    "    ys_train.append( x_train[ i+sequence_len ] )\n",
+    "    \n",
+    "xs_test, ys_test=[],[]\n",
+    "for i in range( len(x_test) - sequence_len - 1):\n",
+    "    xs_test.append( x_test[ i : i+sequence_len ] )\n",
+    "    ys_test.append( x_test[ i+sequence_len ] )\n",
    "\n",
-    "x,y=train_generator[0]\n",
-    "fidle.utils.subtitle('What a batch looks like (x) :')\n",
-    "fidle.utils.np_print(x[0] )\n",
-    "fidle.utils.subtitle('What a batch looks like (y) :')\n",
-    "fidle.utils.np_print(y[0])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 3 - Create a model"
+    "# ---- Convert to numpy / float16\n",
+    "    \n",
+    "xs_train = np.array(xs_train, dtype='float16')\n",
+    "ys_train = np.array(ys_train, dtype='float16')\n",
+    "xs_test  = np.array(xs_test,  dtype='float16')\n",
+    "ys_test  = np.array(ys_test,  dtype='float16')\n"
   ]
  },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
-    "model = keras.models.Sequential()\n",
-    "model.add( keras.layers.InputLayer(input_shape=(sequence_len, features_len)) )\n",
-    "model.add( keras.layers.GRU(200, return_sequences=False, activation='relu') )\n",
-    "model.add( keras.layers.Dense(features_len) )\n",
+    "fidle.utils.subtitle('About the splitting of our dataset :')\n",
    "\n",
-    "model.summary()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Step 4 - Compile and run"
+    "print('Number of sequences : ', len(xs_train))\n",
+    "print('xs_train shape      : ',xs_train.shape)\n",
+    "print('ys_train shape      : ',ys_train.shape)\n",
+    "\n",
+    "fidle.utils.subtitle('What an xs looks like :')\n",
+    "fidle.utils.np_print(xs_train[10] )\n",
+    "fidle.utils.subtitle('What an ys looks like :')\n",
+    "fidle.utils.np_print(ys_train[10])"
  ]
 },
 {
  "cell_type": "markdown",
  "metadata": {},
  "source": [
-    "### 4.1 - Add callback"
+    "## Step 3 - Create a model"
  ]
 },
 {
@@ -280,17 +263,24 @@
  "metadata": {},
  "outputs": [],
  "source": [
-    "os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)\n",
-    "save_path = f'{run_dir}/models/best_model.h5'\n",
+    "model = keras.models.Sequential()\n",
+    "model.add( keras.layers.InputLayer(shape=(sequence_len, features_len)) )\n",
+    "model.add( keras.layers.GRU(200, return_sequences=False, activation='relu') )\n",
+    "model.add( keras.layers.Dense(features_len) 
)\n", + "\n", + "model.summary()\n", "\n", - "bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_path, verbose=0, save_best_only=True)" + "model.compile(optimizer='rmsprop', \n", + " loss='mse', \n", + " metrics = ['mae'] )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 4.2 - Compile" + "## Step 4 - Train the model\n", + "### 4.1 Add Callbacks" ] }, { @@ -299,17 +289,18 @@ "metadata": {}, "outputs": [], "source": [ - "model.compile(optimizer='rmsprop', \n", - " loss='mse', \n", - " metrics = ['mae'] )" + "os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)\n", + "save_dir = f'{run_dir}/models/best_model.keras'\n", + "\n", + "savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_mae', mode='max', save_best_only=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 4.3 - Fit\n", - "3' with a CPU (laptop) " + "### 4.2 - Train it\n", + "Need 3' on a cpu laptop" ] }, { @@ -321,11 +312,11 @@ "chrono=fidle.Chrono()\n", "chrono.start()\n", "\n", - "history=model.fit(train_generator, \n", + "history=model.fit(xs_train,ys_train,\n", " epochs = epochs, \n", " verbose = fit_verbosity,\n", - " validation_data = test_generator,\n", - " callbacks = [bestmodel_callback])\n", + " validation_data = (xs_test, ys_test),\n", + " callbacks = [savemodel_callback])\n", "\n", "chrono.show()" ] @@ -359,7 +350,7 @@ "metadata": {}, "outputs": [], "source": [ - "loaded_model = tf.keras.models.load_model(save_path)\n", + "loaded_model = keras.models.load_model(f'{run_dir}/models/best_model.keras')\n", "print('Loaded.')" ] }, diff --git a/fidle/about.yml b/fidle/about.yml index ae56d2cb4b20b5551777093b06ba2af5e53ca19e..810344c14f0ca5913f817ac6631e76a8a2bf7feb 100644 --- a/fidle/about.yml +++ b/fidle/about.yml @@ -13,7 +13,7 @@ # # This file describes the notebooks used by the Fidle training. 
diff --git a/fidle/about.yml b/fidle/about.yml
index ae56d2cb4b20b5551777093b06ba2af5e53ca19e..810344c14f0ca5913f817ac6631e76a8a2bf7feb 100644
--- a/fidle/about.yml
+++ b/fidle/about.yml
@@ -13,7 +13,7 @@
 #
 # This file describes the notebooks used by the Fidle training.
-version: 2.5.1
+version: 2.5.4
 content: notebooks
 name: Notebooks Fidle
 description: All notebooks used by the Fidle training
@@ -32,10 +32,10 @@ toc:
   Wine.Lightning: Wine Quality prediction (DNN), using PyTorch/Lightning
   MNIST.Keras3: MNIST classification (DNN,CNN), using Keras3/PyTorch
   MNIST.PyTorch: MNIST classification (DNN,CNN), using PyTorch
-  MNIST.Lightning: MNIST classification (DNN,CNN), using Lightning
+  MNIST.Lightning: MNIST classification (DNN,CNN), using PyTorch/Lightning
   GTSRB.Keras3: Images classification GTSRB with Convolutional Neural Networks (CNN), using Keras3/PyTorch
-  Embedding.Keras2: Sentiment analysis with word embedding, using Keras2 (obsolete)
-  RNN.Keras2: Time series with Recurrent Neural Network (RNN), using Keras2 (obsolete)
+  Embedding.Keras3: Sentiment analysis with word embedding, using Keras3/PyTorch
+  RNN.Keras3: Time series with Recurrent Neural Network (RNN), using Keras3/PyTorch
   Transformers.PyTorch: Sentiment analysis with transformer, using PyTorch
   AE.Keras2: Unsupervised learning with an autoencoder neural network (AE), using Keras2 (obsolete)
   VAE.Keras2: Generative network with Variational Autoencoder (VAE), using Keras2 (obsolete)
diff --git a/fidle/ci/default.yml b/fidle/ci/default.yml
index 46dc911ae36ad16f3a160ae8adc8942d162da648..f8c8ee262b73969cd942d874ae066cc5751674e5 100644
--- a/fidle/ci/default.yml
+++ b/fidle/ci/default.yml
@@ -1,6 +1,6 @@
 campain:
   version: '1.0'
-  description: Automatically generated ci profile (16/01/24 22:04:25)
+  description: Automatically generated ci profile (21/01/24 17:21:08)
   directory: ./campains/default
   existing_notebook: 'remove    # remove|skip'
   report_template: 'fidle     # fidle|default'
@@ -116,18 +116,18 @@ K3GTSRB4:
   notebook: GTSRB.Keras3/04-Keras-cv.ipynb

 #
-# ------------ Embedding.Keras2
+# ------------ Embedding.Keras3
 #
-K2IMDB1:
-  notebook: Embedding.Keras2/01-One-hot-encoding.ipynb
+K3IMDB1:
+  notebook: Embedding.Keras3/01-One-hot-encoding.ipynb
   overrides:
     vocab_size: default
     hide_most_frequently: default
     batch_size: default
     epochs: default
     fit_verbosity: default
-K2IMDB2:
-  notebook: Embedding.Keras2/02-Keras-embedding.ipynb
+K3IMDB2:
+  notebook: Embedding.Keras3/02-Keras-embedding.ipynb
   overrides:
     vocab_size: default
     hide_most_frequently: default
     review_len: default
     dense_vector_size: default
     batch_size: default
     epochs: default
     output_dir: default
     fit_verbosity: default
-K2IMDB3:
-  notebook: Embedding.Keras2/03-Prediction.ipynb
+K3IMDB3:
+  notebook: Embedding.Keras3/03-Prediction.ipynb
   overrides:
     vocab_size: default
     review_len: default
     saved_models: default
     dictionaries_dir: default
-K2IMDB4:
-  notebook: Embedding.Keras2/04-Show-vectors.ipynb
+K3IMDB4:
+  notebook: Embedding.Keras3/04-Show-vectors.ipynb
   overrides:
     vocab_size: default
     review_len: default
     saved_models: default
     dictionaries_dir: default
-K2IMDB5:
-  notebook: Embedding.Keras2/05-LSTM-Keras.ipynb
+K3IMDB5:
+  notebook: Embedding.Keras3/05-LSTM-Keras.ipynb
   overrides:
     vocab_size: default
     hide_most_frequently: default
     review_len: default
     dense_vector_size: default
     batch_size: default
     epochs: default
     fit_verbosity: default
     scale: default

 #
-# ------------ RNN.Keras2
+# ------------ RNN.Keras3
 #
-K2LADYB1:
-  notebook: RNN.Keras2/LADYB1-Ladybug.ipynb
+K3LADYB1:
+  notebook: RNN.Keras3/01-Ladybug.ipynb
   overrides:
     scale: default
     train_prop: default
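A closing note on the `ModelCheckpoint` pattern that recurs throughout these notebooks: `mode` must match the direction of the monitored metric, otherwise `save_best_only=True` keeps the worst weights. `val_accuracy` improves upwards (`mode='max'`), while error metrics such as `val_mae` or `val_loss` improve downwards (`mode='min'`); `mode='auto'` lets Keras infer the direction from the metric name. A minimal sketch of both cases, with illustrative file paths:

```python
# ModelCheckpoint direction sketch (file paths are illustrative)
import os
os.environ['KERAS_BACKEND'] = 'torch'    # must be set before the first keras import

import keras

# Accuracy-like metric : keep the checkpoint with the HIGHEST value
cb_accuracy = keras.callbacks.ModelCheckpoint( filepath='./run/models/best_by_accuracy.keras',
                                               monitor='val_accuracy', mode='max', save_best_only=True)

# Error-like metric (mae, loss) : keep the checkpoint with the LOWEST value
cb_mae      = keras.callbacks.ModelCheckpoint( filepath='./run/models/best_by_mae.keras',
                                               monitor='val_mae', mode='min', save_best_only=True)
```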