From e128f5703b37df4da4499b8ccb1164c428890243 Mon Sep 17 00:00:00 2001
From: Soraya Arias <soraya.arias@inria.fr>
Date: Fri, 6 Mar 2020 16:38:54 +0100
Subject: [PATCH] Propose to load the preloaded data in a comment

Former-commit-id: 0f4260eed78a606ee14053b984dce7e8321a730c
---
 IMDB/01-Embedding-Keras.ipynb | 21 ++++++++++++---------
 IMDB/03-LSTM-Keras.ipynb      | 11 +++++++++--
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/IMDB/01-Embedding-Keras.ipynb b/IMDB/01-Embedding-Keras.ipynb
index e990699..567cf00 100644
--- a/IMDB/01-Embedding-Keras.ipynb
+++ b/IMDB/01-Embedding-Keras.ipynb
@@ -169,13 +169,20 @@
     "\n",
     "# ----- Retrieve x,y\n",
     "#\n",
+    "# Either load the dataset directly from Keras (small size, <20M)\n",
     "(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words  = vocab_size,\n",
     "                                                       skip_top   = 0,\n",
     "                                                       maxlen     = None,\n",
     "                                                       seed       = 42,\n",
     "                                                       start_char = 1,\n",
     "                                                       oov_char   = 2,\n",
-    "                                                       index_from = 3, )"
+    "                                                       index_from = 3, )\n",
+    "# Or uncomment the lines below to use the same pre-loaded dataset (if at GRICAD or IDRIS)\n",
+    "#with  h5py.File(f'{dataset_dir}/dataset_imdb.h5','r') as f:\n",
+    "#        x_train = f['x_train'][:]\n",
+    "#        y_train = f['y_train'][:]\n",
+    "#        x_test  = f['x_test'][:]\n",
+    "#        y_test  = f['y_test'][:]"
    ]
   },
   {
@@ -390,7 +397,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Save dataset and dictionary (For future use)"
+    "### Save dataset and dictionary (for future use; not mandatory if at GRICAD or IDRIS)"
    ]
   },
   {
@@ -407,15 +414,11 @@
     }
    ],
    "source": [
-    "# ---- To write dataset in the project place\n",
-    "#\n",
-    "output_dir = dataset_dir\n",
-    "\n",
-    "# ---- To write h5 dataset in a test place\n",
+    "# ---- To write h5 dataset in a test place (optional)\n",
     "#      For small tests only !\n",
     "#\n",
-    "# output_dir = './data'\n",
-    "# ooo.mkdir(output_dir)\n",
+    "output_dir = './data'\n",
+    "ooo.mkdir(output_dir)\n",
     "\n",
     "\n",
     "with h5py.File(f'{output_dir}/dataset_imdb.h5', 'w') as f:\n",
diff --git a/IMDB/03-LSTM-Keras.ipynb b/IMDB/03-LSTM-Keras.ipynb
index 869ffdc..5684632 100644
--- a/IMDB/03-LSTM-Keras.ipynb
+++ b/IMDB/03-LSTM-Keras.ipynb
@@ -103,13 +103,20 @@
     "\n",
     "# ----- Retrieve x,y\n",
     "#\n",
+    "# Either load the dataset directly from Keras (small size, <20M)\n",
     "(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words  = vocab_size,\n",
     "                                                       skip_top   = 0,\n",
     "                                                       maxlen     = None,\n",
     "                                                       seed       = 42,\n",
     "                                                       start_char = 1,\n",
     "                                                       oov_char   = 2,\n",
-    "                                                       index_from = 3, )"
+    "                                                       index_from = 3, )\n",
+    "# Or uncomment the lines below to use the same pre-loaded dataset (if at GRICAD or IDRIS)\n",
+    "#with  h5py.File(f'{dataset_dir}/dataset_imdb.h5','r') as f:\n",
+    "#        x_train = f['x_train'][:]\n",
+    "#        y_train = f['y_train'][:]\n",
+    "#        x_test  = f['x_test'][:]\n",
+    "#        y_test  = f['y_test'][:]"
    ]
   },
   {
@@ -231,7 +238,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Save dataset and dictionary (can be usefull)"
+    "### Save dataset and dictionary (can be useful, but not mandatory if at GRICAD or IDRIS)"
    ]
   },
   {
-- 
GitLab