diff --git a/IMDB/01-Embedding-Keras.ipynb b/IMDB/01-Embedding-Keras.ipynb
index e99069945cac27f00393e7f7503091b437ff606c..567cf00627ee082bb399e8dadc999d7eed5f7a65 100644
--- a/IMDB/01-Embedding-Keras.ipynb
+++ b/IMDB/01-Embedding-Keras.ipynb
@@ -169,13 +169,20 @@
     "\n",
     "# ----- Retrieve x,y\n",
     "#\n",
+    "# Choose if you want to load the dataset directly from Keras (small size, <20M)\n",
     "(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words = vocab_size,\n",
     "                                                        skip_top = 0,\n",
     "                                                        maxlen = None,\n",
     "                                                        seed = 42,\n",
     "                                                        start_char = 1,\n",
     "                                                        oov_char = 2,\n",
-    "                                                        index_from = 3, )"
+    "                                                        index_from = 3, )\n",
+    "# Or you can use the same pre-loaded dataset\n",
+    "#with h5py.File(f'{dataset_dir}/dataset_imdb.h5','r') as f:\n",
+    "#    x_train = f['x_train'][:]\n",
+    "#    y_train = f['y_train'][:]\n",
+    "#    x_test = f['x_test'][:]\n",
+    "#    y_test = f['y_test'][:]"
    ]
   },
   {
@@ -390,7 +397,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Save dataset and dictionary (For future use)"
+    "### Save dataset and dictionary (For future use, but not mandatory at GRICAD or IDRIS)"
    ]
   },
   {
@@ -407,15 +414,11 @@
     }
    ],
    "source": [
-    "# ---- To write dataset in the project place\n",
-    "#\n",
-    "output_dir = dataset_dir\n",
-    "\n",
-    "# ---- To write h5 dataset in a test place\n",
+    "# ---- To write h5 dataset in a test place (optional)\n",
     "# For small tests only !\n",
     "#\n",
-    "# output_dir = './data'\n",
-    "# ooo.mkdir(output_dir)\n",
+    "output_dir = './data'\n",
+    "ooo.mkdir(output_dir)\n",
     "\n",
     "\n",
     "with h5py.File(f'{output_dir}/dataset_imdb.h5', 'w') as f:\n",
diff --git a/IMDB/03-LSTM-Keras.ipynb b/IMDB/03-LSTM-Keras.ipynb
index 869ffdca5cb34e7341b52c59eaf3ed901b38b2c5..568463208784270e60d54bf2e3b3c693dc9a0961 100644
--- a/IMDB/03-LSTM-Keras.ipynb
+++ b/IMDB/03-LSTM-Keras.ipynb
@@ -103,13 +103,20 @@
     "\n",
     "# ----- Retrieve x,y\n",
     "#\n",
+    "# Choose if you want to load the dataset directly from Keras (small size, <20M)\n",
     "(x_train, y_train), (x_test, y_test) = imdb.load_data( num_words = vocab_size,\n",
     "                                                        skip_top = 0,\n",
     "                                                        maxlen = None,\n",
     "                                                        seed = 42,\n",
     "                                                        start_char = 1,\n",
     "                                                        oov_char = 2,\n",
-    "                                                        index_from = 3, )"
+    "                                                        index_from = 3, )\n",
+    "# Or you can use the same pre-loaded dataset if you are at GRICAD or IDRIS\n",
+    "#with h5py.File(f'{dataset_dir}/dataset_imdb.h5','r') as f:\n",
+    "#    x_train = f['x_train'][:]\n",
+    "#    y_train = f['y_train'][:]\n",
+    "#    x_test = f['x_test'][:]\n",
+    "#    y_test = f['y_test'][:]\n"
    ]
   },
   {
@@ -231,7 +238,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Save dataset and dictionary (can be usefull)"
+    "### Save dataset and dictionary (can be useful, but not mandatory at GRICAD or IDRIS)"
    ]
   },
   {
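
For context, a minimal sketch of the h5 round trip these hunks rely on: the save cell writes the four arrays to `dataset_imdb.h5`, and the commented-out alternative in the load cells reads them back. The helper names `save_imdb_h5` / `load_imdb_h5` are hypothetical, and the save cell's body is truncated in the hunk above, so this is a typical h5py pattern rather than the notebooks' exact code; it assumes the sequences have already been padded to equal length (h5py cannot store ragged lists directly).

    import h5py

    def save_imdb_h5(path, x_train, y_train, x_test, y_test):
        # One HDF5 dataset per array (typical pattern; not necessarily the notebook's exact body).
        with h5py.File(path, 'w') as f:
            f.create_dataset('x_train', data=x_train)
            f.create_dataset('y_train', data=y_train)
            f.create_dataset('x_test',  data=x_test)
            f.create_dataset('y_test',  data=y_test)

    def load_imdb_h5(path):
        # Mirrors the commented-out alternative in the load cells above.
        with h5py.File(path, 'r') as f:
            return f['x_train'][:], f['y_train'][:], f['x_test'][:], f['y_test'][:]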