From b9b68a75b560d0a83061830fde319dc83a1f1c8a Mon Sep 17 00:00:00 2001 From: Jean-Luc Parouty <Jean-Luc.Parouty@simap.grenoble-inp.fr> Date: Thu, 13 Jan 2022 18:20:07 +0100 Subject: [PATCH] Minors updates on SYNOP --- Misc/Scratchbook.ipynb | 148 ++++++++++++++++++++++--- SYNOP/LADYB1-Ladybug.ipynb | 24 ++-- SYNOP/SYNOP1-Preparation-of-data.ipynb | 48 ++++++-- SYNOP/SYNOP2-First-predictions.ipynb | 8 +- SYNOP/SYNOP3-12h-predictions.ipynb | 4 +- 5 files changed, 195 insertions(+), 37 deletions(-) diff --git a/Misc/Scratchbook.ipynb b/Misc/Scratchbook.ipynb index 4db3b4f..f39fc5b 100644 --- a/Misc/Scratchbook.ipynb +++ b/Misc/Scratchbook.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "floppy-organic", "metadata": {}, "outputs": [], @@ -53,10 +53,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "opposite-plasma", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Inputs shape is : (32, 20, 8)\n", + "Output shape is : (32, 16)\n" + ] + } + ], "source": [ "inputs = tf.random.normal([32, 20, 8])\n", "\n", @@ -69,12 +78,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "forbidden-murray", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Output shape : (32, 20, 18)\n", + "Memory state : (32, 18)\n", + "Carry state : (32, 18)\n" + ] + } + ], "source": [ - "lstm = tf.keras.layers.LSTM(20, return_sequences=True, return_state=True)\n", + "lstm = tf.keras.layers.LSTM(18, return_sequences=True, return_state=True)\n", "\n", "output, memory_state, carry_state = lstm(inputs)\n", "\n", @@ -85,10 +104,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "verified-fruit", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "<tf.Tensor: shape=(18,), dtype=float32, numpy=\n", + "array([-0.20923303, 0.00193496, 0.05929745, 0.0429938 , -0.02835345,\n", + " 0.14096233, 0.07420755, 0.1777523 , 0.1205566 , -0.03841979,\n", + " -0.02402029, 0.16098973, 0.10468155, -0.06480312, -0.02497844,\n", + " 0.09700071, -0.24351674, 0.04884451], dtype=float32)>" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# --- See the last vector of the output\n", "output[-1,-1]" @@ -96,10 +130,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "homeless-library", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "<tf.Tensor: shape=(18,), dtype=float32, numpy=\n", + "array([-0.20923303, 0.00193496, 0.05929745, 0.0429938 , -0.02835345,\n", + " 0.14096233, 0.07420755, 0.1777523 , 0.1205566 , -0.03841979,\n", + " -0.02402029, 0.16098973, 0.10468155, -0.06480312, -0.02497844,\n", + " 0.09700071, -0.24351674, 0.04884451], dtype=float32)>" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# ---- Memory state is the last output\n", "memory_state[-1]" @@ -107,18 +156,87 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "preliminary-psychiatry", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "<tf.Tensor: shape=(18,), dtype=float32, numpy=\n", + "array([-0.3245376 , 0.00296011, 0.13041827, 0.10711877, -0.05223516,\n", + " 0.4009896 , 0.21599025, 0.4260387 , 
0.30799934, -0.0799172 ,\n", + " -0.06359857, 0.29457492, 0.18084048, -0.14462015, -0.04707906,\n", + " 0.15726675, -0.38622206, 0.09004797], dtype=float32)>" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "carry_state[-1]" ] }, + { + "cell_type": "markdown", + "id": "41d326b2-376e-49d6-9429-07016d98dc09", + "metadata": {}, + "source": [ + "## 2 - TimeseriesGenerator" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "42276389-4ea6-42d1-93bc-6650062ef86a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of batch : 10\n", + "\n", + "#0 : [[1 2 3 4 5]] => [6]\n", + "#1 : [[2 3 4 5 6]] => [7]\n", + "#2 : [[3 4 5 6 7]] => [8]\n", + "#3 : [[4 5 6 7 8]] => [9]\n", + "#4 : [[5 6 7 8 9]] => [10]\n", + "#5 : [[ 6 7 8 9 10]] => [11]\n", + "#6 : [[ 7 8 9 10 11]] => [12]\n", + "#7 : [[ 8 9 10 11 12]] => [13]\n", + "#8 : [[ 9 10 11 12 13]] => [14]\n", + "#9 : [[10 11 12 13 14]] => [15]\n" + ] + } + ], + "source": [ + "from keras.preprocessing.sequence import TimeseriesGenerator\n", + "\n", + "# ---- Define a dataset\n", + "\n", + "series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])\n", + "\n", + "# ---- Generator\n", + "\n", + "generator = TimeseriesGenerator(series, series, length=5, batch_size=1)\n", + "\n", + "# ---- Samples\n", + "\n", + "nb_batch = len(generator)\n", + "\n", + "print(f'Number of batch : {nb_batch}\\n')\n", + "for i in range(nb_batch):\n", + " x, y = generator[i]\n", + " print(f'#{i} : {x} => {y}')" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "heard-processing", + "id": "4d94892b-d3a5-448d-aa2b-28c3a01a4b72", "metadata": {}, "outputs": [], "source": [] @@ -126,7 +244,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -140,7 +258,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/SYNOP/LADYB1-Ladybug.ipynb b/SYNOP/LADYB1-Ladybug.ipynb index 8c6598f..6f6be84 100644 --- a/SYNOP/LADYB1-Ladybug.ipynb +++ b/SYNOP/LADYB1-Ladybug.ipynb @@ -129,7 +129,6 @@ " x = ax1*sin(t*(kx1+20)) + ax2*cos(t*(kx2+10)) + ax3*sin(t*(kx3+5)) + ax4*cos(t*(kx4+5))\n", " y = ay1*cos(t*(ky1+20)) + ay2*sin(t*(ky2+10)) + ay3*cos(t*(ky3+5)) + ay4*sin(t*(ky4+5)) \n", "\n", - "\n", " return x,y" ] }, @@ -152,9 +151,8 @@ "x,y = 0,0\n", "positions=[]\n", "for t in np.arange(0., max_t, delta_t):\n", - " positions.append([x,y])\n", " x,y = ladybug_move(t)\n", - "# (x,y) = (x+dx, y+dy)\n", + " positions.append([x,y])\n", "\n", "# ---- Build rescaled dataset\n", "#\n", @@ -257,7 +255,6 @@ "source": [ "model = keras.models.Sequential()\n", "model.add( keras.layers.InputLayer(input_shape=(sequence_len, features_len)) )\n", - "# model.add( keras.layers.GRU(200, dropout=.1, recurrent_dropout=0.5, return_sequences=False, activation='relu') )\n", "model.add( keras.layers.GRU(200, return_sequences=False, activation='relu') )\n", "model.add( keras.layers.Dense(features_len) )\n", "\n", @@ -284,9 +281,10 @@ "metadata": {}, "outputs": [], "source": [ - "pwk.mkdir('./run/models')\n", - "save_dir = './run/models/best_model.h5'\n", - "bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_best_only=True)" + "os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)\n", + "save_path = 
f'{run_dir}/models/best_model.h5'\n", + "\n", + "bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_path, verbose=0, save_best_only=True)" ] }, { @@ -361,7 +359,7 @@ "metadata": {}, "outputs": [], "source": [ - "loaded_model = tf.keras.models.load_model('./run/models/best_model.h5')\n", + "loaded_model = tf.keras.models.load_model(save_path)\n", "print('Loaded.')" ] }, @@ -386,6 +384,10 @@ "\n", "sequence_pred = loaded_model.predict( np.array([sequence]) )\n", "\n", + "print('sequence shape :',sequence.shape)\n", + "print('sequence true shape :',sequence_true.shape)\n", + "print('sequence pred shape :',sequence_pred.shape)\n", + "\n", "pwk.plot_2d_segment(sequence_true, sequence_pred)\n", "pwk.plot_multivariate_serie(sequence_true, predictions=sequence_pred, labels=['Axis=0', 'Axis=1'],save_as='04-one-step-prediction')" ] @@ -395,7 +397,8 @@ "metadata": {}, "source": [ "### 5.3 - Make n-steps prediction\n", - "A longer term prediction, via a nice iteration function :" + "A longer term prediction, via a nice iteration function \n", + "We will perform <iteration> predictions to iteratively build our prediction." ] }, { @@ -471,7 +474,8 @@ "hash": "8e38643e33497db9a306e3f311fa98cb1e65371278ca73ee4ea0c76aa5a4f387" }, "kernelspec": { - "display_name": "Python 3.9.7 64-bit ('fidle-cpu': conda)", + "display_name": "Python 3 (ipykernel)", + "language": "python", "name": "python3" }, "language_info": { diff --git a/SYNOP/SYNOP1-Preparation-of-data.ipynb b/SYNOP/SYNOP1-Preparation-of-data.ipynb index 5f09b9f..081e533 100644 --- a/SYNOP/SYNOP1-Preparation-of-data.ipynb +++ b/SYNOP/SYNOP1-Preparation-of-data.ipynb @@ -124,7 +124,7 @@ "metadata": {}, "source": [ "### 3.1 - Read dataset description\n", - "We need the list and description of the columns." 
+ "Get columns names of the dataset from the schema description" ] }, { @@ -157,16 +157,20 @@ "display(df.tail(10))\n", "\n", "# ---- Get the columns name as descriptions\n", + "#\n", "synop_desc = list(df.columns)\n", "\n", "# ---- Set Codes as columns name\n", + "#\n", "df.columns = synop_codes\n", "code2desc = dict(zip(synop_codes, synop_desc))\n", "\n", "# ---- Count the na values by columns\n", + "#\n", "columns_na = df.isna().sum().tolist()\n", "\n", "# ---- Show all of that\n", + "#\n", "df_desc=pd.DataFrame({'Code':synop_codes, 'Description':synop_desc, 'Na':columns_na})\n", "\n", "pwk.subtitle('List of columns :')\n", @@ -199,22 +203,49 @@ "# ---- Show all of that\n", "\n", "pwk.subtitle('Our selected columns :')\n", - "display(df.head(20))\n", - "\n", + "display(df.head(20))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.2 - Few stats\n", + "Note : We note that per column is constant, so we can drop it" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "pwk.subtitle('Few statistics :')\n", "display(df.describe().style.format('{:.2f}'))\n", "\n", "# ---- 'per' column is constant, we can drop it\n", "\n", - "df.drop(['per'],axis=1,inplace=True)\n" + "df.drop(['per'],axis=1,inplace=True)\n", + "\n", + "# ---- Count the na values by columns\n", + "#\n", + "dataset_na = df.isna().sum().tolist()\n", + "dataset_cols = df.columns.tolist()\n", + "dataset_desc = [ code2desc[c] for c in dataset_cols ]\n", + "\n", + "# ---- Show all of that\n", + "#\n", + "pwk.subtitle('Do we have na values ?')\n", + "df_desc=pd.DataFrame({'Columns':dataset_cols, 'Description':dataset_desc, 'Na':dataset_na})\n", + "display(df_desc.style.set_properties(**{'text-align': 'left'}))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 4.2 - Cleanup dataset\n", - "Let's sort it and cook up some NaN values" + "### 4.3 - Cleanup dataset\n", + "Let's sort it and cook up NaN values with an interpolation" ] }, { @@ -259,11 +290,13 @@ "outputs": [], "source": [ "# ---- Count the na values by columns\n", + "#\n", "dataset_na = df.isna().sum().tolist()\n", "dataset_cols = df.columns.tolist()\n", "dataset_desc = [ code2desc[c] for c in dataset_cols ]\n", "\n", "# ---- Show all of that\n", + "#\n", "df_desc=pd.DataFrame({'Columns':dataset_cols, 'Description':dataset_desc, 'Na':dataset_na})\n", "pwk.subtitle('Dataset columns :')\n", "display(df_desc.style.set_properties(**{'text-align': 'left'}))\n", @@ -345,7 +378,8 @@ "hash": "8e38643e33497db9a306e3f311fa98cb1e65371278ca73ee4ea0c76aa5a4f387" }, "kernelspec": { - "display_name": "Python 3.9.7 64-bit ('fidle-cpu': conda)", + "display_name": "Python 3 (ipykernel)", + "language": "python", "name": "python3" }, "language_info": { diff --git a/SYNOP/SYNOP2-First-predictions.ipynb b/SYNOP/SYNOP2-First-predictions.ipynb index 1d26012..c4fc1e8 100644 --- a/SYNOP/SYNOP2-First-predictions.ipynb +++ b/SYNOP/SYNOP2-First-predictions.ipynb @@ -165,6 +165,7 @@ "outputs": [], "source": [ "# ---- Train generator\n", + "\n", "train_generator = TimeseriesGenerator(dataset_train, dataset_train, length=sequence_len, batch_size=batch_size)\n", "test_generator = TimeseriesGenerator(dataset_test, dataset_test, length=sequence_len, batch_size=batch_size)\n", "\n", @@ -178,9 +179,9 @@ "print('batch y shape : ',y.shape)\n", "\n", "x,y=train_generator[0]\n", - "pwk.subtitle('What a batch looks like (x) :')\n", + "pwk.subtitle('What a batch looks like (x[0]) :')\n", "pwk.np_print(x[0] )\n", 
- "pwk.subtitle('What a batch looks like (y) :')\n", + "pwk.subtitle('What a batch looks like (y[0]) :')\n", "pwk.np_print(y[0])" ] }, @@ -401,7 +402,8 @@ "hash": "8e38643e33497db9a306e3f311fa98cb1e65371278ca73ee4ea0c76aa5a4f387" }, "kernelspec": { - "display_name": "Python 3.9.7 64-bit ('fidle-cpu': conda)", + "display_name": "Python 3 (ipykernel)", + "language": "python", "name": "python3" }, "language_info": { diff --git a/SYNOP/SYNOP3-12h-predictions.ipynb b/SYNOP/SYNOP3-12h-predictions.ipynb index 40d9c3f..cd0f27f 100644 --- a/SYNOP/SYNOP3-12h-predictions.ipynb +++ b/SYNOP/SYNOP3-12h-predictions.ipynb @@ -308,7 +308,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -322,7 +322,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.9.7" } }, "nbformat": 4, -- GitLab