From b9b68a75b560d0a83061830fde319dc83a1f1c8a Mon Sep 17 00:00:00 2001
From: Jean-Luc Parouty <Jean-Luc.Parouty@simap.grenoble-inp.fr>
Date: Thu, 13 Jan 2022 18:20:07 +0100
Subject: [PATCH] Minor updates on SYNOP

---
 Misc/Scratchbook.ipynb                 | 148 ++++++++++++++++++++++---
 SYNOP/LADYB1-Ladybug.ipynb             |  24 ++--
 SYNOP/SYNOP1-Preparation-of-data.ipynb |  48 ++++++--
 SYNOP/SYNOP2-First-predictions.ipynb   |   8 +-
 SYNOP/SYNOP3-12h-predictions.ipynb     |   4 +-
 5 files changed, 195 insertions(+), 37 deletions(-)

diff --git a/Misc/Scratchbook.ipynb b/Misc/Scratchbook.ipynb
index 4db3b4f..f39fc5b 100644
--- a/Misc/Scratchbook.ipynb
+++ b/Misc/Scratchbook.ipynb
@@ -29,7 +29,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "floppy-organic",
    "metadata": {},
    "outputs": [],
@@ -53,10 +53,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "opposite-plasma",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Inputs shape is :  (32, 20, 8)\n",
+      "Output shape is :  (32, 16)\n"
+     ]
+    }
+   ],
    "source": [
     "inputs  = tf.random.normal([32, 20, 8])\n",
     "\n",
@@ -69,12 +78,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "forbidden-murray",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Output shape :  (32, 20, 18)\n",
+      "Memory state :  (32, 18)\n",
+      "Carry  state :  (32, 18)\n"
+     ]
+    }
+   ],
    "source": [
-    "lstm = tf.keras.layers.LSTM(20, return_sequences=True, return_state=True)\n",
+    "lstm = tf.keras.layers.LSTM(18, return_sequences=True, return_state=True)\n",
     "\n",
     "output, memory_state, carry_state = lstm(inputs)\n",
     "\n",
@@ -85,10 +104,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "verified-fruit",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<tf.Tensor: shape=(18,), dtype=float32, numpy=\n",
+       "array([-0.20923303,  0.00193496,  0.05929745,  0.0429938 , -0.02835345,\n",
+       "        0.14096233,  0.07420755,  0.1777523 ,  0.1205566 , -0.03841979,\n",
+       "       -0.02402029,  0.16098973,  0.10468155, -0.06480312, -0.02497844,\n",
+       "        0.09700071, -0.24351674,  0.04884451], dtype=float32)>"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# --- See the last vector of the output\n",
     "output[-1,-1]"
@@ -96,10 +130,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "homeless-library",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<tf.Tensor: shape=(18,), dtype=float32, numpy=\n",
+       "array([-0.20923303,  0.00193496,  0.05929745,  0.0429938 , -0.02835345,\n",
+       "        0.14096233,  0.07420755,  0.1777523 ,  0.1205566 , -0.03841979,\n",
+       "       -0.02402029,  0.16098973,  0.10468155, -0.06480312, -0.02497844,\n",
+       "        0.09700071, -0.24351674,  0.04884451], dtype=float32)>"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# ---- Memory state is the last output\n",
     "memory_state[-1]"
@@ -107,18 +156,87 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "id": "preliminary-psychiatry",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<tf.Tensor: shape=(18,), dtype=float32, numpy=\n",
+       "array([-0.3245376 ,  0.00296011,  0.13041827,  0.10711877, -0.05223516,\n",
+       "        0.4009896 ,  0.21599025,  0.4260387 ,  0.30799934, -0.0799172 ,\n",
+       "       -0.06359857,  0.29457492,  0.18084048, -0.14462015, -0.04707906,\n",
+       "        0.15726675, -0.38622206,  0.09004797], dtype=float32)>"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "carry_state[-1]"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "41d326b2-376e-49d6-9429-07016d98dc09",
+   "metadata": {},
+   "source": [
+    "## 2 - TimeseriesGenerator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "42276389-4ea6-42d1-93bc-6650062ef86a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of batch : 10\n",
+      "\n",
+      "#0 : [[1 2 3 4 5]] => [6]\n",
+      "#1 : [[2 3 4 5 6]] => [7]\n",
+      "#2 : [[3 4 5 6 7]] => [8]\n",
+      "#3 : [[4 5 6 7 8]] => [9]\n",
+      "#4 : [[5 6 7 8 9]] => [10]\n",
+      "#5 : [[ 6  7  8  9 10]] => [11]\n",
+      "#6 : [[ 7  8  9 10 11]] => [12]\n",
+      "#7 : [[ 8  9 10 11 12]] => [13]\n",
+      "#8 : [[ 9 10 11 12 13]] => [14]\n",
+      "#9 : [[10 11 12 13 14]] => [15]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from keras.preprocessing.sequence import TimeseriesGenerator\n",
+    "\n",
+    "# ---- Define a dataset\n",
+    "\n",
+    "series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])\n",
+    "\n",
+    "# ---- Generator\n",
+    "\n",
+    "generator = TimeseriesGenerator(series, series, length=5, batch_size=1)\n",
+    "\n",
+    "# ---- Samples\n",
+    "\n",
+    "nb_batch = len(generator)\n",
+    "\n",
+    "print(f'Number of batch : {nb_batch}\\n')\n",
+    "for i in range(nb_batch):\n",
+    "    x, y = generator[i]\n",
+    "    print(f'#{i} : {x} => {y}')"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "heard-processing",
+   "id": "4d94892b-d3a5-448d-aa2b-28c3a01a4b72",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -126,7 +244,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -140,7 +258,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,
diff --git a/SYNOP/LADYB1-Ladybug.ipynb b/SYNOP/LADYB1-Ladybug.ipynb
index 8c6598f..6f6be84 100644
--- a/SYNOP/LADYB1-Ladybug.ipynb
+++ b/SYNOP/LADYB1-Ladybug.ipynb
@@ -129,7 +129,6 @@
     "    x = ax1*sin(t*(kx1+20)) + ax2*cos(t*(kx2+10)) + ax3*sin(t*(kx3+5)) + ax4*cos(t*(kx4+5))\n",
     "    y = ay1*cos(t*(ky1+20)) + ay2*sin(t*(ky2+10)) + ay3*cos(t*(ky3+5)) + ay4*sin(t*(ky4+5)) \n",
     "\n",
-    "\n",
     "    return x,y"
    ]
   },
@@ -152,9 +151,8 @@
     "x,y = 0,0\n",
     "positions=[]\n",
     "for t in np.arange(0., max_t, delta_t):\n",
-    "    positions.append([x,y])\n",
     "    x,y = ladybug_move(t)\n",
-    "#     (x,y) = (x+dx, y+dy)\n",
+    "    positions.append([x,y])\n",
     "\n",
     "# ---- Build rescaled dataset\n",
     "#\n",
@@ -257,7 +255,6 @@
    "source": [
     "model = keras.models.Sequential()\n",
     "model.add( keras.layers.InputLayer(input_shape=(sequence_len, features_len)) )\n",
-    "# model.add( keras.layers.GRU(200, dropout=.1, recurrent_dropout=0.5, return_sequences=False, activation='relu') )\n",
     "model.add( keras.layers.GRU(200, return_sequences=False, activation='relu') )\n",
     "model.add( keras.layers.Dense(features_len) )\n",
     "\n",
@@ -284,9 +281,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "pwk.mkdir('./run/models')\n",
-    "save_dir = './run/models/best_model.h5'\n",
-    "bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_dir, verbose=0, save_best_only=True)"
+    "os.makedirs(f'{run_dir}/models',   mode=0o750, exist_ok=True)\n",
+    "save_path = f'{run_dir}/models/best_model.h5'\n",
+    "\n",
+    "bestmodel_callback = tf.keras.callbacks.ModelCheckpoint(filepath=save_path, verbose=0, save_best_only=True)"
    ]
   },
   {
@@ -361,7 +359,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "loaded_model = tf.keras.models.load_model('./run/models/best_model.h5')\n",
+    "loaded_model = tf.keras.models.load_model(save_path)\n",
     "print('Loaded.')"
    ]
   },
@@ -386,6 +384,10 @@
     "\n",
     "sequence_pred = loaded_model.predict( np.array([sequence]) )\n",
     "\n",
+    "print('sequence shape      :',sequence.shape)\n",
+    "print('sequence true shape :',sequence_true.shape)\n",
+    "print('sequence pred shape :',sequence_pred.shape)\n",
+    "\n",
     "pwk.plot_2d_segment(sequence_true, sequence_pred)\n",
     "pwk.plot_multivariate_serie(sequence_true, predictions=sequence_pred, labels=['Axis=0', 'Axis=1'],save_as='04-one-step-prediction')"
    ]
@@ -395,7 +397,8 @@
    "metadata": {},
    "source": [
     "### 5.3 - Make n-steps prediction\n",
-    "A longer term prediction, via a nice iteration function :"
+    "A longer term prediction, via a nice iteration function  \n",
+    "We will perform <iteration> predictions to iteratively build our prediction."
    ]
   },
   {
@@ -471,7 +474,8 @@
    "hash": "8e38643e33497db9a306e3f311fa98cb1e65371278ca73ee4ea0c76aa5a4f387"
   },
   "kernelspec": {
-   "display_name": "Python 3.9.7 64-bit ('fidle-cpu': conda)",
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
    "name": "python3"
   },
   "language_info": {
diff --git a/SYNOP/SYNOP1-Preparation-of-data.ipynb b/SYNOP/SYNOP1-Preparation-of-data.ipynb
index 5f09b9f..081e533 100644
--- a/SYNOP/SYNOP1-Preparation-of-data.ipynb
+++ b/SYNOP/SYNOP1-Preparation-of-data.ipynb
@@ -124,7 +124,7 @@
    "metadata": {},
    "source": [
     "### 3.1 - Read dataset description\n",
-    "We need the list and description of the columns."
+    "Get columns names of the dataset from the schema description"
    ]
   },
   {
@@ -157,16 +157,20 @@
     "display(df.tail(10))\n",
     "\n",
     "# ---- Get the columns name as descriptions\n",
+    "#\n",
     "synop_desc = list(df.columns)\n",
     "\n",
     "# ---- Set Codes as columns name\n",
+    "#\n",
     "df.columns   = synop_codes\n",
     "code2desc    = dict(zip(synop_codes, synop_desc))\n",
     "\n",
     "# ---- Count the na values by columns\n",
+    "#\n",
     "columns_na = df.isna().sum().tolist()\n",
     "\n",
     "# ---- Show all of that\n",
+    "#\n",
     "df_desc=pd.DataFrame({'Code':synop_codes, 'Description':synop_desc, 'Na':columns_na})\n",
     "\n",
     "pwk.subtitle('List of columns :')\n",
@@ -199,22 +203,49 @@
     "# ---- Show all of that\n",
     "\n",
     "pwk.subtitle('Our selected columns :')\n",
-    "display(df.head(20))\n",
-    "\n",
+    "display(df.head(20))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4.2 - Few stats\n",
+    "Note : We note that per column is constant, so we can drop it"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "pwk.subtitle('Few statistics :')\n",
     "display(df.describe().style.format('{:.2f}'))\n",
     "\n",
     "# ---- 'per' column is constant, we can drop it\n",
     "\n",
-    "df.drop(['per'],axis=1,inplace=True)\n"
+    "df.drop(['per'],axis=1,inplace=True)\n",
+    "\n",
+    "# ---- Count the na values by columns\n",
+    "#\n",
+    "dataset_na    = df.isna().sum().tolist()\n",
+    "dataset_cols  = df.columns.tolist()\n",
+    "dataset_desc  = [ code2desc[c] for c in dataset_cols ]\n",
+    "\n",
+    "# ---- Show all of that\n",
+    "#\n",
+    "pwk.subtitle('Do we have na values ?')\n",
+    "df_desc=pd.DataFrame({'Columns':dataset_cols, 'Description':dataset_desc, 'Na':dataset_na})\n",
+    "display(df_desc.style.set_properties(**{'text-align': 'left'}))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 4.2 - Cleanup dataset\n",
-    "Let's sort it and cook up some NaN values"
+    "### 4.3 - Cleanup dataset\n",
+    "Let's sort it and cook up NaN values with an interpolation"
    ]
   },
   {
@@ -259,11 +290,13 @@
    "outputs": [],
    "source": [
     "# ---- Count the na values by columns\n",
+    "#\n",
     "dataset_na    = df.isna().sum().tolist()\n",
     "dataset_cols  = df.columns.tolist()\n",
     "dataset_desc  = [ code2desc[c] for c in dataset_cols ]\n",
     "\n",
     "# ---- Show all of that\n",
+    "#\n",
     "df_desc=pd.DataFrame({'Columns':dataset_cols, 'Description':dataset_desc, 'Na':dataset_na})\n",
     "pwk.subtitle('Dataset columns :')\n",
     "display(df_desc.style.set_properties(**{'text-align': 'left'}))\n",
@@ -345,7 +378,8 @@
    "hash": "8e38643e33497db9a306e3f311fa98cb1e65371278ca73ee4ea0c76aa5a4f387"
   },
   "kernelspec": {
-   "display_name": "Python 3.9.7 64-bit ('fidle-cpu': conda)",
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
    "name": "python3"
   },
   "language_info": {
diff --git a/SYNOP/SYNOP2-First-predictions.ipynb b/SYNOP/SYNOP2-First-predictions.ipynb
index 1d26012..c4fc1e8 100644
--- a/SYNOP/SYNOP2-First-predictions.ipynb
+++ b/SYNOP/SYNOP2-First-predictions.ipynb
@@ -165,6 +165,7 @@
    "outputs": [],
    "source": [
     "# ---- Train generator\n",
+    "\n",
     "train_generator = TimeseriesGenerator(dataset_train, dataset_train, length=sequence_len,  batch_size=batch_size)\n",
     "test_generator  = TimeseriesGenerator(dataset_test,  dataset_test,  length=sequence_len,  batch_size=batch_size)\n",
     "\n",
@@ -178,9 +179,9 @@
     "print('batch y shape : ',y.shape)\n",
     "\n",
     "x,y=train_generator[0]\n",
-    "pwk.subtitle('What a batch looks like (x) :')\n",
+    "pwk.subtitle('What a batch looks like (x[0]) :')\n",
     "pwk.np_print(x[0] )\n",
-    "pwk.subtitle('What a batch looks like (y) :')\n",
+    "pwk.subtitle('What a batch looks like (y[0]) :')\n",
     "pwk.np_print(y[0])"
    ]
   },
@@ -401,7 +402,8 @@
    "hash": "8e38643e33497db9a306e3f311fa98cb1e65371278ca73ee4ea0c76aa5a4f387"
   },
   "kernelspec": {
-   "display_name": "Python 3.9.7 64-bit ('fidle-cpu': conda)",
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
    "name": "python3"
   },
   "language_info": {
diff --git a/SYNOP/SYNOP3-12h-predictions.ipynb b/SYNOP/SYNOP3-12h-predictions.ipynb
index 40d9c3f..cd0f27f 100644
--- a/SYNOP/SYNOP3-12h-predictions.ipynb
+++ b/SYNOP/SYNOP3-12h-predictions.ipynb
@@ -308,7 +308,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -322,7 +322,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,
-- 
GitLab
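
The LADYB1 cell above announces an n-steps prediction built by iteration, but the loop itself is outside this diff. A minimal sketch of such a loop, assuming a trained Keras model `model`, a seed window `sequence` of shape (sequence_len, features_len) and an `iterations` count (illustrative names, not necessarily the notebook's own):

    import numpy as np

    def iterate_predictions(model, sequence, iterations):
        """Predict `iterations` future steps, feeding each prediction back in."""
        window = np.array(sequence)                  # (sequence_len, features_len)
        predictions = []
        for _ in range(iterations):
            # Predict the next step from the current window (batch of one)
            next_step = model.predict(window[np.newaxis, :, :], verbose=0)[0]
            predictions.append(next_step)
            # Slide the window: drop the oldest step, append the prediction
            window = np.vstack([window[1:], next_step])
        return np.array(predictions)                 # (iterations, features_len)

Called for example as iterate_predictions(loaded_model, sequence, 12), it returns an array that can be rescaled and plotted like sequence_pred above.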
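
Likewise, the SYNOP1 "Cleanup dataset" cell only states that the data is sorted and its NaN values interpolated; that code is not part of this diff. A minimal pandas sketch of such a cleanup, assuming a hypothetical 'date' column for the chronological sort:

    import pandas as pd

    def cleanup(df: pd.DataFrame, date_col: str = 'date') -> pd.DataFrame:
        """Sort chronologically, then fill NaN values by linear interpolation."""
        df = df.sort_values(date_col).reset_index(drop=True)
        df = df.interpolate(method='linear')   # fill gaps from neighbouring rows
        return df.bfill().ffill()              # edges that interpolation cannot reach

Linear interpolation is a reasonable default for hourly weather series; the notebook itself may use a different method.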