01-Ladybug.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img width=\"800px\" src=\"../fidle/img/header.svg\"></img>\n",
    "\n",
    "# <!-- TITLE --> [K3LADYB1] - Prediction of a 2D trajectory via RNN\n",
    "<!-- DESC --> Artificial dataset generation and prediction attempt via a recurrent network, using Keras 3 and PyTorch\n",
    "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n",
    "\n",
    "## Objectives :\n",
    " - Understanding the use of a recurrent neural network\n",
    "\n",
    "## What we're going to do :\n",
    "\n",
    " - Generate an artificial dataset\n",
    " - Prepare the dataset\n",
    " - Train and test our model\n",
    " - Making predictions\n",
    "\n",
    "## Step 1 - Import and init\n",
    "### 1.1 - Python"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# The Keras backend must be selected before keras is imported\n",
    "os.environ['KERAS_BACKEND'] = 'torch'\n",
    "\n",
    "import keras\n",
    "import numpy as np\n",
    "from math import cos, sin\n",
    "import random\n",
    "\n",
    "import fidle\n",
    "\n",
    "# Init Fidle environment\n",
    "# (run_dir is used further down as the root folder for the saved model)\n",
    "run_id, run_dir, datasets_dir = fidle.init('K3LADYB1')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.2 - Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ---- About dataset\n",
    "#\n",
    "max_t        = 1000      # Upper bound of the time axis used to sample the trajectory\n",
    "delta_t      = 0.01      # Time step between two sampled positions\n",
    "features_len = 2         # Number of features per position (x and y)\n",
    "\n",
    "\n",
    "sequence_len = 20        # Number of consecutive positions given to the model\n",
    "predict_len  = 5         # Number of positions to predict (see the 'objectives' plot)\n",
    "\n",
    "# ---- About training\n",
    "#\n",
    "scale         = .2       # Fraction of the generated dataset to be used (1 = all)\n",
    "train_prop    = .8       # Fraction used for train (the rest being for the test)\n",
    "batch_size    = 32\n",
    "epochs        = 5\n",
    "fit_verbosity = 1        # 0 = silent, 1 = progress bar, 2 = one line per epoch"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Override parameters (batch mode) - Just forget this cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parameters that may be overridden when the notebook is run in batch mode\n",
    "fidle.override(\n",
    "    'scale', 'train_prop',\n",
    "    'sequence_len', 'predict_len',\n",
    "    'batch_size', 'epochs', 'fit_verbosity'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2 - Generation of a fun dataset\n",
    "### 2.1 - Virtual trajectory of our ladybug"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def ladybug_init(s=122):\n",
    "    \n",
    "    if s>0 : random.seed(s)\n",
    "    ladybug_init.params_x = [ random.gauss(0.,1.) for u in range(8)]\n",
    "    ladybug_init.params_y = [ random.gauss(0.,1.) for u in range(8)]\n",
    "    \n",
    "def ladybug_move(t):\n",
    "\n",
    "    [ax1, ax2, ax3, ax4, kx1, kx2, kx3, kx4] = ladybug_init.params_x\n",
    "    [ay1, ay2, ay3, ay4, ky1, ky2, ky3, ky4] = ladybug_init.params_y\n",
    "    \n",
    "    x = ax1*sin(t*(kx1+20)) + ax2*cos(t*(kx2+10)) + ax3*sin(t*(kx3+5)) + ax4*cos(t*(kx4+5))\n",
    "    y = ay1*cos(t*(ky1+20)) + ay2*sin(t*(ky2+10)) + ay3*cos(t*(ky3+5)) + ay4*sin(t*(ky4+5)) \n",
    "\n",
    "    return x,y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.2 - Get some positions, and build a rescaled and normalized dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ---- Get positions : one [x,y] point per time step\n",
    "#\n",
    "ladybug_init(s=16)\n",
    "positions = [ list(ladybug_move(t)) for t in np.arange(0., max_t, delta_t) ]\n",
    "\n",
    "# ---- Build rescaled dataset : keep the first part only, then split train / test\n",
    "#\n",
    "n_kept  = int( len(positions)*scale )\n",
    "dataset = np.array(positions[:n_kept])\n",
    "\n",
    "n_train = int( len(dataset)*train_prop )\n",
    "x_train, x_test = dataset[:n_train], dataset[n_train:]\n",
    "\n",
    "# ---- Normalize, with the mean and std of the train part only\n",
    "#\n",
    "mean, std = x_train.mean(), x_train.std()\n",
    "x_train   = (x_train - mean) / std\n",
    "x_test    = (x_test  - mean) / std\n",
    "\n",
    "print(\"Dataset generated.\")\n",
    "print(\"Train shape is : \", x_train.shape)\n",
    "print(\"Test  shape is : \", x_test.shape)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.3 - Have a look\n",
    "An extract from the data we have: the virtual trajectory of our ladybug   \n",
    "And what we want to predict (in red), from a segment (in blue)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the first 1000 (normalized) positions of the train set\n",
    "fidle.scrawler.serie_2d( x_train[:1000],\n",
    "                         figsize = (12,12),\n",
    "                         lw      = 1,\n",
    "                         ms      = 4,\n",
    "                         save_as = '01-dataset' )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Random window of the test set : sequence_len known points, followed by predict_len points to predict\n",
    "seq_start = random.randint(0, len(x_test) - sequence_len - predict_len)\n",
    "seq_end   = seq_start + sequence_len\n",
    "pred_end  = seq_end   + predict_len\n",
    "\n",
    "fidle.scrawler.segment_2d( x_test[seq_start:pred_end], x_test[seq_end:pred_end], ms=6, save_as='02-objectives')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.4 - Prepare sequences from datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ---- Create sequences and labels for train and test\n",
    "#\n",
    "# A sample is a window of sequence_len consecutive positions, and its label\n",
    "# is the position that immediately follows this window : x[ i+sequence_len ]\n",
    "# (the prediction steps below plot, or feed back, the model output as the very next point)\n",
    "#\n",
    "xs_train, ys_train=[],[]\n",
    "all_i = np.random.permutation( len(x_train) - sequence_len )\n",
    "\n",
    "for i in all_i:\n",
    "    xs_train.append( x_train[ i : i+sequence_len ] )\n",
    "    ys_train.append( x_train[ i+sequence_len ]     )\n",
    "    \n",
    "xs_test, ys_test=[],[]\n",
    "for i in range( len(x_test) - sequence_len ):\n",
    "    xs_test.append( x_test[ i : i+sequence_len ] )\n",
    "    ys_test.append( x_test[ i+sequence_len ]     )\n",
    "\n",
    "# ---- Convert to numpy / float16\n",
    "    \n",
    "xs_train = np.array(xs_train, dtype='float16')\n",
    "ys_train = np.array(ys_train, dtype='float16')\n",
    "xs_test  = np.array(xs_test,  dtype='float16')\n",
    "ys_test  = np.array(ys_test,  dtype='float16')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ---- Sizes of the train set\n",
    "#\n",
    "fidle.utils.subtitle('About the splitting of our dataset :')\n",
    "\n",
    "print('Number of sequences : ', len(xs_train))\n",
    "print('xs_train shape      : ', xs_train.shape)\n",
    "print('ys_train shape      : ', ys_train.shape)\n",
    "\n",
    "# ---- One example of sequence, with its label\n",
    "#\n",
    "sample_id = 10\n",
    "for title, sample in [ ('What an xs look like :', xs_train[sample_id]),\n",
    "                       ('What an ys look like :', ys_train[sample_id]) ]:\n",
    "    fidle.utils.subtitle(title)\n",
    "    fidle.utils.np_print(sample)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3 - Create a model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A GRU reads the sequence and a Dense layer outputs one position (features_len values)\n",
    "model = keras.models.Sequential([\n",
    "    keras.layers.InputLayer(shape=(sequence_len, features_len)),\n",
    "    keras.layers.GRU(200, return_sequences=False, activation='relu'),\n",
    "    keras.layers.Dense(features_len),\n",
    "])\n",
    "\n",
    "model.summary()\n",
    "\n",
    "model.compile( optimizer='rmsprop', loss='mse', metrics=['mae'] )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 4 - Train the model\n",
    "### 4.1 Add Callbacks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ---- Folder and file where the best model is saved\n",
    "#\n",
    "os.makedirs(f'{run_dir}/models',   mode=0o750, exist_ok=True)\n",
    "save_dir = f'{run_dir}/models/best_model.keras'\n",
    "\n",
    "# ---- Keep only the best model\n",
    "#      val_mae is an error : the best model is the one with the LOWEST value, so mode='min'\n",
    "#\n",
    "savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_mae', mode='min', save_best_only=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.2 - Train it\n",
    "Takes about 3 minutes on a CPU laptop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "chrono=fidle.Chrono()\n",
    "chrono.start()\n",
    "\n",
    "# batch_size is given explicitly, so that the (overridable) parameter is really used\n",
    "history=model.fit(xs_train,ys_train,\n",
    "                  batch_size = batch_size,\n",
    "                  epochs  = epochs, \n",
    "                  verbose = fit_verbosity,\n",
    "                  validation_data = (xs_test, ys_test),\n",
    "                  callbacks = [savemodel_callback])\n",
    "\n",
    "chrono.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train / validation curves, for the loss (mse) and for the metric (mae)\n",
    "fidle.scrawler.history( history,\n",
    "                        plot    = { 'loss':['loss','val_loss'], 'mae':['mae','val_mae'] },\n",
    "                        save_as = '03-history' )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 5 - Predict"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.1 - Load model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the model saved by the checkpoint callback during the training\n",
    "best_model_path = f'{run_dir}/models/best_model.keras'\n",
    "loaded_model    = keras.models.load_model(best_model_path)\n",
    "print('Loaded.')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.2 - Make a 1-step prediction\n",
    "A simple prediction on a single iteration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ---- Pick a random start in the test set\n",
    "#      random.randint() includes its upper bound, so we keep room for the\n",
    "#      sequence itself AND for the true next point we want to compare with.\n",
    "#\n",
    "s=random.randint(0,len(x_test)-sequence_len-1)\n",
    "\n",
    "sequence      = x_test[s:s+sequence_len]\n",
    "sequence_true = x_test[s:s+sequence_len+1]\n",
    "\n",
    "# ---- Predict the point that follows the sequence (the model expects a batch)\n",
    "#\n",
    "sequence_pred = loaded_model.predict( np.array([sequence]), verbose=fit_verbosity )\n",
    "\n",
    "print('sequence shape      :',sequence.shape)\n",
    "print('sequence true shape :',sequence_true.shape)\n",
    "print('sequence pred shape :',sequence_pred.shape)\n",
    "\n",
    "fidle.scrawler.segment_2d(sequence_true, sequence_pred, save_as='04-one-step-prediction')\n",
    "fidle.scrawler.multivariate_serie(sequence_true, predictions=sequence_pred, labels=['Axis=0', 'Axis=1'],save_as='05-one-step-prediction-2axis')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.3 - Make n-steps prediction\n",
    "A longer term prediction, via a nice iteration function  \n",
    "We will perform `iterations` successive one-step predictions, feeding each predicted point back into the input sequence, to iteratively build our prediction."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_prediction(dataset, model, iterations=4):\n",
    "\n",
    "    # ---- Initial sequence\n",
    "    #\n",
    "    s=random.randint(0,len(dataset)-sequence_len-iterations)\n",
    "\n",
    "    sequence_pred = dataset[s:s+sequence_len].copy()\n",
    "    sequence_true = dataset[s:s+sequence_len+iterations].copy()\n",
    "\n",
    "    # ---- Iterate \n",
    "    #\n",
    "    sequence_pred = list(sequence_pred)\n",
    "\n",
    "    for i in range(iterations):\n",
    "        sequence   = sequence_pred[-sequence_len:]\n",
    "        prediction = model.predict( np.array([sequence]), verbose=fit_verbosity )\n",
    "        sequence_pred.append(prediction[0])\n",
    "\n",
    "    # ---- Extract the predictions    \n",
    "    #\n",
    "    prediction = np.array(sequence_pred[-iterations:])\n",
    "\n",
    "    return sequence_true,prediction"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "An n-steps prediction :"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The number of predicted points is the predict_len parameter (and no more a hard-coded value)\n",
    "sequence_true, sequence_pred = get_prediction(x_test, loaded_model, iterations=predict_len)\n",
    "\n",
    "fidle.scrawler.segment_2d(sequence_true, sequence_pred, ms=8, save_as='06-n-steps-prediction-norm')\n",
    "fidle.scrawler.multivariate_serie(sequence_true, predictions=sequence_pred, hide_ticks=True, labels=['Axis=0', 'Axis=1'],save_as='07-n-steps-prediction-norm')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Counterpart of fidle.init() - presumably closes the run and prints its summary (TODO confirm in Fidle docs)\n",
    "fidle.end()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "<img width=\"80px\" src=\"../fidle/img/logo-paysage.svg\"></img>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  },
  "vscode": {
   "interpreter": {
    "hash": "b3929042cc22c1274d74e3e946c52b845b57cb6d84f2d591ffe0519b38e4896d"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}