{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img width=\"800px\" src=\"../fidle/img/header.svg\"></img>\n",
"# <!-- TITLE --> [K3LADYB1] - Prediction of a 2D trajectory via RNN\n",
"<!-- DESC --> Artificial dataset generation and prediction attempt via a recurrent network, using Keras 3 and PyTorch\n",
"<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n",
"\n",
"## Objectives :\n",
" - Understanding the use of a recurrent neural network\n",
"\n",
"## What we're going to do :\n",
"\n",
" - Generate an artificial dataset\n",
" - dataset preparation\n",
" - Doing our testing\n",
" - Making predictions\n",
"\n",
"## Step 1 - Import and init\n",
"### 1.1 - Python"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ['KERAS_BACKEND'] = 'torch'\n",
"import keras\n",
"import numpy as np\n",
"from math import cos, sin\n",
"import random\n",
"import fidle\n",
"\n",
"run_id, run_dir, datasets_dir = fidle.init('K3LADYB1')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1.2 - Parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- About dataset\n",
"#\n",
"max_t = 1000\n",
"delta_t = 0.01\n",
"features_len = 2\n",
"\n",
"\n",
"sequence_len = 20\n",
"predict_len = 5\n",
"\n",
"# ---- About training\n",
"#\n",
"scale = .2 # Percentage of dataset to be used (1=all)\n",
"train_prop = .8 # Percentage for train (the rest being for the test)\n",
"batch_size = 32\n",
"epochs = 5\n",
"fit_verbosity = 1 # 0 = silent, 1 = progress bar, 2 = one line per epoch"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Override parameters (batch mode) - Just forget this cell"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fidle.override('scale', 'train_prop', 'sequence_len', 'predict_len', 'batch_size', 'epochs', 'fit_verbosity')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2 - Generation of a fun dataset\n",
"### 2.1 - Virtual trajectory of our ladybug"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def ladybug_init(s=122):\n",
" \n",
" if s>0 : random.seed(s)\n",
" ladybug_init.params_x = [ random.gauss(0.,1.) for u in range(8)]\n",
" ladybug_init.params_y = [ random.gauss(0.,1.) for u in range(8)]\n",
" \n",
"def ladybug_move(t):\n",
" \n",
" [ax1, ax2, ax3, ax4, kx1, kx2, kx3, kx4] = ladybug_init.params_x\n",
" [ay1, ay2, ay3, ay4, ky1, ky2, ky3, ky4] = ladybug_init.params_y\n",
" \n",
" x = ax1*sin(t*(kx1+20)) + ax2*cos(t*(kx2+10)) + ax3*sin(t*(kx3+5)) + ax4*cos(t*(kx4+5))\n",
" y = ay1*cos(t*(ky1+20)) + ay2*sin(t*(ky2+10)) + ay3*cos(t*(ky3+5)) + ay4*sin(t*(ky4+5)) \n",
"\n",
" return x,y"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.2 - Get some positions, and build a rescaled and normalized dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Get positions\n",
"#\n",
"ladybug_init(s=16)\n",
"x,y = 0,0\n",
"positions=[]\n",
"for t in np.arange(0., max_t, delta_t):\n",
" x,y = ladybug_move(t)\n",
"    positions.append([x,y])\n",
"\n",
"# ---- Build rescaled dataset\n",
"#\n",
"n = int( len(positions)*scale )\n",
"dataset = np.array(positions[:n])\n",
"\n",
"k = int(len(dataset)*train_prop)\n",
"x_train = dataset[:k]\n",
"x_test = dataset[k:]\n",
"\n",
"# ---- Normalize\n",
"#\n",
"mean = x_train.mean()\n",
"std = x_train.std()\n",
"x_train = (x_train - mean) / std\n",
"x_test = (x_test - mean) / std\n",
"\n",
"print(\"Dataset generated.\")\n",
"print(\"Train shape is : \", x_train.shape)\n",
"print(\"Test shape is : \", x_test.shape)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.3 - Have a look\n",
"An extract from the data we have: the virtual trajectory of our ladybug \n",
"And what we want to predict (in red), from a segment (in blue)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fidle.scrawler.serie_2d(x_train[:1000], figsize=(12,12), lw=1,ms=4,save_as='01-dataset')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"k1,k2 = sequence_len, predict_len\n",
"i = random.randint(0,len(x_test)-k1-k2)\n",
"j = i+k1\n",
"\n",
"fidle.scrawler.segment_2d( x_test[i:j+k2], x_test[j:j+k2],ms=6, save_as='02-objectives')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.4 - Prepare sequences from datasets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ---- Create sequences and labels for train and test\n",
"xs_train, ys_train=[],[]\n",
"all_i = np.random.permutation( len(x_train) - sequence_len - 1 )\n",
"for i in all_i:\n",
" xs_train.append( x_train[ i : i+sequence_len ] )\n",
" ys_train.append( x_train[ i+sequence_len+1 ] )\n",
" \n",
"xs_test, ys_test=[],[]\n",
"for i in range( len(x_test) - sequence_len - 1):\n",
" xs_test.append( x_test[ i : i+sequence_len ] )\n",
" ys_test.append( x_test[ i+sequence_len+1 ] )\n",
"# ---- Convert to numpy / float16\n",
" \n",
"xs_train = np.array(xs_train, dtype='float16')\n",
"ys_train = np.array(ys_train, dtype='float16')\n",
"xs_test = np.array(xs_test, dtype='float16')\n",
"ys_test = np.array(ys_test, dtype='float16')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fidle.utils.subtitle('About the splitting of our dataset :')\n",
"print('Number of sequences : ', len(xs_train))\n",
"print('xs_train shape : ',xs_train.shape)\n",
"print('ys_train shape : ',ys_train.shape)\n",
"\n",
"fidle.utils.subtitle('What an xs look like :')\n",
"fidle.utils.np_print(xs_train[10] )\n",
"fidle.utils.subtitle('What an ys look like :')\n",
"fidle.utils.np_print(ys_train[10])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 3 - Create a model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model = keras.models.Sequential()\n",
"model.add( keras.layers.InputLayer(shape=(sequence_len, features_len)) )\n",
"model.add( keras.layers.GRU(200, return_sequences=False, activation='relu') )\n",
"model.add( keras.layers.Dense(features_len) )\n",
"\n",
"model.summary()\n",
"model.compile(optimizer='rmsprop', \n",
" loss='mse', \n",
" metrics = ['mae'] )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 4 - Train the model\n",
"### 4.1 Add Callbacks"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"os.makedirs(f'{run_dir}/models', mode=0o750, exist_ok=True)\n",
"save_dir = f'{run_dir}/models/best_model.keras'\n",
"\n",
"savemodel_callback = keras.callbacks.ModelCheckpoint( filepath=save_dir, monitor='val_mae', mode='min', save_best_only=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 4.2 - Train it\n",
"Need 3' on a cpu laptop"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"chrono=fidle.Chrono()\n",
"chrono.start()\n",
"history=model.fit(xs_train,ys_train,\n",
" epochs = epochs, \n",
" verbose = fit_verbosity,\n",
" validation_data = (xs_test, ys_test),\n",
"                  callbacks       = [savemodel_callback])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fidle.scrawler.history(history,plot={'loss':['loss','val_loss'], 'mae':['mae','val_mae']}, save_as='03-history')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 5 - Predict"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 5.1 - Load model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"loaded_model = keras.models.load_model(f'{run_dir}/models/best_model.keras')\n",
"print('Loaded.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 5.2 - Make a 1-step prediction\n",
"A simple prediction on a single iteration"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"s=random.randint(0,len(x_test)-sequence_len)\n",
"\n",
"sequence = x_test[s:s+sequence_len]\n",
"sequence_true = x_test[s:s+sequence_len+1]\n",
"\n",
"sequence_pred = loaded_model.predict( np.array([sequence]), verbose=fit_verbosity )\n",
"print('sequence shape :',sequence.shape)\n",
"print('sequence true shape :',sequence_true.shape)\n",
"print('sequence pred shape :',sequence_pred.shape)\n",
"\n",
"fidle.scrawler.segment_2d(sequence_true, sequence_pred, save_as='04-one-step-prediction')\n",
"fidle.scrawler.multivariate_serie(sequence_true, predictions=sequence_pred, labels=['Axis=0', 'Axis=1'],save_as='05-one-step-prediction-2axis')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 5.3 - Make n-steps prediction\n",
"A longer term prediction, via a nice iteration function \n",
"We will perform <iteration> predictions to iteratively build our prediction."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_prediction(dataset, model, iterations=4):\n",
"\n",
" # ---- Initial sequence\n",
" #\n",
" s=random.randint(0,len(dataset)-sequence_len-iterations)\n",
"\n",
" sequence_pred = dataset[s:s+sequence_len].copy()\n",
" sequence_true = dataset[s:s+sequence_len+iterations].copy()\n",
"\n",
" # ---- Iterate \n",
" #\n",
" sequence_pred = list(sequence_pred)\n",
"\n",
" for i in range(iterations):\n",
" sequence = sequence_pred[-sequence_len:]\n",
" prediction = model.predict( np.array([sequence]), verbose=fit_verbosity )\n",
" sequence_pred.append(prediction[0])\n",
"\n",
" # ---- Extract the predictions \n",
" #\n",
" prediction = np.array(sequence_pred[-iterations:])\n",
"\n",
" return sequence_true,prediction"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"An n-steps prediction :"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sequence_true, sequence_pred = get_prediction(x_test, loaded_model, iterations=5)\n",
"\n",
"fidle.scrawler.segment_2d(sequence_true, sequence_pred, ms=8, save_as='06-n-steps-prediction-norm')\n",
"fidle.scrawler.multivariate_serie(sequence_true, predictions=sequence_pred, hide_ticks=True, labels=['Axis=0', 'Axis=1'],save_as='07-n-steps-prediction-norm')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"<img width=\"80px\" src=\"../fidle/img/logo-paysage.svg\"></img>"
]
}
],
"metadata": {
"kernelspec": {
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
},
"vscode": {
"interpreter": {
"hash": "b3929042cc22c1274d74e3e946c52b845b57cb6d84f2d591ffe0519b38e4896d"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}