<img width="800px" src="../fidle/img/header.svg"></img>

# <!-- TITLE --> [POLR1] - Complexity Syndrome
<!-- DESC --> Illustration of the complexity problem with polynomial regression
<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->

## Objectives :
 - Visualizing and understanding underfitting and overfitting
 
## What we're going to do :

We are looking for a polynomial function to approximate the observed series : 
$y = a_n\cdot x^n + \dots + a_i\cdot x^i + \dots + a_1\cdot x + b$


## Step 1 - Import and init

In [None]:
import numpy as np
import math
import random
import matplotlib
import matplotlib.pyplot as plt
import sys
import fidle

# Init Fidle environment
# 'POLR1' is this notebook's identifier (the same tag as in the title).
# fidle.init returns three values, kept here as run_id, run_dir and
# datasets_dir; presumably the run identifier, its output directory and the
# datasets location -- confirm against the fidle documentation.
run_id, run_dir, datasets_dir = fidle.init('POLR1')

## Step 2 - Dataset generation

In [None]:
# ---- Parameters
#
# n                : number of observations to generate
# xob_min, xob_max : interval the x values are drawn from (uniformly)
# deg              : degree of the generating polynomial
# a_min, a_max     : interval the polynomial coefficients are drawn from
# noise            : standard deviation of the Gaussian noise added to y

n = 100

xob_min = -5
xob_max = 5

deg = 7
a_min = -2
a_max = 2

noise = 2000

# ---- Train data
# X, Y           : raw observations, each of shape (n, 1)
# X_norm, Y_norm : the same observations after normalization

X = np.random.uniform(xob_min, xob_max, (n, 1))

# Zero-mean Gaussian noise: a standard normal sample scaled by `noise`.
N = noise * np.random.normal(0, 1, (n, 1))

# A polynomial of degree `deg` needs deg + 1 coefficients (a_deg ... a_1, b).
# np.poly1d reads them from the highest power down to the constant term, so
# drawing only `deg` values would silently build a polynomial of degree deg - 1.
a = np.random.uniform(a_min, a_max, (deg + 1,))
fy = np.poly1d(a)

Y = fy(X) + N

# ---- Data normalization
# Center each series on its mean and scale it by its standard deviation, so
# both X_norm and Y_norm end up with mean 0 and std 1.
X_norm = (X - X.mean(axis=0)) / X.std(axis=0)
Y_norm = (Y - Y.mean(axis=0)) / Y.std(axis=0)

# ---- Data visualization
# width, height : figure size handed to plt.figure(figsize=...)
# nb_viz        : number of points actually plotted, capped at 2000

width, height = 12, 6
nb_viz = min(2000, n)

def vector_infos(name,V):
    """Print a one-line summary (mean, std, min, max) of the array V.

    name -- label printed first, left-aligned and padded to 8 characters
    V    -- NumPy array; its mean and std along axis 0 must each reduce to
            a single element, since they are converted with .item()
    """
    mean_value = V.mean(axis=0).item()
    std_value = V.std(axis=0).item()
    lowest, highest = V.min(), V.max()

    template = "{:8} : mean={:+12.4f} std={:+12.4f} min={:+12.4f} max={:+12.4f}"
    print(template.format(name, mean_value, std_value, lowest, highest))


# ---- Recap of the generator settings
fidle.utils.display_md('#### Generator :')
print(f"Number of points={n} deg={deg} bruit={noise}")

# ---- Scatter plot of the raw (non-normalized) observations
fidle.utils.display_md('#### Datasets :')
print(f"({nb_viz} points visibles sur {n})")
plt.figure(figsize=(width, height))
plt.plot(X[:nb_viz], Y[:nb_viz], '.')
# Ticks and tick labels are switched off on both axes: only the shape of the
# point cloud matters here, not the actual values.
plt.tick_params(axis='both', which='both', bottom=False, left=False, labelbottom=False, labelleft=False)
plt.xlabel('x axis')
plt.ylabel('y axis')
fidle.scrawler.save_fig("01-dataset")
plt.show()

# ---- Statistics of the series, before and after normalization
fidle.utils.display_md('#### Before normalization :')
vector_infos('X',X)
vector_infos('Y',Y)

fidle.utils.display_md('#### After normalization :')
vector_infos('X_norm',X_norm)
vector_infos('Y_norm',Y_norm)


## Step 3 - Polynomial regression with NumPy
### 3.1 - Underfitting

In [None]:
def draw_reg(X_norm, Y_norm, x_hat,fy_hat, size, save_as):
    """Plot the observations with the fitted curve, save the figure and show it.

    X_norm, Y_norm -- observations, drawn as dots
    x_hat          -- accepted but not used: the curve is always evaluated on
                      100 evenly spaced points between X_norm.min() and
                      X_norm.max()
    fy_hat         -- callable applied to those points to get the curve
    size           -- figure size, passed to plt.figure(figsize=...)
    save_as        -- name handed to fidle.scrawler.save_fig
    """
    # Abscissas of the fitted curve, spanning the range of the plotted points.
    curve_x = np.linspace(X_norm.min(), X_norm.max(), 100)
    curve_y = fy_hat(curve_x)

    plt.figure(figsize=size)
    plt.plot(X_norm, Y_norm, '.')
    plt.plot(curve_x, curve_y)

    # No ticks and no tick labels on either axis.
    hidden = dict(bottom=False, left=False, labelbottom=False, labelleft=False)
    plt.tick_params(axis='both', which='both', **hidden)

    plt.xlabel('x axis')
    plt.ylabel('y axis')
    fidle.scrawler.save_fig(save_as)
    plt.show()

In [None]:
# Degree-1 fit: a straight line through the normalized observations.
reg_deg = 1

# np.polyfit expects 1-D inputs, hence the flattening of the (n, 1) columns.
a_hat = np.polyfit(X_norm.ravel(), Y_norm.ravel(), deg=reg_deg)
fy_hat = np.poly1d(a_hat)

print(f'Nombre de degrés : {reg_deg}')
draw_reg(
    X_norm[:nb_viz], Y_norm[:nb_viz], X_norm, fy_hat,
    (width, height), save_as='02-underfitting',
)

### 3.2 - Good fitting

In [None]:
# Degree-5 fit on the normalized observations.
reg_deg = 5

# np.polyfit expects 1-D inputs, hence the flattening of the (n, 1) columns.
a_hat = np.polyfit(X_norm.ravel(), Y_norm.ravel(), deg=reg_deg)
fy_hat = np.poly1d(a_hat)

print(f'Nombre de degrés : {reg_deg}')
draw_reg(
    X_norm[:nb_viz], Y_norm[:nb_viz], X_norm, fy_hat,
    (width, height), save_as='03-good_fitting',
)

### 3.3 - Overfitting

In [None]:
# Degree-24 fit on the normalized observations.
# NOTE(review): at this degree NumPy may report a poorly conditioned fit;
# that would be expected for this overfitting demonstration -- confirm.
reg_deg = 24

# np.polyfit expects 1-D inputs, hence the flattening of the (n, 1) columns.
a_hat = np.polyfit(X_norm.ravel(), Y_norm.ravel(), deg=reg_deg)
fy_hat = np.poly1d(a_hat)

print(f'Nombre de degrés : {reg_deg}')
draw_reg(
    X_norm[:nb_viz], Y_norm[:nb_viz], X_norm, fy_hat,
    (width, height), save_as='04-over_fitting',
)

In [None]:
# Final Fidle call of the notebook; presumably the counterpart of the
# fidle.init call made in Step 1 -- confirm against the fidle documentation.
fidle.end()

---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>