Commit 40119355 authored by Achille Mbogol Touye's avatar Achille Mbogol Touye
Replace 01-DNN-Wine-Regression-lightning.ipynb

%% Cell type:markdown id: tags:
<img width="800px" src="../fidle/img/header.svg"></img>
# <!-- TITLE --> [WINE1] - Wine quality prediction with a Dense Network (DNN) using Lightning
<!-- DESC --> Another example of regression, with a wine quality prediction!
<!-- AUTHOR : Achille Mbogol Touye (EFFILIA-MIAI/SIMaP) -->
## Objectives :
- Predict the **quality of wines**, based on their analysis
- Understand the principle and the architecture of a regression with a dense neural network, with backup and restore of the trained model.
The **[Wine Quality datasets](https://archive.ics.uci.edu/ml/datasets/wine+Quality)** are made up of analyses of a large number of wines, with an associated quality score (between 0 and 10).
This dataset is provided by:
Paulo Cortez, University of Minho, Guimarães, Portugal, http://www3.dsi.uminho.pt/pcortez
A. Cerdeira, F. Almeida, T. Matos and J. Reis, Viticulture Commission of the Vinho Verde Region (CVRVV), Porto, Portugal, @2009
This dataset can be retrieved at [University of California Irvine (UCI)](https://archive-beta.ics.uci.edu/ml/datasets/wine+quality)
Due to privacy and logistic issues, only physicochemical and sensory variables are available.
There is no data about grape types, wine brand, wine selling price, etc.
- fixed acidity
- volatile acidity
- citric acid
- residual sugar
- chlorides
- free sulfur dioxide
- total sulfur dioxide
- density
- pH
- sulphates
- alcohol
- quality (score between 0 and 10)
## What we're going to do :
- (Retrieve data)
- (Preparing the data)
- (Build a model)
- Train and save the model
- Restore saved model
- Evaluate the model
- Make some predictions
%% Cell type:markdown id: tags:
## Step 1 - Import and init
%% Cell type:code id: tags:
``` python
# Import some packages
import os
import sys
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import lightning.pytorch as pl
import torch.nn.functional as F
import torchvision.transforms as T

from importlib import reload
from IPython.display import Markdown
from torch.utils.data import Dataset, DataLoader, random_split
from modules.progressbar import CustomTrainProgressBar
from modules.data_load import WineQualityDataset, Normalize, ToTensor
from lightning.pytorch.loggers.tensorboard import TensorBoardLogger
from torchmetrics.functional.regression import mean_absolute_error, mean_squared_error

import fidle

# Init Fidle environment
run_id, run_dir, datasets_dir = fidle.init('WINE1-Lightning')
```
%% Cell type:markdown id: tags:
Verbosity during training :
- 0 = silent
- 1 = progress bar
- 2 = one line per epoch
%% Cell type:code id: tags:
``` python
fit_verbosity = 1
dataset_name  = 'winequality-red.csv'
```
%% Cell type:markdown id: tags:
Override parameters (batch mode) - Just forget this cell
%% Cell type:code id: tags:
``` python
fidle.override('fit_verbosity', 'dataset_name')
```
%% Cell type:markdown id: tags:
## Step 2 - Retrieve data
%% Cell type:code id: tags:
``` python
csv_file_path = f'{datasets_dir}/WineQuality/origine/{dataset_name}'
datasets      = WineQualityDataset(csv_file_path)

display(datasets.data.head(5).style.format("{0:.2f}"))
print('Missing Data : ', datasets.data.isna().sum().sum(), ' Shape is : ', datasets.data.shape)
```
%% Cell type:markdown id: tags:
## Step 3 - Preparing the data
%% Cell type:markdown id: tags:
### 3.1 - Data normalization
**Note :**
- All input features must be normalized.
- To do this we will subtract the mean and divide by the standard deviation for each input feature.
- Then we convert the numpy array features and target **(quality)** to torch tensors.
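%% Cell type:markdown id: tags:
This z-score normalization can be sketched in a few lines of numpy (an illustration with made-up values, not the wine data; the `Normalize` transform below applies the same idea to the CSV columns) :
%% Cell type:code id: tags:
``` python
import numpy as np

# Hypothetical mini-dataset: 3 samples, 2 features
x = np.array([[ 7.4, 0.70],
              [ 7.8, 0.88],
              [11.2, 0.28]])

# Subtract each column's mean, divide by its standard deviation
x_norm = (x - x.mean(axis=0)) / x.std(axis=0)

print(x_norm.mean(axis=0))   # each feature now has mean ~0
print(x_norm.std(axis=0))    # ...and standard deviation ~1
```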
%% Cell type:code id: tags:
``` python
transforms = T.Compose([Normalize(csv_file_path), ToTensor()])
dataset    = WineQualityDataset(csv_file_path, transform=transforms)
```
%% Cell type:code id: tags:
``` python
display(Markdown("Before normalization :"))
display(datasets[:]["features"])
print()
display(Markdown("After normalization :"))
display(dataset[:]["features"])
```
%% Cell type:markdown id: tags:
### 3.2 - Split data
We will use 80% of the data for training and 20% for validation.
x will be the features of the analysis and y the target (quality).
%% Cell type:code id: tags:
``` python
# ---- Split => train, test
#
data_train_len = int(len(dataset)*0.8)          # get 80 %
data_test_len  = len(dataset) - data_train_len  # test = all - train

# ---- Split => x,y with random_split
#
data_train_subset, data_test_subset = random_split(dataset, [data_train_len, data_test_len])

x_train = data_train_subset[:]["features"]
y_train = data_train_subset[:]["quality" ]

x_test  = data_test_subset [:]["features"]
y_test  = data_test_subset [:]["quality" ]

print('Original data shape was : ', dataset.data.shape)
print('x_train : ', x_train.shape, 'y_train : ', y_train.shape)
print('x_test  : ', x_test.shape,  'y_test  : ', y_test.shape)
```
%% Cell type:markdown id: tags:
### 3.3 - Use a DataLoader for training
The Dataset retrieves our dataset's features and labels one sample at a time. While training a model, we typically want to pass samples in minibatches and reshuffle the data at every epoch to reduce overfitting. DataLoader is an iterable that abstracts this complexity for us behind an easy API.
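%% Cell type:markdown id: tags:
What the DataLoader does for us can be sketched in plain Python (a simplified illustration on toy data; the real DataLoader also handles workers, tensor collation, etc.) :
%% Cell type:code id: tags:
``` python
import random

# Simplified sketch of DataLoader behaviour: shuffle the indices once
# per epoch, then yield fixed-size minibatches (last one may be smaller).
def batches(data, batch_size, shuffle=True, seed=0):
    idx = list(range(len(data)))
    if shuffle:
        random.Random(seed).shuffle(idx)
    for i in range(0, len(idx), batch_size):
        yield [data[j] for j in idx[i:i + batch_size]]

samples = list(range(10))
for batch in batches(samples, batch_size=4):
    print(batch)          # two batches of 4 samples, then one of 2
```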
%% Cell type:code id: tags:
``` python
# train batch data
train_loader = DataLoader(
    dataset=data_train_subset,
    shuffle=True,
    batch_size=20,
    num_workers=2
)

# test batch data
test_loader = DataLoader(
    dataset=data_test_subset,
    shuffle=False,
    batch_size=20,
    num_workers=2
)
```
%% Cell type:markdown id: tags:
## Step 4 - Build a model
More information about:
- [Optimizers](https://pytorch.org/docs/stable/optim.html)
- [Activations](https://pytorch.org/docs/stable/nn.html#non-linear-activations-weighted-sum-nonlinearity)
- [Losses](https://pytorch.org/docs/stable/nn.html#loss-functions)
- [Metrics](https://lightning.ai/docs/torchmetrics/stable/)
%% Cell type:code id: tags:
``` python
class LitRegression(pl.LightningModule):

    def __init__(self, in_features=11):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(in_features, 128),   # hidden layer 1
            nn.ReLU(),                     # activation function
            nn.Linear(128, 128),           # hidden layer 2
            nn.ReLU(),                     # activation function
            nn.Linear(128, 1))             # output layer

    def forward(self, x):                  # forward pass
        x = self.model(x)
        return x

    # optimizer
    def configure_optimizers(self):
        optimizer = torch.optim.RMSprop(self.parameters(), lr=1e-4)
        return optimizer

    def training_step(self, batch, batch_idx):
        # defines the train loop
        x_features, y_target = batch["features"], batch["quality"]

        # forward pass
        y_pred = self.model(x_features)

        # loss function MSE
        loss = F.mse_loss(y_pred, y_target)

        # metrics mae
        mae = mean_absolute_error(y_pred, y_target)

        # metrics mse
        mse = mean_squared_error(y_pred, y_target)

        metrics = {"train_loss": loss,
                   "train_mae" : mae,
                   "train_mse" : mse}

        # logs metrics for each training_step
        self.log_dict(metrics,
                      on_step  = False,
                      on_epoch = True,
                      logger   = True,
                      prog_bar = True)
        return loss

    def validation_step(self, batch, batch_idx):
        # defines the val loop
        x_features, y_target = batch["features"], batch["quality"]

        # forward pass
        y_pred = self.model(x_features)

        # loss function MSE
        loss = F.mse_loss(y_pred, y_target)

        # metrics mae
        mae = mean_absolute_error(y_pred, y_target)

        # metrics mse
        mse = mean_squared_error(y_pred, y_target)

        metrics = {"val_loss": loss,
                   "val_mae" : mae,
                   "val_mse" : mse}

        # logs metrics for each validation_step
        self.log_dict(metrics,
                      on_step  = False,
                      on_epoch = True,
                      logger   = True,
                      prog_bar = True)
        return metrics
```
%% Cell type:markdown id: tags:
## Step 5 - Train the model
### 5.1 - Get it
%% Cell type:code id: tags:
``` python
reg = LitRegression(in_features=11)
print(reg)
```
%% Cell type:markdown id: tags:
### 5.2 - Add callback
%% Cell type:code id: tags:
``` python
os.makedirs('./run/models', exist_ok=True)
save_dir = "./run/models/"
filename = 'best-model-{epoch}-{val_loss:.2f}'

savemodel_callback = pl.callbacks.ModelCheckpoint(dirpath=save_dir,
                                                  filename=filename,
                                                  save_top_k=1,
                                                  verbose=False,
                                                  monitor="val_loss")
```
%% Cell type:markdown id: tags:
### 5.3 - Train it
%% Cell type:code id: tags:
``` python
# loggers data
logger = TensorBoardLogger(save_dir='Wine_logs', name="reg_logs")
```
%% Cell type:code id: tags:
``` python
# train model
trainer = pl.Trainer(accelerator='auto',
                     max_epochs=100,
                     logger=logger,
                     num_sanity_val_steps=0,
                     callbacks=[savemodel_callback, CustomTrainProgressBar()])

trainer.fit(model=reg, train_dataloaders=train_loader, val_dataloaders=test_loader)
```
%% Cell type:markdown id: tags:
## Step 6 - Evaluate
### 6.1 - Model evaluation
MAE = Mean Absolute Error (between the labels and predictions)
An MAE of 0.5 means the predicted quality score is, on average, within 0.5 points of the true score.
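%% Cell type:markdown id: tags:
As a quick sanity check on what these metrics measure (made-up quality scores, for intuition only) :
%% Cell type:code id: tags:
``` python
import numpy as np

# Hypothetical quality scores: true values vs model predictions
y_true = np.array([5.0, 6.0, 5.0, 7.0])
y_pred = np.array([5.5, 5.5, 4.0, 7.5])

mae = np.abs(y_pred - y_true).mean()     # mean absolute error
mse = ((y_pred - y_true) ** 2).mean()    # mean squared error

print(f'mae = {mae:.3f}')    # 0.625 : off by ~0.6 quality points on average
print(f'mse = {mse:.3f}')    # 0.438 : squaring penalizes the larger errors more
```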
%% Cell type:code id: tags:
``` python
score = trainer.validate(model=reg, dataloaders=test_loader, verbose=False)

print('x_test / loss : {:5.4f}'.format(score[0]['val_loss']))
print('x_test / mae  : {:5.4f}'.format(score[0]['val_mae']))
print('x_test / mse  : {:5.4f}'.format(score[0]['val_mse']))
```
%% Cell type:markdown id: tags:
### 6.2 - Training history
%% Cell type:code id: tags:
``` python
# launch Tensorboard
%reload_ext tensorboard
%tensorboard --logdir=Wine_logs/reg_logs/ --bind_all
```
%% Cell type:markdown id: tags:
## Step 7 - Restore a model :
%% Cell type:markdown id: tags:
### 7.1 - Reload model
%% Cell type:code id: tags:
``` python
# Load the model from a checkpoint
loaded_model = LitRegression.load_from_checkpoint(savemodel_callback.best_model_path)
print("Loaded:")
print(loaded_model)
```
%% Cell type:markdown id: tags:
### 7.2 - Evaluate it :
%% Cell type:code id: tags:
``` python
score = trainer.validate(model=loaded_model, dataloaders=test_loader, verbose=False)

print('x_test / loss : {:5.4f}'.format(score[0]['val_loss']))
print('x_test / mae  : {:5.4f}'.format(score[0]['val_mae']))
print('x_test / mse  : {:5.4f}'.format(score[0]['val_mse']))
```
%% Cell type:markdown id: tags:
### 7.3 - Make a prediction
%% Cell type:code id: tags:
``` python
# ---- Pick n entries from our test set
n = 200
ii = np.random.randint(0, len(x_test), n)

x_sample = x_test[ii]
y_sample = y_test[ii]
```
%% Cell type:code id: tags:
``` python
# ---- Make some predictions :
# Set the model in evaluation mode
loaded_model.eval()

# Perform inference using the loaded model, without tracking gradients
with torch.no_grad():
    y_pred = loaded_model( x_sample )
```
%% Cell type:code id: tags:
``` python
# ---- Show it
print('Wine  Prediction  Real  Delta')
for i in range(n):
    pred  = y_pred[i][0].item()
    real  = y_sample[i][0].item()
    delta = real - pred
    print(f'{i:03d}   {pred:.2f}        {real}   {delta:+.2f} ')
```
%% Cell type:code id: tags:
``` python
fidle.end()
```
%% Cell type:markdown id: tags:
---
<img width="80px" src="../fidle/img/logo-paysage.svg"></img>