{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "<img width=\"800px\" src=\"../fidle/img/header.svg\"></img>\n",
    "\n",
    "# <!-- TITLE --> [PANDAS1] - Quelques exemples avec Pandas\n",
    "<!-- DESC --> pandas is another essential tool for the Scientific Python.\n",
    "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n",
    "\n",
    "## Objectives :\n",
    " - Understand how to slice a dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1 - A little cooking with datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy  as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get some data\n",
    "a = np.arange(50).reshape(10,5)\n",
    "print('Starting data: \\n',a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a DataFrame\n",
    "df_all = pd.DataFrame(a, columns=['A','B','C','D','E'])\n",
    "print('\\nDataFrame :')\n",
    "display(df_all)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shuffle data\n",
    "df_all = df_all.sample(frac=1, axis=0)\n",
    "print('\\nDataFrame randomly shuffled :')\n",
    "display(df_all)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get a train part\n",
    "df_train = df_all.sample(frac=0.8, axis=0)\n",
    "print('\\nTrain set (80%) :')\n",
    "display(df_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Get test set as all - train\n",
    "df_test = df_all.drop(df_train.index)\n",
    "print('\\nTest set (all - train) :')\n",
    "display(df_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_train = df_train.drop('E',  axis=1)\n",
    "y_train = df_train['E']\n",
    "x_test  = df_test.drop('E',   axis=1)\n",
    "y_test  = df_test['E']\n",
    "display(x_train)\n",
    "display(y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "<img width=\"80px\" src=\"../fidle/img/logo-paysage.svg\"></img>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.2 ('fidle-env')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "b3929042cc22c1274d74e3e946c52b845b57cb6d84f2d591ffe0519b38e4896d"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}