{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "<img width=\"800px\" src=\"../fidle/img/header.svg\"></img>\n",
    "\n",
    "# <!-- TITLE --> [PANDAS1] - Quelques exemples avec Pandas\n",
    "<!-- DESC --> pandas is another essential tool for scientific Python.\n",
    "<!-- AUTHOR : Jean-Luc Parouty (CNRS/SIMaP) -->\n",
    "\n",
    "## Objectives :\n",
    " - Understand how to slice a dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1 - A little cooking with datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration\n",
    "# Fixed seed, so that the shuffle and the train/test split below are\n",
    "# identical on every Restart & Run All\n",
    "SEED = 42"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get some data\n",
    "a = np.arange(50).reshape(10,5)\n",
    "print('Starting data: \\n',a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a DataFrame\n",
    "df_all = pd.DataFrame(a, columns=['A','B','C','D','E'])\n",
    "print('\\nDataFrame :')\n",
    "display(df_all)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shuffle data\n",
    "# The result goes into a new frame: df_all stays untouched, so re-running\n",
    "# this cell always gives the same shuffled frame\n",
    "df_shuffled = df_all.sample(frac=1, axis=0, random_state=SEED)\n",
    "print('\\nDataFrame randomly shuffled :')\n",
    "display(df_shuffled)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get a train part\n",
    "df_train = df_shuffled.sample(frac=0.8, axis=0, random_state=SEED)\n",
    "print('\\nTrain set (80%) :')\n",
    "display(df_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get test set as all - train\n",
    "# Rows are removed by index label, so the original labels kept by sample() matter here\n",
    "df_test = df_shuffled.drop(df_train.index)\n",
    "print('\\nTest set (all - train) :')\n",
    "display(df_test)\n",
    "\n",
    "# Sanity check: train and test together must cover the whole dataset\n",
    "assert len(df_train) + len(df_test) == len(df_all)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Separate the features (every column but 'E') from the target (column 'E')\n",
    "x_train = df_train.drop('E', axis=1)\n",
    "y_train = df_train['E']\n",
    "x_test = df_test.drop('E', axis=1)\n",
    "y_test = df_test['E']\n",
    "display(x_train)\n",
    "display(y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "<img width=\"80px\" src=\"../fidle/img/logo-paysage.svg\"></img>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.2 ('fidle-env')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "b3929042cc22c1274d74e3e946c52b845b57cb6d84f2d591ffe0519b38e4896d"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}