From a1c5f8b89ede13ee5ab93229a9e93a03c29cd6f4 Mon Sep 17 00:00:00 2001
From: Chetouane <elias.chetouane@univ-grenoble-alpes.fr>
Date: Wed, 6 Dec 2023 11:47:21 +0100
Subject: [PATCH] =?UTF-8?q?demo=20pour=20atelier=20rencontre=20r=C3=A9f?=
 =?UTF-8?q?=C3=A9rents=20donn=C3=A9es?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 0-collect-data/z-datacite-demo.ipynb | 175 +++++++++++++++++++++++++++
 1 file changed, 175 insertions(+)
 create mode 100644 0-collect-data/z-datacite-demo.ipynb

diff --git a/0-collect-data/z-datacite-demo.ipynb b/0-collect-data/z-datacite-demo.ipynb
new file mode 100644
index 0000000..1de20a7
--- /dev/null
+++ b/0-collect-data/z-datacite-demo.ipynb
@@ -0,0 +1,175 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ce5cebe1-a058-4c0f-a5b5-23a02baa3521",
+   "metadata": {},
+   "source": [
+    "# Démo : récupération des DOI affiliés à l'UGA depuis DataCite\n",
+    "\n",
+    "## Documentation\n",
+    "* Documentation générale de l'API DataCite : https://support.datacite.org/docs/api\n",
+    "* Recherche d'un DOI : https://support.datacite.org/docs/api-sampling\n",
+    "* Pagination : https://support.datacite.org/docs/pagination\n",
+    "\n",
+    "## Code"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "d229201d-4e79-40a8-9472-9ea46b344b1c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests, json, pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "7ce19b89-d5b7-4dbe-9fab-a15a81b42078",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build the request URL (a single page of up to 1000 records; no further page is fetched)\n",
+    "url_pre = 'https://api.datacite.org/dois?affiliation=true&page[size]=1000'\n",
+    "url_query = '&query=(creators.affiliation.affiliationIdentifier:\"https://ror.org/02rx3b187\") AND (types.resourceTypeGeneral:Dataset)'\n",
+    "# Send the request: the timeout prevents hanging forever, and an HTTP error stops the cell instead of being parsed as data\n",
+    "req = requests.get(url_pre + url_query, timeout=60)\n",
+    "req.raise_for_status()\n",
+    "results = req.json()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "ec73be41-21df-4448-b58e-e21306f6b9fa",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Nombre de résultats trouvés : 142\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Collect the fields needed to build a DataFrame, one list per column.\n",
+    "# Identifiers of records whose type is not 'dois' are set aside in not_dois.\n",
+    "dois = []\n",
+    "not_dois = []\n",
+    "titles = []\n",
+    "dates = []\n",
+    "authors = []\n",
+    "\n",
+    "# records returned by the query, and how many there are\n",
+    "num_dois = results[\"data\"]\n",
+    "nb_dois = len(num_dois)\n",
+    "\n",
+    "for rang, item in enumerate(num_dois):\n",
+    "    doi = item[\"id\"]\n",
+    "    # not a DOI record: report its position (not the whole record) and keep only its identifier\n",
+    "    if item.get(\"type\") != \"dois\":\n",
+    "        print(\"Le résultat \" + str(rang) + \" est de type \" + str(item.get(\"type\")) + \" : \" + doi)\n",
+    "        not_dois.append(doi)\n",
+    "    # DOI record: extract the fields of interest\n",
+    "    else:\n",
+    "        attributs = item[\"attributes\"]\n",
+    "        dois.append(doi)\n",
+    "        # guard against an empty or missing title list (None is stored in that case)\n",
+    "        titres = attributs.get(\"titles\") or [{}]\n",
+    "        titles.append(titres[0].get(\"title\"))\n",
+    "        dates.append(attributs.get(\"created\"))\n",
+    "        # names of all creators of the record (empty list when there are none)\n",
+    "        authors.append([aut.get(\"name\") for aut in attributs.get(\"creators\") or []])\n",
+    "\n",
+    "# show how many records the query returned\n",
+    "print(\"Nombre de résultats trouvés : \" + str(nb_dois))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "e0722b5b-0059-4842-8e8a-2125239a7b7d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                                              DOI  \\\n",
+      "0                                  10.7280/d11h3x   \n",
+      "1                                  10.7280/d1mm37   \n",
+      "2                                  10.7280/d1595v   \n",
+      "3                                  10.7280/d1667w   \n",
+      "4                                  10.7280/d1b114   \n",
+      "..                                            ...   \n",
+      "137                  10.6084/m9.figshare.23488967   \n",
+      "138                               10.18150/wyyjk6   \n",
+      "139                               10.13127/efsm20   \n",
+      "140  10.5285/3ea504d8-41c2-40dc-86dc-284c341badaa   \n",
+      "141  10.5285/634ee206-258f-4b47-9237-efff4ef9eedd   \n",
+      "\n",
+      "                                                 Titre                  Date  \\\n",
+      "0    Annual Ice Velocity of the Greenland Ice Sheet...  2019-03-29T12:53:36Z   \n",
+      "1    Annual Ice Velocity of the Greenland Ice Sheet...  2018-12-14T09:39:45Z   \n",
+      "2    Annual Ice Velocity of the Greenland Ice Sheet...  2019-03-29T10:37:23Z   \n",
+      "3    Greenland Marine-Terminating Glacier Retreat Data  2020-12-01T18:09:19Z   \n",
+      "4    Dataset for: Fast retreat of Pope, Smith, and ...  2021-11-01T23:46:08Z   \n",
+      "..                                                 ...                   ...   \n",
+      "137  Additional file 1 of 3DVizSNP: a tool for rapi...  2023-06-10T03:21:52Z   \n",
+      "138  Estimates for recombination coefficients from ...  2022-04-21T14:17:28Z   \n",
+      "139  European Fault-Source Model 2020 (EFSM20): onl...  2022-10-30T16:28:46Z   \n",
+      "140  Ice radar data from Little Dome C, Antarctica,...  2022-03-04T09:26:18Z   \n",
+      "141  Polarimetric ApRES data on a profile across Do...  2021-09-16T11:17:15Z   \n",
+      "\n",
+      "                                               Auteurs  \n",
+      "0    [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be...  \n",
+      "1    [Mouginot, Jeremie, Rignot, Eric, Millan, Roma...  \n",
+      "2    [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be...  \n",
+      "3    [Wood, Michael, Rignot, Eric, Bjørk, Anders, V...  \n",
+      "4    [Milillo, Pietro, Rignot, Eric, Rizzoli, Paola...  \n",
+      "..                                                 ...  \n",
+      "137  [Sierk, Michael, Ratnayake, Shashikala, Wagle,...  \n",
+      "138  [Sakowski, Konrad, Borowik, Lukasz, Rochat, Né...  \n",
+      "139  [Basili, Roberto, Danciu, Laurentiu, Beauval, ...  \n",
+      "140  [Mulvaney, Robert, King, Edward, Martin, Carlo...  \n",
+      "141      [Corr, Hugh, Ritz, Catherine, Martin, Carlos]  \n",
+      "\n",
+      "[142 rows x 4 columns]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Assemble the collected lists into a table, one column per list\n",
+    "df = pandas.DataFrame(dict(DOI=dois, Titre=titles, Date=dates, Auteurs=authors))\n",
+    "# Print the table, then save it as a CSV file (relative path, so in the current working directory)\n",
+    "print(df)\n",
+    "df.to_csv(\"z-resultats-demo-datacite.csv\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
-- 
GitLab