From a1c5f8b89ede13ee5ab93229a9e93a03c29cd6f4 Mon Sep 17 00:00:00 2001 From: Chetouane <elias.chetouane@univ-grenoble-alpes.fr> Date: Wed, 6 Dec 2023 11:47:21 +0100 Subject: [PATCH] =?UTF-8?q?demo=20pour=20atelier=20rencontre=20r=C3=A9f?= =?UTF-8?q?=C3=A9rents=20donn=C3=A9es?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 0-collect-data/z-datacite-demo.ipynb | 175 +++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 0-collect-data/z-datacite-demo.ipynb diff --git a/0-collect-data/z-datacite-demo.ipynb b/0-collect-data/z-datacite-demo.ipynb new file mode 100644 index 0000000..1de20a7 --- /dev/null +++ b/0-collect-data/z-datacite-demo.ipynb @@ -0,0 +1,175 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ce5cebe1-a058-4c0f-a5b5-23a02baa3521", + "metadata": {}, + "source": [ + "# Démo récupération des DOIs affiliés à l'UGA depuis Datacite\n", + "\n", + "## Documentation\n", + "* Doc générale API Datacite : https://support.datacite.org/docs/api\n", + "* Recherche d'un DOI : https://support.datacite.org/docs/api-sampling\n", + "* Pagination : https://support.datacite.org/docs/pagination\n", + "\n", + "## Code" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "d229201d-4e79-40a8-9472-9ea46b344b1c", + "metadata": {}, + "outputs": [], + "source": [ + "import requests, json, pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "7ce19b89-d5b7-4dbe-9fab-a15a81b42078", + "metadata": {}, + "outputs": [], + "source": [ + "# construction de l'url\n", + "url_pre = 'https://api.datacite.org/dois?affiliation=true&page[size]=1000'\n", + "url_query = '&query=(creators.affiliation.affiliationIdentifier:\"https://ror.org/02rx3b187\") AND (types.resourceTypeGeneral:Dataset)'\n", + "\n", + "# obtention des résultats de la requête\n", + "req = requests.get(url_pre + url_query)\n", + "results = req.json()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ec73be41-21df-4448-b58e-e21306f6b9fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nombre de résultats trouvés : 142\n" + ] + } + ], + "source": [ + "# ajouter les DOIs dans une liste, sans ajouter les résultats qui ne sont pas des DOIs\n", + "# ajouter les autres résultats qui nous intéressent dans des listes afin de construire un DataFrame\n", + "dois = []\n", + "not_dois = []\n", + "titles = []\n", + "dates = []\n", + "authors = []\n", + "\n", + "# boucler pour ajouter les informations relatives à chaque dépôt\n", + "num_dois = results[\"data\"]\n", + "nb_dois = len(num_dois)\n", + "\n", + "for item in num_dois :\n", + " doi = item[\"id\"]\n", + " # si l'identifiant n'est pas un doi, on ne le prend pas\n", + " if item.get(\"type\") != \"dois\":\n", + " print(\"Le résultat \" + str(item) + \" est de type \" + item.get(\"type\") + \" : \" + doi)\n", + " not_dois.append(doi)\n", + " # sinon, on récupère les informations dont on a besoin\n", + " else:\n", + " dois.append(doi)\n", + " titles.append(item[\"attributes\"][\"titles\"][0].get(\"title\"))\n", + " dates.append(item[\"attributes\"][\"created\"])\n", + " # boucler pour obtenir tous les auteurs\n", + " auts = []\n", + " for aut in item[\"attributes\"][\"creators\"]:\n", + " auts.append(aut.get(\"name\"))\n", + " authors.append(auts)\n", + "\n", + "# affichage du résultat\n", + "print(\"Nombre de résultats trouvés : \" + str(nb_dois))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "e0722b5b-0059-4842-8e8a-2125239a7b7d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " DOI \\\n", + "0 10.7280/d11h3x \n", + "1 10.7280/d1mm37 \n", + "2 10.7280/d1595v \n", + "3 10.7280/d1667w \n", + "4 10.7280/d1b114 \n", + ".. ... \n", + "137 10.6084/m9.figshare.23488967 \n", + "138 10.18150/wyyjk6 \n", + "139 10.13127/efsm20 \n", + "140 10.5285/3ea504d8-41c2-40dc-86dc-284c341badaa \n", + "141 10.5285/634ee206-258f-4b47-9237-efff4ef9eedd \n", + "\n", + " Titre Date \\\n", + "0 Annual Ice Velocity of the Greenland Ice Sheet... 2019-03-29T12:53:36Z \n", + "1 Annual Ice Velocity of the Greenland Ice Sheet... 2018-12-14T09:39:45Z \n", + "2 Annual Ice Velocity of the Greenland Ice Sheet... 2019-03-29T10:37:23Z \n", + "3 Greenland Marine-Terminating Glacier Retreat Data 2020-12-01T18:09:19Z \n", + "4 Dataset for: Fast retreat of Pope, Smith, and ... 2021-11-01T23:46:08Z \n", + ".. ... ... \n", + "137 Additional file 1 of 3DVizSNP: a tool for rapi... 2023-06-10T03:21:52Z \n", + "138 Estimates for recombination coefficients from ... 2022-04-21T14:17:28Z \n", + "139 European Fault-Source Model 2020 (EFSM20): onl... 2022-10-30T16:28:46Z \n", + "140 Ice radar data from Little Dome C, Antarctica,... 2022-03-04T09:26:18Z \n", + "141 Polarimetric ApRES data on a profile across Do... 2021-09-16T11:17:15Z \n", + "\n", + " Auteurs \n", + "0 [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be... \n", + "1 [Mouginot, Jeremie, Rignot, Eric, Millan, Roma... \n", + "2 [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be... \n", + "3 [Wood, Michael, Rignot, Eric, Bjørk, Anders, V... \n", + "4 [Milillo, Pietro, Rignot, Eric, Rizzoli, Paola... \n", + ".. ... \n", + "137 [Sierk, Michael, Ratnayake, Shashikala, Wagle,... \n", + "138 [Sakowski, Konrad, Borowik, Lukasz, Rochat, Né... \n", + "139 [Basili, Roberto, Danciu, Laurentiu, Beauval, ... \n", + "140 [Mulvaney, Robert, King, Edward, Martin, Carlo... \n", + "141 [Corr, Hugh, Ritz, Catherine, Martin, Carlos] \n", + "\n", + "[142 rows x 4 columns]\n" + ] + } + ], + "source": [ + "# construction du DataFrame\n", + "df = pandas.DataFrame({'DOI':dois, 'Titre':titles, 'Date':dates, 'Auteurs':authors})\n", + "\n", + "print(df)\n", + "df.to_csv(\"z-resultats-demo-datacite.csv\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} -- GitLab