diff --git a/0-collect-data/datacite.py b/0-collect-data/datacite.py index 5218822b3aba3fc349424c2550eb411e4d34316f..5d9f3cdcd87a81a7f077278eaf97b7b489f5e312 100644 --- a/0-collect-data/datacite.py +++ b/0-collect-data/datacite.py @@ -4,6 +4,7 @@ """ ## todo - recherche par affiliation contributeurs ? +- v2 : périmètre UGA : ajouter les ROR des unités, des établissements/laboratoires associés (par exemple Grenoble INP) ## Documentation * Doc générale API Datacite : https://support.datacite.org/docs/api diff --git a/0-collect-data/rdg.py b/0-collect-data/rdg.py index 1f1e9e717a2de5e5307851e0db2d727322239cd8..84ec29903104abb34d677bf159212b96f9f03a04 100644 --- a/0-collect-data/rdg.py +++ b/0-collect-data/rdg.py @@ -75,14 +75,12 @@ for url in urls: # on supprime les doublons -for i in dois: - while(dois.count(i) > 1): - dois.remove(i) +unique_dois = list(set(dois)) -print("Nombre de dois différents trouvés : " + str(len(dois))) +print("Nombre de dois différents trouvés : " + str(len(unique_dois))) # exporter la liste de DOI au format txt with open("rdg-dois.txt", 'w') as f : - [f.write(f"{line[4:]}\n") for line in dois] # [4:] pour retirer "doi:" au début de chaque ligne + [f.write(f"{line[4:]}\n") for line in unique_dois] # [4:] pour retirer "doi:" au début de chaque ligne diff --git a/0-collect-data/z-datacite-demo.ipynb b/0-collect-data/z-datacite-demo.ipynb index 1de20a75f06aa67ceec513dcb92c430e38623ab2..b17617f26a197950d2c6c402ebff8e87a5c2be88 100644 --- a/0-collect-data/z-datacite-demo.ipynb +++ b/0-collect-data/z-datacite-demo.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 26, "id": "d229201d-4e79-40a8-9472-9ea46b344b1c", "metadata": {}, "outputs": [], @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 27, "id": "7ce19b89-d5b7-4dbe-9fab-a15a81b42078", "metadata": {}, "outputs": [], @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 28, "id": "ec73be41-21df-4448-b58e-e21306f6b9fa", "metadata": {}, "outputs": [ @@ -65,10 +65,10 @@ "authors = []\n", "\n", "# boucler pour ajouter les informations relatives à chaque dépôt\n", - "num_dois = results[\"data\"]\n", - "nb_dois = len(num_dois)\n", + "all_md_list = results[\"data\"]\n", + "nb_dois = len(all_md_list)\n", "\n", - "for item in num_dois :\n", + "for item in all_md_list :\n", " doi = item[\"id\"]\n", " # si l'identifiant n'est pas un doi, on ne le prend pas\n", " if item.get(\"type\") != \"dois\":\n", @@ -91,64 +91,183 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 29, "id": "e0722b5b-0059-4842-8e8a-2125239a7b7d", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " DOI \\\n", - "0 10.7280/d11h3x \n", - "1 10.7280/d1mm37 \n", - "2 10.7280/d1595v \n", - "3 10.7280/d1667w \n", - "4 10.7280/d1b114 \n", - ".. ... \n", - "137 10.6084/m9.figshare.23488967 \n", - "138 10.18150/wyyjk6 \n", - "139 10.13127/efsm20 \n", - "140 10.5285/3ea504d8-41c2-40dc-86dc-284c341badaa \n", - "141 10.5285/634ee206-258f-4b47-9237-efff4ef9eedd \n", - "\n", - " Titre Date \\\n", - "0 Annual Ice Velocity of the Greenland Ice Sheet... 2019-03-29T12:53:36Z \n", - "1 Annual Ice Velocity of the Greenland Ice Sheet... 2018-12-14T09:39:45Z \n", - "2 Annual Ice Velocity of the Greenland Ice Sheet... 2019-03-29T10:37:23Z \n", - "3 Greenland Marine-Terminating Glacier Retreat Data 2020-12-01T18:09:19Z \n", - "4 Dataset for: Fast retreat of Pope, Smith, and ... 2021-11-01T23:46:08Z \n", - ".. ... ... \n", - "137 Additional file 1 of 3DVizSNP: a tool for rapi... 2023-06-10T03:21:52Z \n", - "138 Estimates for recombination coefficients from ... 2022-04-21T14:17:28Z \n", - "139 European Fault-Source Model 2020 (EFSM20): onl... 2022-10-30T16:28:46Z \n", - "140 Ice radar data from Little Dome C, Antarctica,... 2022-03-04T09:26:18Z \n", - "141 Polarimetric ApRES data on a profile across Do... 2021-09-16T11:17:15Z \n", - "\n", - " Auteurs \n", - "0 [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be... \n", - "1 [Mouginot, Jeremie, Rignot, Eric, Millan, Roma... \n", - "2 [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be... \n", - "3 [Wood, Michael, Rignot, Eric, Bjørk, Anders, V... \n", - "4 [Milillo, Pietro, Rignot, Eric, Rizzoli, Paola... \n", - ".. ... \n", - "137 [Sierk, Michael, Ratnayake, Shashikala, Wagle,... \n", - "138 [Sakowski, Konrad, Borowik, Lukasz, Rochat, Né... \n", - "139 [Basili, Roberto, Danciu, Laurentiu, Beauval, ... \n", - "140 [Mulvaney, Robert, King, Edward, Martin, Carlo... \n", - "141 [Corr, Hugh, Ritz, Catherine, Martin, Carlos] \n", - "\n", - "[142 rows x 4 columns]\n" - ] + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>DOI</th>\n", + " <th>Titre</th>\n", + " <th>Date</th>\n", + " <th>Auteurs</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>10.7280/d11h3x</td>\n", + " <td>Annual Ice Velocity of the Greenland Ice Sheet...</td>\n", + " <td>2019-03-29T12:53:36Z</td>\n", + " <td>[Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>10.7280/d1mm37</td>\n", + " <td>Annual Ice Velocity of the Greenland Ice Sheet...</td>\n", + " <td>2018-12-14T09:39:45Z</td>\n", + " <td>[Mouginot, Jeremie, Rignot, Eric, Millan, Roma...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>10.7280/d1667w</td>\n", + " <td>Greenland Marine-Terminating Glacier Retreat Data</td>\n", + " <td>2020-12-01T18:09:19Z</td>\n", + " <td>[Wood, Michael, Rignot, Eric, Bjørk, Anders, V...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>10.7280/d1595v</td>\n", + " <td>Annual Ice Velocity of the Greenland Ice Sheet...</td>\n", + " <td>2019-03-29T10:37:23Z</td>\n", + " <td>[Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>10.7280/d1b114</td>\n", + " <td>Dataset for: Fast retreat of Pope, Smith, and ...</td>\n", + " <td>2021-11-01T23:46:08Z</td>\n", + " <td>[Milillo, Pietro, Rignot, Eric, Rizzoli, Paola...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>137</th>\n", + " <td>10.6084/m9.figshare.23488967</td>\n", + " <td>Additional file 1 of 3DVizSNP: a tool for rapi...</td>\n", + " <td>2023-06-10T03:21:52Z</td>\n", + " <td>[Sierk, Michael, Ratnayake, Shashikala, Wagle,...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>138</th>\n", + " <td>10.18150/wyyjk6</td>\n", + " <td>Estimates for recombination coefficients from ...</td>\n", + " <td>2022-04-21T14:17:28Z</td>\n", + " <td>[Sakowski, Konrad, Borowik, Lukasz, Rochat, Né...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>139</th>\n", + " <td>10.13127/efsm20</td>\n", + " <td>European Fault-Source Model 2020 (EFSM20): onl...</td>\n", + " <td>2022-10-30T16:28:46Z</td>\n", + " <td>[Basili, Roberto, Danciu, Laurentiu, Beauval, ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>140</th>\n", + " <td>10.5285/3ea504d8-41c2-40dc-86dc-284c341badaa</td>\n", + " <td>Ice radar data from Little Dome C, Antarctica,...</td>\n", + " <td>2022-03-04T09:26:18Z</td>\n", + " <td>[Mulvaney, Robert, King, Edward, Martin, Carlo...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>141</th>\n", + " <td>10.5285/634ee206-258f-4b47-9237-efff4ef9eedd</td>\n", + " <td>Polarimetric ApRES data on a profile across Do...</td>\n", + " <td>2021-09-16T11:17:15Z</td>\n", + " <td>[Corr, Hugh, Ritz, Catherine, Martin, Carlos]</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>142 rows × 4 columns</p>\n", + "</div>" + ], + "text/plain": [ + " DOI \\\n", + "0 10.7280/d11h3x \n", + "1 10.7280/d1mm37 \n", + "2 10.7280/d1667w \n", + "3 10.7280/d1595v \n", + "4 10.7280/d1b114 \n", + ".. ... \n", + "137 10.6084/m9.figshare.23488967 \n", + "138 10.18150/wyyjk6 \n", + "139 10.13127/efsm20 \n", + "140 10.5285/3ea504d8-41c2-40dc-86dc-284c341badaa \n", + "141 10.5285/634ee206-258f-4b47-9237-efff4ef9eedd \n", + "\n", + " Titre Date \\\n", + "0 Annual Ice Velocity of the Greenland Ice Sheet... 2019-03-29T12:53:36Z \n", + "1 Annual Ice Velocity of the Greenland Ice Sheet... 2018-12-14T09:39:45Z \n", + "2 Greenland Marine-Terminating Glacier Retreat Data 2020-12-01T18:09:19Z \n", + "3 Annual Ice Velocity of the Greenland Ice Sheet... 2019-03-29T10:37:23Z \n", + "4 Dataset for: Fast retreat of Pope, Smith, and ... 2021-11-01T23:46:08Z \n", + ".. ... ... \n", + "137 Additional file 1 of 3DVizSNP: a tool for rapi... 2023-06-10T03:21:52Z \n", + "138 Estimates for recombination coefficients from ... 2022-04-21T14:17:28Z \n", + "139 European Fault-Source Model 2020 (EFSM20): onl... 2022-10-30T16:28:46Z \n", + "140 Ice radar data from Little Dome C, Antarctica,... 2022-03-04T09:26:18Z \n", + "141 Polarimetric ApRES data on a profile across Do... 2021-09-16T11:17:15Z \n", + "\n", + " Auteurs \n", + "0 [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be... \n", + "1 [Mouginot, Jeremie, Rignot, Eric, Millan, Roma... \n", + "2 [Wood, Michael, Rignot, Eric, Bjørk, Anders, V... \n", + "3 [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be... \n", + "4 [Milillo, Pietro, Rignot, Eric, Rizzoli, Paola... \n", + ".. ... \n", + "137 [Sierk, Michael, Ratnayake, Shashikala, Wagle,... \n", + "138 [Sakowski, Konrad, Borowik, Lukasz, Rochat, Né... \n", + "139 [Basili, Roberto, Danciu, Laurentiu, Beauval, ... \n", + "140 [Mulvaney, Robert, King, Edward, Martin, Carlo... \n", + "141 [Corr, Hugh, Ritz, Catherine, Martin, Carlos] \n", + "\n", + "[142 rows x 4 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "# construction du DataFrame\n", "df = pandas.DataFrame({'DOI':dois, 'Titre':titles, 'Date':dates, 'Auteurs':authors})\n", "\n", - "print(df)\n", - "df.to_csv(\"z-resultats-demo-datacite.csv\")" + "df.to_csv(\"z-resultats-demo-datacite.csv\")\n", + "df" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0308feea-2560-4e33-836a-285a65db2429", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {