From 56202a5cccf0e0ef5284ca2905bd48c6fe59adb0 Mon Sep 17 00:00:00 2001 From: Maxence Larrieu <m@larri.eu> Date: Wed, 14 Feb 2024 17:03:57 +0100 Subject: [PATCH] add datacite clients uga --- .../all_datacite_clients_for_uga.csv | 0 1-enrich-with-datacite/concatenate-enrich-dois.py | 7 ++++++- 2-produce-graph/pie-datacite-client.py | 1 - 3 files changed, 6 insertions(+), 2 deletions(-) rename 2-produce-graph/all_datacite_clients.csv => 1-enrich-with-datacite/all_datacite_clients_for_uga.csv (100%) diff --git a/2-produce-graph/all_datacite_clients.csv b/1-enrich-with-datacite/all_datacite_clients_for_uga.csv similarity index 100% rename from 2-produce-graph/all_datacite_clients.csv rename to 1-enrich-with-datacite/all_datacite_clients_for_uga.csv diff --git a/1-enrich-with-datacite/concatenate-enrich-dois.py b/1-enrich-with-datacite/concatenate-enrich-dois.py index 78810fb..08f4529 100644 --- a/1-enrich-with-datacite/concatenate-enrich-dois.py +++ b/1-enrich-with-datacite/concatenate-enrich-dois.py @@ -52,10 +52,15 @@ for doi in dois : #[:300] temp_rows.append(selected_md) ## ajouter ce dictionnaire à une liste print(f"\tadded\t\t{doi}") - +## if new datasets have been found if temp_rows : df_fresh = pd.DataFrame(temp_rows) df_out = pd.concat([df_old, df_fresh], ignore_index=True) df_out.to_csv("../dois-uga.csv", index = False) print(f"\n\nnb of doi exported \t{len(df_out)}") + ## output another csv with datacite client and number of datasets + df_client_raw = df_out["client"].value_counts() + df_client_raw.to_csv("all_datacite_clients_for_uga.csv") + + diff --git a/2-produce-graph/pie-datacite-client.py b/2-produce-graph/pie-datacite-client.py index 0b4f7a3..7665d0a 100644 --- a/2-produce-graph/pie-datacite-client.py +++ b/2-produce-graph/pie-datacite-client.py @@ -5,7 +5,6 @@ df = my_fct.load_and_treat_csv() #print(df.columns) df_client_raw = df["client"].value_counts() -df_client_raw.to_csv("all_datacite_clients.csv") ## regroup small values in "other" treshold = 
20 -- GitLab