From e8b7601ab485fe76918c448946806278bcf755df Mon Sep 17 00:00:00 2001
From: Elias Chetouane <elias.chetouane@univ-grenoble-alpes.fr>
Date: Wed, 17 Apr 2024 16:23:44 +0200
Subject: [PATCH] =?UTF-8?q?Mise=20=C3=A0=20jour=20suppression=20des=20dois?=
 =?UTF-8?q?=20de=20version.=20Version=20fonctionnelle.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (text between the three-dash marker and the diff is not part
of the commit message and is skipped when the patch is applied):

Commit subject, decoded and translated from French: "Update of the removal
of version DOIs. Working version."

What the hunk changes inside the `if temp_rows` branch:
  * Removed: the test `result[0] is in df_old["doi"]`, which is not valid
    Python (`is in` is not an operator), together with a `df_fresh.drop(...)`
    call made without `inplace=True` and without using its result.
  * Added: `dois_added`, seeded with the values of df_old["doi"] and
    extended with every origin DOI that is kept, so a DOI is rejected both
    when df_old already has it and when an earlier df_fresh row resolved
    to it.
  * Added: rejected rows are recorded in `to_del` and removed with a single
    in-place `drop` after the loop; the number removed is then printed.

Points to confirm or follow up:
  * `i_to_drop` is assigned but never read in the lines shown here.
  * `to_del` stores positions taken from `range(...)`, whereas
    `DataFrame.drop` matches index labels. The two agree only if df_fresh
    has the default 0..n-1 index; presumably true for
    pd.DataFrame(temp_rows), to be verified against how temp_rows is built.
  * "Nb dois a garder" prints len(dois_added), which counts the DOIs taken
    from df_old as well as the ones newly kept.
  * `dois_added` is a list, so every `not in` test scans it; a set kept
    alongside it would avoid the repeated scans.
  * my_functions.get_origin_version is defined outside this patch; from its
    use here it returns something indexable whose items 0, 1 and 2 are
    written to "doi", "relation_nbInstances" and "relation_nbCitation".
  * Indentation in this copy was reconstructed from the Python block
    structure; compare against the target file before applying.

 1-enrich-with-datacite/concatenate-enrich-dois.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/1-enrich-with-datacite/concatenate-enrich-dois.py b/1-enrich-with-datacite/concatenate-enrich-dois.py
index c5e2c5f..b1c7657 100644
--- a/1-enrich-with-datacite/concatenate-enrich-dois.py
+++ b/1-enrich-with-datacite/concatenate-enrich-dois.py
@@ -56,14 +56,23 @@ for doi in dois : #[:300]
 ## if new datasets has been founded
 if temp_rows : 
     df_fresh = pd.DataFrame(temp_rows)
-    
+    i_to_drop = []
+    dois_added = list(df_old["doi"])
+    to_del = []
     for i in range(0, len(df_fresh)):
         result = my_functions.get_origin_version(df_fresh.loc[df_fresh.index[i], "doi"])
-        if result[0] is in df_old["doi"]: df_fresh.drop(df_fresh.index[i])
-        else:
+        if result[0] not in dois_added:
+            dois_added.append(result[0])
             df_fresh.loc[df_fresh.index[i], "doi"] = result[0]
             df_fresh.loc[df_fresh.index[i], "relation_nbInstances"] = result[1]
             df_fresh.loc[df_fresh.index[i], "relation_nbCitation"] = result[2]
+        else:
+            to_del.append(i)
+
+    df_fresh.drop(to_del, inplace=True)
+    print("Nombre de dois supprimés : " + str(len(to_del)))
+
+    print("Nb dois a garder : " + str(len(dois_added)))
 
     df_concat = pd.concat([df_old, df_fresh], ignore_index=True)
 
-- 
GitLab