From e8b7601ab485fe76918c448946806278bcf755df Mon Sep 17 00:00:00 2001
From: Elias Chetouane <elias.chetouane@univ-grenoble-alpes.fr>
Date: Wed, 17 Apr 2024 16:23:44 +0200
Subject: [PATCH] =?UTF-8?q?Mise=20=C3=A0=20jour=20suppression=20des=20dois?=
 =?UTF-8?q?=20de=20version.=20Version=20fonctionnelle.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 1-enrich-with-datacite/concatenate-enrich-dois.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/1-enrich-with-datacite/concatenate-enrich-dois.py b/1-enrich-with-datacite/concatenate-enrich-dois.py
index c5e2c5f..b1c7657 100644
--- a/1-enrich-with-datacite/concatenate-enrich-dois.py
+++ b/1-enrich-with-datacite/concatenate-enrich-dois.py
@@ -56,14 +56,25 @@ for doi in dois : #[:300]
 ## if new datasets has been founded
 if temp_rows :
 	df_fresh = pd.DataFrame(temp_rows)
-
+	# DOIs already kept: those of df_old plus every result[0] accepted below (set for O(1) lookups)
+	dois_added = set(df_old["doi"])
+	# index labels of the df_fresh rows to remove because their result[0] is already kept
+	to_del = []
 	for i in range(0, len(df_fresh)):
 		result = my_functions.get_origin_version(df_fresh.loc[df_fresh.index[i], "doi"])
-		if result[0] is in df_old["doi"]: df_fresh.drop(df_fresh.index[i])
-		else:
+		if result[0] not in dois_added:
+			dois_added.add(result[0])
 			df_fresh.loc[df_fresh.index[i], "doi"] = result[0]
 			df_fresh.loc[df_fresh.index[i], "relation_nbInstances"] = result[1]
 			df_fresh.loc[df_fresh.index[i], "relation_nbCitation"] = result[2]
+		else:
+			# store the label, not the position: DataFrame.drop() removes rows by index label
+			to_del.append(df_fresh.index[i])
+
+	df_fresh.drop(to_del, inplace=True)
+	print("Nombre de dois supprimés : " + str(len(to_del)))
+	# rows kept overall: the existing rows plus the fresh rows that survived the drop
+	print("Nb dois a garder : " + str(len(df_old) + len(df_fresh)))
 
 	df_concat = pd.concat([df_old, df_fresh], ignore_index=True)
 
-- 
GitLab