From e2e466d596617dbaee3e7c002c4ab5a7d532e8cf Mon Sep 17 00:00:00 2001
From: Elias Chetouane <elias.chetouane@univ-grenoble-alpes.fr>
Date: Fri, 10 May 2024 13:12:28 +0200
Subject: [PATCH] Version fonctionnelle avec les colonnes "traveled_dois" et
 "all_relations"

---
 1-enrich-with-datacite/concatenate-enrich-dois.py | 12 ++++++------
 1-enrich-with-datacite/z_personal_functions.py    | 14 +++++++++-----
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/1-enrich-with-datacite/concatenate-enrich-dois.py b/1-enrich-with-datacite/concatenate-enrich-dois.py
index 605175d..111059b 100644
--- a/1-enrich-with-datacite/concatenate-enrich-dois.py
+++ b/1-enrich-with-datacite/concatenate-enrich-dois.py
@@ -56,17 +56,17 @@ for doi in dois : #[:300]
 ## if new datasets has been founded
 if temp_rows :
 	df_fresh = pd.DataFrame(temp_rows)
-	i_to_drop = []
 	dois_added = list(df_old["doi"])
 	to_del = []
 	for i in range(0, len(df_fresh)):
-		result = my_functions.get_origin_version(df_fresh.loc[df_fresh.index[i], "doi"])
+		result = my_functions.get_origin_version(df_fresh.loc[i, "doi"])
 		if result[0] not in dois_added: 
 			dois_added.append(result[0])
-			df_fresh.loc[df_fresh.index[i], "doi"] = result[0]
-			df_fresh.loc[df_fresh.index[i], "relation_nbInstances"] = result[1]
-			df_fresh.loc[df_fresh.index[i], "relation_nbCitation"] = result[2]
-			df_fresh.loc[df_fresh.index[i], "relations_all"] = str(result[3])
+			df_fresh.loc[i, "doi"] = result[0]
+			df_fresh.loc[i, "relation_nbInstances"] = result[1]
+			df_fresh.loc[i, "relation_nbCitation"] = result[2]
+			if str(result[3]) != "[]": df_fresh.loc[i, "traveled_dois"] = str(result[3])
+			if str(result[4]) != "[]": df_fresh.loc[i, "all_relations"] = str(result[4])
 		else:
 			to_del.append(i)
 			
diff --git a/1-enrich-with-datacite/z_personal_functions.py b/1-enrich-with-datacite/z_personal_functions.py
index 5ed1fca..a7ddc7d 100644
--- a/1-enrich-with-datacite/z_personal_functions.py
+++ b/1-enrich-with-datacite/z_personal_functions.py
@@ -1,10 +1,12 @@
 import requests, json
 
-def get_origin_version(doi, count=0, cited=0, history=[]):
+def get_origin_version(doi, count=0, cited=0, history=[], first=True):
+    if first: history=[] # reset on the top-level call: the mutable default history=[] is shared between calls, so it could still hold entries from earlier lookups
     cited = 0
     req = requests.get( f"https://api.datacite.org/dois/{doi}" )
     res = req.json()
-    result = (doi, count, cited, history)
+    final = []
+    result = (doi, count, cited, history, final)
     try:
         related = res["data"]["attributes"]["relatedIdentifiers"]
     except:
@@ -13,17 +15,19 @@ def get_origin_version(doi, count=0, cited=0, history=[]):
         ignore = False
         duplicate = False
         for i in related:
-            history.append([i.get("relationType"), i.get("relatedIdentifier")])
+            final.append(i.get("relationType"))
             if i.get("relationType") == "IsVersionOf" and i.get("relatedIdentifierType") == "DOI": 
                 ignore = True
                 elem_to_save_i = i.get("relatedIdentifier")
+                history.append([i.get("relationType"), i.get("relatedIdentifier")])
             if i.get("relationType") == "isCitedBy" and i.get("relatedIdentifierType") == "DOI": cited += 1
             if i.get("relationType") == "IsIdenticalTo" and i.get("relatedIdentifierType") == "DOI":
                 duplicate = True
                 elem_to_save_d = i.get("relatedIdentifier")
+                history.append([i.get("relationType"), i.get("relatedIdentifier")])
         if duplicate and not(ignore):
-            result = (elem_to_save_d, count, cited, history)
-        if ignore: result = get_origin_version(elem_to_save_i, count+1, cited, history)
+            result = (elem_to_save_d, count, cited, history, final)
+        if ignore: result = get_origin_version(elem_to_save_i, count+1, cited, history, False)
     return result
 
 def get_md_from_datacite( doi ) : 
-- 
GitLab