Skip to content
Snippets Groups Projects
Commit e2e466d5 authored by Elias Chetouane
Browse files

Version fonctionnelle avec les colonnes "traveled_dois" et "all_relations"

parent e4cd98ec
No related branches found
No related tags found
No related merge requests found
......@@ -56,17 +56,17 @@ for doi in dois : #[:300]
## if new datasets have been found
if temp_rows :
df_fresh = pd.DataFrame(temp_rows)
i_to_drop = []
dois_added = list(df_old["doi"])
to_del = []
for i in range(0, len(df_fresh)):
result = my_functions.get_origin_version(df_fresh.loc[df_fresh.index[i], "doi"])
result = my_functions.get_origin_version(df_fresh.loc[i, "doi"])
if result[0] not in dois_added:
dois_added.append(result[0])
df_fresh.loc[df_fresh.index[i], "doi"] = result[0]
df_fresh.loc[df_fresh.index[i], "relation_nbInstances"] = result[1]
df_fresh.loc[df_fresh.index[i], "relation_nbCitation"] = result[2]
df_fresh.loc[df_fresh.index[i], "relations_all"] = str(result[3])
df_fresh.loc[i, "doi"] = result[0]
df_fresh.loc[i, "relation_nbInstances"] = result[1]
df_fresh.loc[i, "relation_nbCitation"] = result[2]
if str(result[3]) != "[]": df_fresh.loc[i, "traveled_dois"] = str(result[3])
if str(result[4]) != "[]": df_fresh.loc[i, "all_relations"] = str(result[4])
else:
to_del.append(i)
......
import requests, json
def get_origin_version(doi, count=0, cited=0, history=[]):
def get_origin_version(doi, count=0, cited=0, history=[], first=True):
if first: history=[] # ligne ajoutée pour éviter certains soucis de cache où history n'est pas vide au premier appel de la fonction
cited = 0
req = requests.get( f"https://api.datacite.org/dois/{doi}" )
res = req.json()
result = (doi, count, cited, history)
final = []
result = (doi, count, cited, history, final)
try:
related = res["data"]["attributes"]["relatedIdentifiers"]
except:
......@@ -13,17 +15,19 @@ def get_origin_version(doi, count=0, cited=0, history=[]):
ignore = False
duplicate = False
for i in related:
history.append([i.get("relationType"), i.get("relatedIdentifier")])
final.append(i.get("relationType"))
if i.get("relationType") == "IsVersionOf" and i.get("relatedIdentifierType") == "DOI":
ignore = True
elem_to_save_i = i.get("relatedIdentifier")
history.append([i.get("relationType"), i.get("relatedIdentifier")])
if i.get("relationType") == "isCitedBy" and i.get("relatedIdentifierType") == "DOI": cited += 1
if i.get("relationType") == "IsIdenticalTo" and i.get("relatedIdentifierType") == "DOI":
duplicate = True
elem_to_save_d = i.get("relatedIdentifier")
history.append([i.get("relationType"), i.get("relatedIdentifier")])
if duplicate and not(ignore):
result = (elem_to_save_d, count, cited, history)
if ignore: result = get_origin_version(elem_to_save_i, count+1, cited, history)
result = (elem_to_save_d, count, cited, history, final)
if ignore: result = get_origin_version(elem_to_save_i, count+1, cited, history, False)
return result
def get_md_from_datacite( doi ) :
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment