# concatenate-enrich-dois.py (committed by Maxence Larrieu)
import z_personal_functions as my_functions
import requests, json, random, pandas as pd


print("\n\nRunning concatenate-enrich-dois.py")

# ______0______ load DOIs and remove duplicates

## list of the repositories whose DOIs are imported
files_to_load = ["zenodo", "datacite", "rdg", "bso-via-hal", "nakala"]
dois_raw = my_functions.from_files_load_dois(files_to_load)
print("\n\tDOIs loaded\t\t\t", len(dois_raw))

## remove duplicates; dict.fromkeys keeps the first-seen order, whereas
## list(set(...)) yields an arbitrary order that can change from run to run
dois = list(dict.fromkeys(dois_raw))
print("\tDOIs to treat\t\t", len(dois))
# ______1_____ load DOIs already treated & get metadata from DataCite for new ones

## to try with a single DOI
# temp_doi = random.choice(dois)  # randint(0, len(dois)) could index one past the end
# #temp_doi = "10.57745/QYIAWX" - 10.25656/01:8509
# print(temp_doi)
# raw_metadatas = my_functions.get_md_from_datacite(temp_doi)

doi_error = []  # DOIs for which get_md_from_datacite returned "error"
temp_rows = []  # parsed metadata, one entry per new DOI, turned into a DataFrame below

df_old = pd.read_csv("../dois-uga.csv")
print(f"\n\tnb of DOIs already treated\t{len(df_old)}")

## build the lookup once: testing against df_old["doi"].values inside the loop
## rescans the whole column for every DOI
dois_already_treated = set(df_old["doi"])

# query DataCite and keep the data selected by the manual instructions
for doi in dois:  # slice here (e.g. dois[:300]) for a quick trial run

    ## skip DOIs already present in the existing CSV
    if doi in dois_already_treated:
        continue

    ## ___n___ get md from datacite
    raw_md = my_functions.get_md_from_datacite(doi)
    ## to debug print(f"\t{doi}")

    ### the helper returns the string "error" on failure
    ### (per the original comment: DOI not in DataCite)
    if raw_md == "error":
        doi_error.append(doi)
        continue

    ## ___n___ from manual instructions retrieve appropriate data
    ## the parsed result (a dict, per the original comment) is queued for the DataFrame
    selected_md = my_functions.parse_datacite_md(raw_md)
    temp_rows.append(selected_md)
    print(f"\tadded\t\t{doi}")


## report the failed DOIs instead of dropping them silently
if doi_error:
    print(f"\n\tDOIs in error\t\t{len(doi_error)}")
    for failed_doi in doi_error:
        print(f"\t\t{failed_doi}")

## rewrite the CSV only when at least one new DOI was added
if temp_rows:
    df_fresh = pd.DataFrame(temp_rows)
    df_out = pd.concat([df_old, df_fresh], ignore_index=True)
    df_out.to_csv("../dois-uga.csv", index=False)
    print(f"\n\nnb of doi exported \t{len(df_out)}")