"""Harvest DataCite metadata for DOIs gathered from several repositories.

Steps:
    0. Load the DOIs listed for each repository and drop duplicates.
    1. For every DOI not already present in the output CSV, query DataCite,
       keep the selected metadata and append the new rows to the CSV.
"""
import json
import random

import pandas as pd
import requests

import z_personal_functions as my_functions

# Output file: read first to know which DOIs were already treated,
# then rewritten with the newly harvested rows appended.
CSV_PATH = "../dois-uga.csv"

# ______0______ load DOIs and remove duplicates

## repositories whose DOI lists must be imported
repo_list = ["nakala", "bso-via-hal", "datacite", "zenodo", "rdg"]

dois_raw = my_functions.from_repos_load_dois(repo_list)
print("DOIs loaded\t\t\t", len(dois_raw))

## remove duplicates; dict.fromkeys keeps first-seen order so that the rows
## appended to the CSV come out in the same order on every run
dois = list(dict.fromkeys(dois_raw))
print("DOIs to treat\t\t\t", len(dois))

# ______1_____ load metadata from DataCite and keep the specified fields

## to try with a single DOI
# # random doi, e.g. 10.25656/01:8509
# temp_doi = random.choice(dois)
# # temp_doi = "10.57745/QYIAWX"
# print(temp_doi)
# raw_metadatas = my_functions.get_md_from_datacite(temp_doi)

doi_error = []  # DOIs for which the DataCite lookup returned "error"
temp_rows = []  # parsed metadata, one entry per new DOI, before building the df

df_old = pd.read_csv(CSV_PATH)
print(f"\nnb of dois already treated\t{len(df_old)}")

# Build the lookup once: testing against a set is O(1) per DOI, whereas
# scanning the column values inside the loop is O(len(df_old)) each time.
already_treated = set(df_old["doi"])

# query DataCite and collect the data following the parsing instructions
for doi in dois:  # [:300]

    ## skip DOIs already present in the CSV
    if doi in already_treated:
        # print(f"\talready treated\t\t{doi}")
        continue

    ## get metadata from DataCite
    raw_md = my_functions.get_md_from_datacite(doi)

    ### the helper signals a failed lookup with the string "error"
    if raw_md == "error":
        doi_error.append(doi)
        continue

    ## extract the relevant fields (presumably a dict; see parse_datacite_md)
    selected_md = my_functions.parse_datacite_md(raw_md)
    temp_rows.append(selected_md)
    print(f"\tadded\t\t{doi}")

# Only rewrite the CSV when something new was harvested.
if temp_rows:
    df_fresh = pd.DataFrame(temp_rows)
    df_out = pd.concat([df_old, df_fresh], ignore_index=True)
    df_out.to_csv(CSV_PATH, index=False)
    print(f"\n\nnb of doi exported \t{len(df_out)}")

# Surface failed lookups instead of silently discarding the collected list.
if doi_error:
    print(f"nb of dois in error\t\t{len(doi_error)}")