Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import z_personal_functions as my_functions
import requests, json, random, pandas as pd
# ______0______ load DOIs and remove duplicates
## specify the list of repositories to import
repo_list = ["nakala", "bso-via-hal", "datacite", "zenodo", "rdg"]
dois_raw = my_functions.from_repos_load_dois(repo_list)
print("DOIs loaded\t\t\t", len(dois_raw))

## remove duplicates
# dict.fromkeys keeps the first occurrence of each DOI at its original
# position, so the processing order is the same from one run to the next;
# list(set(...)) produced an arbitrary order.
dois = list(dict.fromkeys(dois_raw))
print("DOIs to treat\t\t\t", len(dois))
# ______1_____ load metadata from DataCite and keep the selected metadata
## to try with a single DOI
# # random doi 10.25656/01:8509
# temp_doi = random.choice(dois)  # randint(0, len(dois)) could index past the end
# #temp_doi = "10.57745/QYIAWX"
# print(temp_doi)
# raw_metadatas = my_functions.get_md_from_datacite(temp_doi)

doi_error = []  # DOIs for which the DataCite lookup returned "error"
temp_rows = []  # parsed metadata, one entry per newly treated DOI

df_old = pd.read_csv("../dois-uga.csv")
print(f"\nnb of dois already treated\t{len(df_old)}")

# Build the lookup once: a set membership test is O(1) per DOI, whereas
# `doi in df_old["doi"].values` rescans the whole column on every iteration.
known_dois = set(df_old["doi"])

# query DataCite and collect the metadata of every DOI not yet in the CSV
for doi in dois:  # [:300]
    ## skip DOIs already treated
    if doi in known_dois:
        # print(f"\talready treated\t\t{doi}")
        continue

    ## get metadata from DataCite
    raw_md = my_functions.get_md_from_datacite(doi)

    ### DOI not in DataCite: the lookup returned the string "error"
    if raw_md == "error":
        doi_error.append(doi)
        continue

    ## following the manual instructions, extract the relevant metadata
    selected_md = my_functions.parse_datacite_md(raw_md)  # results as a dict
    temp_rows.append(selected_md)  # queue this dict as a new row
    print(f"\tadded\t\t{doi}")

# Report the failed lookups; previously they were collected but never shown.
if doi_error:
    print(f"\nnb of dois in error\t\t{len(doi_error)}")
# Export: the CSV is only rewritten when at least one new row was collected.
if temp_rows:
    output_csv = "../dois-uga.csv"
    # stack the newly parsed rows under the rows already present in the CSV
    df_fresh = pd.DataFrame(temp_rows)
    df_out = pd.concat([df_old, df_fresh], ignore_index=True)
    df_out.to_csv(output_csv, index=False)
    # df_out only exists in this branch, so the summary is printed here
    print(f"\n\nnb of doi exported \t{len(df_out)}")