Skip to content
Snippets Groups Projects
Commit 38e29814 authored by Maxence Larrieu
Browse files

Remove unwanted DataCite (DC) clients

parent 77cb87c6
No related branches found
No related tags found
No related merge requests found
......@@ -767,4 +767,4 @@
10.5281/zenodo.4784408
10.5281/zenodo.6397629
10.5281/zenodo.3611936
10.5281/zenodo.7795898
10.5281/zenodo.7795898
\ No newline at end of file
......@@ -10,26 +10,24 @@ rdg.prod,43,Recherche Data Gouv France,2022,https://recherche.data.gouv.fr/en
inist.humanum,26,Huma-Num,2020,https://nakala.fr
figshare.sage,14,figshare SAGE Publications,2018,
mcdy.dohrmi,12,dggv-e-publications,2020,https://www.dggv.de/publikationen/dggv-e-publikationen.html
rg.rg,4,ResearchGate,2016,https://www.researchgate.net/search/data
iris.iris,3,Incorporated Research Institutions for Seismology,2018,http://www.iris.edu/hq/
vqpf.dris,3,Direction des ressources et de l'information scientifique,2021,
tib.gfzbib,3,GFZpublic,2011,https://gfzpublic.gfz-potsdam.de
tib.repod,2,RepOD,2015,
ugraz.unipub,2,unipub,2019,http://unipub.uni-graz.at
bl.nerc,2,NERC Environmental Data Service,2011,https://eds.ukri.org
inist.epure,2,Éditions et presses universitaires de Reims,2020,
inist.opgc,1,Observatoire de Physique du Globe de Clermont-Ferrand,2017,
ardcx.nci,1,National Computational Infrastructure,2020,
umass.uma,1,University of Massachusetts (UMass) Amherst,2018,https://scholarworks.umass.edu/
bl.mendeley,1,Mendeley Data,2015,https://data.mendeley.com/
inist.eost,1,Ecole et Observatoire des Sciences de la Terre,2017,https://eost.unistra.fr/en/
crui.ingv,1,Istituto Nazionale di Geofisica e Vulcanologia (INGV),2013,http://data.ingv.it/
bl.iita,1,International Institute of Tropical Agriculture datasets,2017,http://data.iita.org/
ihumi.pub,1,IHU Méditerranée Infection,2020,
inist.omp,1,Observatoire Midi-Pyrénées,2011,
inist.ird,1,IRD,2016,
tib.gfz,1,GFZ Data Services,2011,https://dataservices.gfz-potsdam.de/portal/
ethz.zora,1,"Universität Zürich, ZORA",2013,https://www.zora.uzh.ch/
edi.edi,1,Environmental Data Initiative,2017,https://portal.edirepository.org/nis/home.jsp
tib.gfz,1,GFZ Data Services,2011,https://dataservices.gfz-potsdam.de/portal/
inist.ird,1,IRD,2016,
inist.omp,1,Observatoire Midi-Pyrénées,2011,
tug.openlib,1,TU Graz OPEN Library,2020,https://openlib.tugraz.at/
ethz.zora,1,"Universität Zürich, ZORA",2013,https://www.zora.uzh.ch/
inist.opgc,1,Observatoire de Physique du Globe de Clermont-Ferrand,2017,
crui.ingv,1,Istituto Nazionale di Geofisica e Vulcanologia (INGV),2013,http://data.ingv.it/
inist.eost,1,Ecole et Observatoire des Sciences de la Terre,2017,https://eost.unistra.fr/en/
bl.mendeley,1,Mendeley Data,2015,https://data.mendeley.com/
bl.iita,1,International Institute of Tropical Agriculture datasets,2017,http://data.iita.org/
umass.uma,1,University of Massachusetts (UMass) Amherst,2018,https://scholarworks.umass.edu/
ardcx.nci,1,National Computational Infrastructure,2020,
estdoi.ttu,1,TalTech,2019,https://digikogu.taltech.ee
......@@ -20,7 +20,7 @@ print("\tDOIs to treat\t\t", len(dois))
## pour essayer avec un seul DOI
# temp_doi = dois[random.randint(0, len(dois))]
# #temp_doi = "10.57745/QYIAWX" - 10.25656/01:8509
# temp_doi = "10.57745/QYIAWX" - 10.25656/01:8509
# print(temp_doi)
# raw_metadatas = my_functions.get_md_from_datacite(temp_doi)
......@@ -28,6 +28,7 @@ doi_error = [] # retrieve doi error
temp_rows = [] # put data in dict before df
df_old = pd.read_csv("../dois-uga.csv")
print(f"\n\tnb of DOIs already treated\t{len(df_old)}")
# req dataCite and paste data following instructions
......@@ -57,9 +58,11 @@ if temp_rows :
df_fresh = pd.DataFrame(temp_rows)
df_concat = pd.concat([df_old, df_fresh], ignore_index=True)
## remove not wanted datacite type
## remove not wanted datacite type & clients
type_to_explude = ["Book", "ConferencePaper", "ConferenceProceeding", "JournalArticle", "BookChapter", "Service", "Preprint"]
df_out = df_concat[ ~df_concat["resourceTypeGeneral"].isin(type_to_explude) ].copy()
clients_to_exclude = ["rg.rg", "inist.epure"]
df_out = df_concat[ ~df_concat["resourceTypeGeneral"].isin(type_to_explude) & ~df_concat["client"].isin(clients_to_exclude) ].copy()
## output main CSV
df_out.to_csv("../dois-uga.csv", index = False)
......@@ -69,7 +72,7 @@ if temp_rows :
with open("nb-dois.txt", 'w') as outf :
outf.write(str(len(df_out)))
## output another csv with datacite client and number of datasets
## for the website : output another csv with datacite client and number of datasets
df_client_raw = df_out["client"].value_counts().to_frame()
## get informations about each client
......
2075
\ No newline at end of file
2069
\ No newline at end of file
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment