Skip to content
Snippets Groups Projects
Commit 38e29814 authored by Maxence Larrieu
Browse files

Remove unwanted DataCite (DC) clients

parent 77cb87c6
No related branches found
No related tags found
No related merge requests found
...@@ -767,4 +767,4 @@ ...@@ -767,4 +767,4 @@
10.5281/zenodo.4784408 10.5281/zenodo.4784408
10.5281/zenodo.6397629 10.5281/zenodo.6397629
10.5281/zenodo.3611936 10.5281/zenodo.3611936
10.5281/zenodo.7795898 10.5281/zenodo.7795898
\ No newline at end of file
...@@ -10,26 +10,24 @@ rdg.prod,43,Recherche Data Gouv France,2022,https://recherche.data.gouv.fr/en ...@@ -10,26 +10,24 @@ rdg.prod,43,Recherche Data Gouv France,2022,https://recherche.data.gouv.fr/en
inist.humanum,26,Huma-Num,2020,https://nakala.fr inist.humanum,26,Huma-Num,2020,https://nakala.fr
figshare.sage,14,figshare SAGE Publications,2018, figshare.sage,14,figshare SAGE Publications,2018,
mcdy.dohrmi,12,dggv-e-publications,2020,https://www.dggv.de/publikationen/dggv-e-publikationen.html mcdy.dohrmi,12,dggv-e-publications,2020,https://www.dggv.de/publikationen/dggv-e-publikationen.html
rg.rg,4,ResearchGate,2016,https://www.researchgate.net/search/data
iris.iris,3,Incorporated Research Institutions for Seismology,2018,http://www.iris.edu/hq/ iris.iris,3,Incorporated Research Institutions for Seismology,2018,http://www.iris.edu/hq/
vqpf.dris,3,Direction des ressources et de l'information scientifique,2021, vqpf.dris,3,Direction des ressources et de l'information scientifique,2021,
tib.gfzbib,3,GFZpublic,2011,https://gfzpublic.gfz-potsdam.de tib.gfzbib,3,GFZpublic,2011,https://gfzpublic.gfz-potsdam.de
tib.repod,2,RepOD,2015, tib.repod,2,RepOD,2015,
ugraz.unipub,2,unipub,2019,http://unipub.uni-graz.at ugraz.unipub,2,unipub,2019,http://unipub.uni-graz.at
bl.nerc,2,NERC Environmental Data Service,2011,https://eds.ukri.org bl.nerc,2,NERC Environmental Data Service,2011,https://eds.ukri.org
inist.epure,2,Éditions et presses universitaires de Reims,2020,
inist.opgc,1,Observatoire de Physique du Globe de Clermont-Ferrand,2017,
ardcx.nci,1,National Computational Infrastructure,2020,
umass.uma,1,University of Massachusetts (UMass) Amherst,2018,https://scholarworks.umass.edu/
bl.mendeley,1,Mendeley Data,2015,https://data.mendeley.com/
inist.eost,1,Ecole et Observatoire des Sciences de la Terre,2017,https://eost.unistra.fr/en/
crui.ingv,1,Istituto Nazionale di Geofisica e Vulcanologia (INGV),2013,http://data.ingv.it/
bl.iita,1,International Institute of Tropical Agriculture datasets,2017,http://data.iita.org/
ihumi.pub,1,IHU Méditerranée Infection,2020, ihumi.pub,1,IHU Méditerranée Infection,2020,
inist.omp,1,Observatoire Midi-Pyrénées,2011, ethz.zora,1,"Universität Zürich, ZORA",2013,https://www.zora.uzh.ch/
inist.ird,1,IRD,2016,
tib.gfz,1,GFZ Data Services,2011,https://dataservices.gfz-potsdam.de/portal/
edi.edi,1,Environmental Data Initiative,2017,https://portal.edirepository.org/nis/home.jsp edi.edi,1,Environmental Data Initiative,2017,https://portal.edirepository.org/nis/home.jsp
tib.gfz,1,GFZ Data Services,2011,https://dataservices.gfz-potsdam.de/portal/
inist.ird,1,IRD,2016,
inist.omp,1,Observatoire Midi-Pyrénées,2011,
tug.openlib,1,TU Graz OPEN Library,2020,https://openlib.tugraz.at/ tug.openlib,1,TU Graz OPEN Library,2020,https://openlib.tugraz.at/
ethz.zora,1,"Universität Zürich, ZORA",2013,https://www.zora.uzh.ch/ inist.opgc,1,Observatoire de Physique du Globe de Clermont-Ferrand,2017,
crui.ingv,1,Istituto Nazionale di Geofisica e Vulcanologia (INGV),2013,http://data.ingv.it/
inist.eost,1,Ecole et Observatoire des Sciences de la Terre,2017,https://eost.unistra.fr/en/
bl.mendeley,1,Mendeley Data,2015,https://data.mendeley.com/
bl.iita,1,International Institute of Tropical Agriculture datasets,2017,http://data.iita.org/
umass.uma,1,University of Massachusetts (UMass) Amherst,2018,https://scholarworks.umass.edu/
ardcx.nci,1,National Computational Infrastructure,2020,
estdoi.ttu,1,TalTech,2019,https://digikogu.taltech.ee estdoi.ttu,1,TalTech,2019,https://digikogu.taltech.ee
...@@ -20,7 +20,7 @@ print("\tDOIs to treat\t\t", len(dois)) ...@@ -20,7 +20,7 @@ print("\tDOIs to treat\t\t", len(dois))
## pour essayer avec un seul DOI ## pour essayer avec un seul DOI
# temp_doi = dois[random.randint(0, len(dois))] # temp_doi = dois[random.randint(0, len(dois))]
# #temp_doi = "10.57745/QYIAWX" - 10.25656/01:8509 # temp_doi = "10.57745/QYIAWX" - 10.25656/01:8509
# print(temp_doi) # print(temp_doi)
# raw_metadatas = my_functions.get_md_from_datacite(temp_doi) # raw_metadatas = my_functions.get_md_from_datacite(temp_doi)
...@@ -28,6 +28,7 @@ doi_error = [] # retrieve doi error ...@@ -28,6 +28,7 @@ doi_error = [] # retrieve doi error
temp_rows = [] # put data in dict before df temp_rows = [] # put data in dict before df
df_old = pd.read_csv("../dois-uga.csv") df_old = pd.read_csv("../dois-uga.csv")
print(f"\n\tnb of DOIs already treated\t{len(df_old)}") print(f"\n\tnb of DOIs already treated\t{len(df_old)}")
# req dataCite and paste data following instructions # req dataCite and paste data following instructions
...@@ -57,9 +58,11 @@ if temp_rows : ...@@ -57,9 +58,11 @@ if temp_rows :
df_fresh = pd.DataFrame(temp_rows) df_fresh = pd.DataFrame(temp_rows)
df_concat = pd.concat([df_old, df_fresh], ignore_index=True) df_concat = pd.concat([df_old, df_fresh], ignore_index=True)
## remove not wanted datacite type ## remove not wanted datacite type & clients
type_to_explude = ["Book", "ConferencePaper", "ConferenceProceeding", "JournalArticle", "BookChapter", "Service", "Preprint"] type_to_explude = ["Book", "ConferencePaper", "ConferenceProceeding", "JournalArticle", "BookChapter", "Service", "Preprint"]
df_out = df_concat[ ~df_concat["resourceTypeGeneral"].isin(type_to_explude) ].copy() clients_to_exclude = ["rg.rg", "inist.epure"]
df_out = df_concat[ ~df_concat["resourceTypeGeneral"].isin(type_to_explude) & ~df_concat["client"].isin(clients_to_exclude) ].copy()
## output main CSV ## output main CSV
df_out.to_csv("../dois-uga.csv", index = False) df_out.to_csv("../dois-uga.csv", index = False)
...@@ -69,7 +72,7 @@ if temp_rows : ...@@ -69,7 +72,7 @@ if temp_rows :
with open("nb-dois.txt", 'w') as outf : with open("nb-dois.txt", 'w') as outf :
outf.write(str(len(df_out))) outf.write(str(len(df_out)))
## output another csv with datacite client and number of datasets ## for the website : output another csv with datacite client and number of datasets
df_client_raw = df_out["client"].value_counts().to_frame() df_client_raw = df_out["client"].value_counts().to_frame()
## get informations about each client ## get informations about each client
......
2075 2069
\ No newline at end of file \ No newline at end of file
This diff is collapsed.
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment