From e7cfa0e490f4dced2c23d2a645360cbf33b57a2f Mon Sep 17 00:00:00 2001
From: Maxence Larrieu <m@larri.eu>
Date: Fri, 23 Feb 2024 14:11:13 +0100
Subject: [PATCH] improve doc readme

---
 0-collect-data/datacite.py                    |  4 +-
 0-collect-data/zenodo.py                      |  6 +--
 .../concatenate-enrich-dois.py                |  8 +++-
 .../z_personal_functions.py                   |  2 -
 README.md                                     | 45 ++++++++++++++-----
 run-all-codes.py                              |  4 +-
 6 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/0-collect-data/datacite.py b/0-collect-data/datacite.py
index 551ffe7..2f7f563 100644
--- a/0-collect-data/datacite.py
+++ b/0-collect-data/datacite.py
@@ -12,8 +12,8 @@ https://support.datacite.org/docs/can-i-see-more-detailed-affiliation-informatio
 
 * mémo : pour descendre au niveau des auteurs , le filtre `?person-id=orcid-nb`
 
-* nb 2024-01-13
-retrait de l'AAU (jbru.aau) car tout est du PD de congrès
+* note 2023-02
+retrait de l'AAU (jbru.aau) car tout est du PDF de congrès
 ajout de client.uid:inist.sshade et client.uid:inist.resif
 """
 
diff --git a/0-collect-data/zenodo.py b/0-collect-data/zenodo.py
index 4af8a8a..475fb58 100644
--- a/0-collect-data/zenodo.py
+++ b/0-collect-data/zenodo.py
@@ -5,7 +5,6 @@
 """
 ## todo
 - v2 : rechercher UGA comme financeur `grants.funder.doi`
-- v2 : passer par les ORCID des `creator.orcid` et `contributors.orcid`
 
 ## Documentation
 * Liste des métadonnées obligatoires lors du dépôts (upload_type, sub_type, publication_date, titre, creators, ) https://developers.zenodo.org/#representation
@@ -13,12 +12,9 @@
 * Doc dev API champs de requêtes classsiques https://developers.zenodo.org/#records
 * doc champs poussées pour la recherche https://help.zenodo.org/guides/search/
 * typologie des dépôts possiblent : publication: Publication, poster: Poster, presentation: Presentation, Dataset: Dataset, image: Image, video: Video/Audio, software: Software, lesson: Lesson, physicalobject: Physical object, other: Other
+* descendre au niveau des ORCID des `creator.orcid` et `contributors.orcid`
 
 
-### Identifier les dépôts : 
-- utilisation du champs `creators.affiliation` et contributor.affiliation (multi affiliation)
-- utilisation de la forme "grenoble" uniquement, possibilité de bruit
-
 ## Notes sur la récupération
 - exemple résultats de requete : https://zenodo.org/api/records?q=creators.affiliation%3A*grenoble*&type=dataset&page=6&size=100&sort=mostrecent&all_version=False
 - deux DOI identiques sont présents : un à la racine `[hits][doi]` et un autre dans `[hits][metadata][doi]`
diff --git a/1-enrich-with-datacite/concatenate-enrich-dois.py b/1-enrich-with-datacite/concatenate-enrich-dois.py
index 499f23d..2f4e4b5 100644
--- a/1-enrich-with-datacite/concatenate-enrich-dois.py
+++ b/1-enrich-with-datacite/concatenate-enrich-dois.py
@@ -55,7 +55,13 @@ for doi in dois : #[:300]
 ## if new datasets has been founded
 if temp_rows :
 	df_fresh = pd.DataFrame(temp_rows)
-	df_out = pd.concat([df_old, df_fresh], ignore_index=True)
+	df_concat = pd.concat([df_old, df_fresh], ignore_index=True)
+
+	## remove unwanted DataCite resource types (matched on the "resourceTypeGeneral" column)
+	type_to_exclude = ["Book", "ConferencePaper", "ConferenceProceeding", "JournalArticle", "BookChapter", "Service", "Preprint"]
+	df_out = df_concat[ ~df_concat["resourceTypeGeneral"].isin(type_to_exclude) ].copy()
+
+	## output main CSV
 	df_out.to_csv("../dois-uga.csv", index = False)
 	print(f"\n\nnb of doi exported \t{len(df_out)}")
 
diff --git a/1-enrich-with-datacite/z_personal_functions.py b/1-enrich-with-datacite/z_personal_functions.py
index fdd05d1..a0d8932 100644
--- a/1-enrich-with-datacite/z_personal_functions.py
+++ b/1-enrich-with-datacite/z_personal_functions.py
@@ -179,5 +179,3 @@ def from_files_load_dois(files_name) :
     
     
     return all_dois
-
-
diff --git a/README.md b/README.md
index 841c15c..275bc35 100644
--- a/README.md
+++ b/README.md
@@ -1,23 +1,46 @@
 # Codes for the UGA Open research data monitor
 
-view contextualized results on the website : [mlarrieu.gricad-pages.univ-grenoble-alpes.fr/open-research-data-monitor](https://mlarrieu.gricad-pages.univ-grenoble-alpes.fr/open-research-data-monitor)
+View contextualized results on the website : [mlarrieu.gricad-pages.univ-grenoble-alpes.fr/open-research-data-monitor](https://mlarrieu.gricad-pages.univ-grenoble-alpes.fr/open-research-data-monitor)
 
+<br />
+<br />
 
-## Sources
+## Sources & identification methods
 
-(so far) 
+### Recherche Data Gouv
 
-|    		| UGA perimeter |
-|-----------|---------------|
-|RDG		|contact, auteurs, producteur et contributeurs avec "UGA" OR "Grenoble" |
-|DataCite	|creator et contributor avec ROR + clients & publisher		    		|
-|Zenodo		|creator et contributor avec "grenoble" 						|
-|Nakala		|UGA user identifiers											|
-|BSO via HAL|NA 															|
-|... 		|    |
+Recherche en format texte de `UGA` et `Grenoble` dans les champs auteurs, contributeurs, `datasetContactAffiliation` et `producerAffiliation`
 
+- memo 2024-02 : il n'est pas possible de rechercher avec AND
 
+### DataCite
 
+- directement depuis les clients DataCite qui relèvent de l'UGA : `inist.osug`, `client.uid:inist.sshade`, `client.uid:inist.resif`, `client_id:inist.persyval`
+
+- sur les champs `creators` et `contributors` avec les ROR de l'université
+
+- sur le champ `publisher` avec `grenoble AND alpes`
+
+- instruire 
+	- l'UGA comme financeur
+	- via les ORCID des auteurs
+
+
+### Zenodo
+- sur les champs creators & contributeurs avec les affiliations et `grenoble`
+- @maxence regarder si on peut mettre un AND alpes
+- demain : possibilité de requêter par ROR ?
+
+### Nakala
+- directement sur les auteurs de l'UGA obtenus via HumaNum
+- instruire : regarder côté `dcterms:publisher`
+
+### BSO
+extraction des DOI des jeux de données produits par l'université
+
+
+<br />
+<br />
 
 ## Credits
 
diff --git a/run-all-codes.py b/run-all-codes.py
index c7e7817..5791897 100644
--- a/run-all-codes.py
+++ b/run-all-codes.py
@@ -30,11 +30,11 @@ collec_dois_prgm = [
 #to depug : run only one script	
 collec_dois_prgm = ["rdg.py", "datacite.py", "nakala.py", "zenodo.py"] 
 [run_py_file(file, "0-collect-data") for file in collec_dois_prgm]
-# exit()
 
 
 
-# # ______1______ Concatenate and enrich DOIs w DataCite
+
+## ______1______ Concatenate and enrich DOIs w DataCite
 run_py_file("concatenate-enrich-dois.py", "1-enrich-with-datacite")
 
 
-- 
GitLab