Compare revisions

0371df2a · 68be127d · a74ec07a · 6f7c07c3 · 8bec794b · 8c3677f3
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,7 @@
 /0-collect-data/bso/
 /1-enrich-with-datacite/__pycache__/
 /2-produce-graph/__pycache__/
-/hide
\ No newline at end of file
+/hide
+0-collect-data/.ipynb_checkpoints/z-datacite-demo-checkpoint.ipynb
+0-collect-data/.ipynb_checkpoints/z-resultats-demo-datacite-checkpoint.csv
+0-collect-data/z-resultats-demo-datacite.csv
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
+image: python:3-alpine
+
+actualisation_dois:
+
+  only:
+    # restreindre au déclenchement automatique par schedule ou déclenchement manuel pour éviter de tourner en boucle (le commit produit par le pipeline qui déclenche le pipeline à nouveau)
+    - schedules
+    - web
+
+
+  before_script:
+    # installations permettant de faire tourner git
+    - apk update
+    - apk add git openssh
+
+  script:
+    # installation des bibliothèques python et exécution du script
+    - pip install pandas
+    - pip install requests
+    - pip install matplotlib
+    - python run-all-codes.py
+
+  after_script:
+    # commit des changements suite à l'exécution du script
+    - git config user.name "${GITLAB_USER_NAME}"
+    - git config user.email "${GITLAB_USER_EMAIL}"
+    - git remote set-url --push origin "https://PUSH_TOKEN:${ACCESS_TOKEN}@gricad-gitlab.univ-grenoble-alpes.fr/${CI_PROJECT_PATH}.git"
+    - git add -f dois-uga.csv dois-uga--last-500.csv 2-produce-graph/hist-evol-datasets-per-repo.png 2-produce-graph/hist-quantity-year-type.png 2-produce-graph/pie--datacite-client.png 2-produce-graph/pie--datacite-type.png 2-produce-graph/hist-last-datasets-by-client.png 1-enrich-with-datacite/all_datacite_clients_for_uga.csv 1-enrich-with-datacite/nb-dois.txt
+    - git commit -m "Execution du pipeline. Actualisation des dois et des graphes."
+    - git push origin HEAD:${CI_COMMIT_REF_NAME}
+
+    # création d'un espace accueillant le clone du repo du site web, et tests au cas où l'espace existe déjà
+    - cd ..
+    - if ! [ -d "cloned_repo" ]; then mkdir cloned_repo; fi
+    - if [ -d "cloned_repo/${PATH_TO_PUSH}" ]; then cd cloned_repo/${PATH_TO_PUSH}; git pull; else cd cloned_repo; git clone ${LINK_TO_CLONE}; fi
+    - cd -
+    # copier le fichier "nb-dois.txt" pour commit dans le repo du site web
+    - cp open-research-data-monitor-back/1-enrich-with-datacite/nb-dois.txt cloned_repo/${PATH_TO_PUSH}
+    - cd cloned_repo/${PATH_TO_PUSH}
+    # commit du fichier "nb-dois.txt" vers le repo du site web
+    - git config user.name "${GITLAB_USER_NAME}"
+    - git config user.email "${GITLAB_USER_EMAIL}"
+    - git remote set-url --push origin "https://PUSH_TOKEN2:${ACCESS_TOKEN2}@gricad-gitlab.univ-grenoble-alpes.fr/${PROJECT_PATH2}.git"
+    - git add -f nb-dois.txt
+    - git commit -m "Execution du pipeline. Actualisation du nombre de dois."
+    - git push origin HEAD:main
+  
+  artifacts:
+    # ajout des fichiers du dépôt qui ont été modifiés, au cas où un problème serait survenu dans "after_script"
+    paths:
+      - dois-uga.csv
+      - dois-uga--last-500.csv
+      - 2-produce-graph/hist-evol-datasets-per-repo.png
+      - 2-produce-graph/hist-quantity-year-type.png
+      - 2-produce-graph/pie--datacite-client.png
+      - 2-produce-graph/pie--datacite-type.png
+      - 2-produce-graph/hist-last-datasets-by-client.png
+      - 1-enrich-with-datacite/nb-dois.txt
+      - 1-enrich-with-datacite/all_datacite_clients_for_uga.csv
--- a/0-collect-data/datacite-dois.txt
+++ b/0-collect-data/datacite-dois.txt
--- a/0-collect-data/datacite.py
+++ b/0-collect-data/datacite.py
 # récupérer les DOIs de l'UGA depuis Datacite
-## 2023-12-01, Elias Chetouane
+## 2023-12-01, Elias Chetouane, Maxence Larrieu

 """
-## todo
- v2 : périmètre UGA : ajouter les ROR des unités, des établissements/laboratoires associés (par exemple Grenoble INP)

-## Documentation
-* Doc générale API Datacite : https://support.datacite.org/docs/api
+## Doc
+* datacite API : https://support.datacite.org/docs/api
 * Recherche d'un DOI : https://support.datacite.org/docs/api-sampling
 * Pagination : https://support.datacite.org/docs/pagination
+* nb : "DataCite added support for affiliation identifiers in Metadata Schema 4.3, released August 2019"
+https://support.datacite.org/docs/can-i-see-more-detailed-affiliation-information-in-the-rest-api
+
+* mémo : pour descendre au niveau des auteurs, utiliser le filtre `?person-id=orcid-nb`
+
+* note 2023-02
+retrait de l'AAU (jbru.aau) car tout est du PDF de congrès
+ajout de client.uid:inist.sshade et client.uid:inist.resif
 """

-import requests
+def get_results(query_from_list, view_results):
+    """
+    lance les requêtes dans l'API 
+    """

-urls = [
-    'https://api.datacite.org/dois?affiliation=true&query=creators.affiliation.affiliationIdentifier:"https://ror.org/02rx3b187"&page[size]=100',
-    'https://api.datacite.org/dois?affiliation=true&query=contributors.affiliation.affiliationIdentifier:"https://ror.org/02rx3b187"&page[size]=100'
-    # possibilité d'ajouter d'autres requêtes
-]
+    query_root = "https://api.datacite.org/dois?query="
+    
+    # les types de datasets à exclure (à voir si on retire text)
+    # note 2023-12-15 ça retire seulement 15 DOIs
+    query_filter_type = " AND resource_type_id:(-book -bookChapter -conferencePaper -conferenceProceeding -dissertation -event -journal -journalArticle -peerReview -preprint -report -service)"
+
+    query_page = "&page[size]=100"

-# on définit une fonction pour la lancer la requete avec chaque url pour les différentes affiliations
-def get_results(url):
-    req = requests.get(url)
+    req = requests.get(query_root + query_from_list +  query_filter_type + query_page)
    results = [req.json()]
    
+    if view_results: 
+        # print total dataset per query
+        print(f"\n\t{req.url}")
+        print(f"\t{results[0]['meta']['total']}")
+
    # obtenir les résultats de chaque page dans la liste results
    nb_pages = results[0]["meta"]["totalPages"]
    page = 1
@@ -34,42 +48,88 @@ def get_results(url):
        page += 1
    return results

-# on crée une fonction pour ajouter les DOIs dans une liste, sans ajouter les résultats qui ne sont pas des DOIs
+
 def get_dois(results):
-    dois = []
-    not_dois = []
+    """
+    Ajouter les datasets avec DOI dans une liste et identifier ceux sans DOI
+    """
+    temp_dois = []

    # prendre en compte les résultats de chaque page
    for res in results:
        num_dois = res["data"]

        for item in num_dois :
-            doi = item["id"]
-            if item.get("type") != "dois":
-                print("Le résultat " + str(item) + " est de type " + item.get("type") + " : " + doi)
-                not_dois.append(doi)
+            item_id = item["id"]
+
+            # si le dataset a un id qui correspond à un DOI
+            if item.get("type") == "dois":
+                temp_dois.append(item_id)
            else:
-                dois.append(doi)
-    return dois, not_dois
+                ## si l'id n'est pas un DOI on l'ajoute ds une liste globale
+                main_no_dois.append(item_id)
+    
+    return temp_dois
+

-# on récupère les dois
+print("\n\nRunning datacite.py")

-dois = []
-not_dois = []
+import requests, pandas as pd
+main_dois = []
+main_no_dois = []
+
+## __________0__________ query with all RORs from UGA on authors and contributors
+
+### load the table containing the RORs found
+df_raw = pd.read_csv("uga-find-ror-childs/UGA-ror-childs--2023-12-29--manual.csv")
+#print("columns name: ", [colname for colname in df_raw.columns])
+
+### select accurate ROR
+df_hal = df_raw[ df_raw["docid"].notna() ] ## RORs finded via HAL
+df_ror = df_raw[ df_raw["Unnamed: 6"] == "include"] ## RORs selected manually /!\ col name
+rors = df_hal.ror.tolist() + df_ror.ror.tolist() 
+rors.append("https://ror.org/02rx3b187") ## add the ROR from UGA ! 
+print(f"\t__process by ROR\n\tnb of ROR loaded\t{len(rors)}")
+
+# debug : try only with UGA ROR
+# rors = ["https://ror.org/02rx3b187"]
+
+for ror in rors : # to debug add [:1] 
+    for auth_type in ["creators", "contributors"] :
+        query = f"{auth_type}.affiliation.affiliationIdentifier:\"{ror}\""
+        temp_doi_list = get_dois(get_results(query, False))
+        [main_dois.append(elem) for elem in temp_doi_list]
+
+print(f"\tnb DOIs finded \t{len(main_dois)}")
+
+
+## __________1__________ query by datacite client and UGA as publisher
+print(f"\n\t__process by datacite clients")
+query_client_publisher = [
+    "client_id:inist.osug",             # OSUG https://doi.osug.fr/
+    "client.uid:inist.sshade",          # services nationaux d'observation portés par l'OSUG
+    "client.uid:inist.resif",           # services nationaux d'observation portés par l'OSUG
+    "client_id:inist.persyval",         # Labex Persyval-lab (PIA)
+    "publisher:(grenoble AND alpes)"    # /!\ apporte du text
+]

-for url in urls:
-    doi, not_doi = get_dois(get_results(url))
-    dois += doi
-    not_dois += not_doi
+for query in query_client_publisher :
+    temp_doi_list = get_dois(get_results(query, True))
+    [main_dois.append(elem) for elem in temp_doi_list]

-# on supprime les doublons
+print(f"\tnb DOI finded \t{len(main_dois)}")

-unique_dois = list(set(dois))

-print("Nombre de dois différents trouvés : " + str(len(unique_dois)))
+## __________n__________ if datasets with an identifier other than a DOI have been found
+if main_no_dois : 
+    print("datasets with an other identifier than DOI has been finded")
+    [print(f"\t\t{elem}") for elem in main_no_dois]

-# exporter la liste de DOI au format txt
+## __________n__________ remove duplicates
+unique_dois = list(set(main_dois))
+print(f"\tNb of unique DOI\t{len(unique_dois)}")

+# __________z__________ export DOIs in txt files
 with open("datacite-dois.txt", 'w') as f :
    [f.write(f"{line}\n") for line in unique_dois]

--- a/0-collect-data/nakala-dois.txt
+++ b/0-collect-data/nakala-dois.txt
-10.34847/nkl.5bcck3cz
-10.34847/nkl.ca709965
 10.34847/nkl.a0fe865m
 10.34847/nkl.76abr599
 10.34847/nkl.6caam3dp
+10.34847/nkl.5bcck3cz
+10.34847/nkl.ca709965
 10.34847/nkl.ca8dmbdh
 10.34847/nkl.a5ae8y33
 10.34847/nkl.748eqz51
 10.34847/nkl.2c0fj3ai
 10.34847/nkl.2bad8uj6
-10.34847/nkl.87788ro3
 10.34847/nkl.2404plkh
+10.34847/nkl.87788ro3
 10.34847/nkl.bafagy29
 10.34847/nkl.9bd4vqc6
 10.34847/nkl.4540o25d
@@ -24,3 +24,9 @@
 10.34847/nkl.81dcdekj
 10.34847/nkl.ef903o6v
 10.34847/nkl.ae94a74k
+10.34847/nkl.bf5f263z
+10.34847/nkl.9f85iol5
+10.34847/nkl.345bf9i7
+10.34847/nkl.9cd8hi4k
+10.34847/nkl.e1e41vdi
+10.34847/nkl.deb655as
--- a/0-collect-data/nakala-uga-users.txt
+++ b/0-collect-data/nakala-uga-users.txt
@@ -15,4 +15,20 @@ egreslou
 troulet
 mbeligne
 acarbonnelle
-annegf
\ No newline at end of file
+annegf
+tleduc
+abey
+mbarletta
+lmaritaud
+jbeaureder
+kboczon
+llacoste
+fcorsi
+ecarlier
+lvanbogaert
+nrousselot
+jlevy1
+mflecheux
+pbai
+ymonnier
+slecuyerchardevel
\ No newline at end of file
--- a/0-collect-data/nakala.py
+++ b/0-collect-data/nakala.py
@@ -68,17 +68,20 @@ def get_dois_n_users(user_id):

 import json, requests

+print("\n\nRunning nakala.py")
+
+
 ## list to stock datas
 nakala_uga_users = []
 all_dois = []
 other_user_finded = []

-## importer les users depuis le fichier txt
+## load nakala users from txt file
 with open('nakala-uga-users.txt', 'r') as f:
    ## attention bien stripper quand on import du txt sinon les sauts de ligne sont présents
    [nakala_uga_users.append( user.strip() ) for user in f.readlines()]

-print("nb d'utilisateur Nakala UGA importés", len(nakala_uga_users))
+print("\tnb nakala users loaded", len(nakala_uga_users))

 # ____n____ iterer sur les users uga
 for user in nakala_uga_users : 
@@ -87,7 +90,7 @@ for user in nakala_uga_users :
    res = get_dois_n_users(user)
    
    if res["continue"] :
-        print(f"{user}\n\tnb DOI {len(res['content'])}" )
+        #print(f"{user}\n\tnb DOI {len(res['content'])}" )
        
        ## ajouter les DOIs trouvés
        if len(res["content"]) > 0 : 
@@ -98,14 +101,17 @@ for user in nakala_uga_users :
            #print(f"\nnew person {','.join(res['other_users'])}")
            other_user_finded += [x for x in res["other_users"] ]

+print(f"\tnb dois finded\t\t{len(all_dois)}")
+

 ## ____n____ exporter les DOI au format txt

 with open("nakala-dois.txt", 'w') as fh :
    [fh.write(f"{line}\n") for line in all_dois]

-## print les autres utilisateurs trouvés7
-print("\n\n nakala new user finded ")
-for elem in other_user_finded : 
-    print("\t\telem")
+## print les autres utilisateurs trouvés
+if other_user_finded : 
+    print("\n\n\tnakala new user finded ")
+    for elem in other_user_finded : 
+        print(f"\t\t\t{elem}")

--- a/0-collect-data/rdg-dois.txt
+++ b/0-collect-data/rdg-dois.txt
-10.57745/QOA1QO
-10.57745/GZKUZS
-10.57745/J2A44Q
+10.57745/XHQ7TL
+10.15454/O93984
+10.57745/3D4DFW
+10.57745/69UNAM
+10.57745/TVAHUQ
+10.57745/3VMB3Y
+10.57745/TOR3SF
+10.57745/OVCWQN
+10.57745/GRHRZJ
+10.57745/Z3BG2U
+10.57745/QCVYG3
 10.15454/M7OK9E
-10.57745/QOA1QO
-10.57745/GZKUZS
-10.57745/NOHRHJ
-10.57745/JOZ1NA
+10.57745/MXEMI4
 10.57745/BYWEA3
-10.57745/J2A44Q
-10.57745/QOA1QO
+10.57745/7RFNNP
 10.57745/B6PSX0
-10.57745/BYWEA3
-10.57745/TVAHUQ
-10.57745/BYWEA3
-10.57745/QCVYG3
+10.57745/JOZ1NA
+10.57745/OT1IFB
 10.57745/NGC4J0
-10.57745/HZDPTT
-10.57745/69UNAM
-10.57745/ENJADK
+10.57745/ID1LS6
+10.57745/LPJ2S2
 10.57745/GZKUZS
+10.57745/LXTWNG
+10.57745/KTFZQD
 10.57745/ENJADK
-10.57745/LUTMNE
 10.57745/NZFWP9
-10.57745/LXTWNG
-10.57745/GZKUZS
-10.57745/W9N5Z9
+10.57745/NOHRHJ
 10.57745/RUQLJL
-10.15454/8UIA76
-10.57745/OVCWQN
-10.57745/MXEMI4
+10.57745/UOGRPY
 10.57745/5O6QIH
-10.57745/KTFZQD
-10.57745/R1NIKK
-10.57745/IZHDPC
-10.57745/TOR3SF
-10.57745/Z3BG2U
+10.57745/J2A44Q
 10.57745/7HF7KG
-10.57745/3D4DFW
-10.57745/OT1IFB
-10.57745/XHQ7TL
-10.57745/ID1LS6
+10.57745/W9N5Z9
+10.15454/8UIA76
+10.57745/HZDPTT
+10.57745/IZHDPC
 10.57745/52HT2L
-10.57745/YWBDQQ
-10.57745/LPJ2S2
 10.57745/CM2WOI
-10.57745/3VMB3Y
-10.15454/O93984
-10.57745/GRHRZJ
-10.57745/7RFNNP
-10.57745/UOGRPY
+10.57745/R1NIKK
+10.57745/QOA1QO
+10.57745/LUTMNE
+10.57745/YWBDQQ
--- a/0-collect-data/rdg.py
+++ b/0-collect-data/rdg.py
@@ -2,9 +2,6 @@
 ## 2023-12-01, Elias Chetouane

 """
-## todo
-/
-
 ## Documentation
 * Noms des champs à requêter : https://entrepot.recherche.data.gouv.fr/api/metadatablocks/citation
 * Doc API Dataverse : https://guides.dataverse.org/en/5.12.1/api/search.html
@@ -24,16 +21,17 @@ urls = [
    'https://entrepot.recherche.data.gouv.fr/api/search?q=*&fq=authorAffiliation%3AUGA',
    'https://entrepot.recherche.data.gouv.fr/api/search?q=*&fq=producerAffiliation%3AUGA',
    'https://entrepot.recherche.data.gouv.fr/api/search?q=*&fq=contributorAffiliation%3AUGA',
-    'https://entrepot.recherche.data.gouv.fr/api/search?q=*&fq=datasetContactAffiliation%3AGrenoble',
-    'https://entrepot.recherche.data.gouv.fr/api/search?q=*&fq=authorAffiliation%3AGrenoble',
-    'https://entrepot.recherche.data.gouv.fr/api/search?q=*&fq=producerAffiliation%3AGrenoble',
-    'https://entrepot.recherche.data.gouv.fr/api/search?q=*&fq=contributorAffiliation%3AGrenoble'
+    'https://entrepot.recherche.data.gouv.fr/api/search?q=*&fq=datasetContactAffiliation%3A(Grenoble AND Alpes)',
+    'https://entrepot.recherche.data.gouv.fr/api/search?q=*&fq=authorAffiliation%3A(Grenoble AND Alpes)',
+    'https://entrepot.recherche.data.gouv.fr/api/search?q=*&fq=producerAffiliation%3A(Grenoble AND Alpes)',
+    'https://entrepot.recherche.data.gouv.fr/api/search?q=*&fq=contributorAffiliation%3A(Grenoble AND Alpes)'
    # possiblilité d'ajouter d'autres requêtes
 ]

 # on définit une fonction pour la lancer la requete avec chaque url pour les différentes affiliations
 def get_results(url):
-    req = requests.get(url)
+    req = requests.get(url+"&type=dataset")
+    #print(req.url)
    results = [req.json()]
    
    # obtenir les résultats de chaque page dans la liste results
@@ -41,7 +39,7 @@ def get_results(url):
    count = nb_res
    page = 1
    while(nb_res > 0):
-        newurl = url+"&start="+str(count)
+        newurl = url+"&type=dataset"+"&start="+str(count)
        req = requests.get(newurl)
        results.append(req.json())
        nb_res = results[page]["data"]["count_in_response"]
@@ -61,13 +59,14 @@ def get_dois(results):
        nb_dois += len(num_dois)

        for item in num_dois :
-            dois.append(item["global_id"])
+            dois.append(item.get("global_id"))
        
-    print("Nombre de résultats trouvés : " + str(nb_dois))
+    print("\tnb DOIs\t\t" + str(nb_dois))
    return dois

-# on récupère les dois
+print("\n\nRunning rdg.py")

+# on récupère les dois
 dois = []

 for url in urls:
@@ -77,10 +76,11 @@ for url in urls:

 unique_dois = list(set(dois))

-print("Nombre de dois différents trouvés : " + str(len(unique_dois)))
+print("\tnb DOIs uniques\t\t" + str(len(unique_dois)))

 # exporter la liste de DOI au format txt

 with open("rdg-dois.txt", 'w') as f :
-    [f.write(f"{line[4:]}\n") for line in unique_dois] # [4:] pour retirer "doi:" au début de chaque ligne
+    # memo [4:] pour retirer "doi:" au début de chaque ligne
+    [f.write(f"{line[4:]}\n") for line in unique_dois]

--- a/0-collect-data/uga-find-ror-childs/UGA-ror-childs--2023-12-29--manual.csv
+++ b/0-collect-data/uga-find-ror-childs/UGA-ror-childs--2023-12-29--manual.csv
+ror,name,type,docid,hal_child_nb,ror_relation_nb,,
+https://ror.org/05588ks88,LInguistique et DIdactique des Langues Étrangères et Maternelles,laboratory,1043147,0.0,,,
+https://ror.org/01yxtfe92,École nationale supérieure d'architecture de Grenoble,institution,1043352,1.0,,,
+https://ror.org/05hz99a17,"Ambiances, Architectures, Urbanités",laboratory,1088635,0.0,,,
+https://ror.org/0509qp208,Centre d'études et de recherches appliquées à la gestion,laboratory,1043181,0.0,,,
+https://ror.org/05sbt2524,Institut polytechnique de Grenoble - Grenoble Institute of Technology,institution,1043329,15.0,,,
+https://ror.org/02z8yps18,"Laboratoire de Génie des Procédés pour la Bioraffinerie, les Matériaux Bio-sourcés et l'Impression Fonctionnelle",laboratory,1162332,0.0,,,
+https://ror.org/04eg25g76,Laboratoire de Conception et d'Intégration des Systèmes,laboratory,1043068,0.0,,,
+https://ror.org/02wrme198,Grenoble Images Parole Signal Automatique,laboratory,1043333,0.0,,,
+https://ror.org/043pfpy19,Laboratoire des Écoulements Géophysiques et Industriels [Grenoble],laboratory,1043048,0.0,,,
+https://ror.org/03985kf35,Translational Innovation in Medicine and Complexity / Recherche Translationnelle et Innovation en Médecine et Complexité - UMR 5525,laboratory,1043049,0.0,,,
+https://ror.org/03bcdsr62,"Laboratoire sols, solides, structures - risques [Grenoble]",laboratory,1043064,0.0,,,
+https://ror.org/04dbzz632,Institut Néel,laboratory,1043183,0.0,,,
+https://ror.org/05hyx5a17,Laboratoire de Génie Electrique de Grenoble,laboratory,1043220,0.0,,,
+https://ror.org/01c8rcg82,Laboratoire d'Informatique de Grenoble,laboratory,1043301,0.0,,,
+https://ror.org/05afmzm11,VERIMAG,laboratory,1043148,0.0,,,
+https://ror.org/04ett5b41,Laboratoire Jean Kuntzmann,laboratory,1043077,0.0,,,
+https://ror.org/03taa9n66,"Institut de Microélectronique, Electromagnétisme et Photonique - Laboratoire d'Hyperfréquences et Caractérisation",laboratory,1043224,0.0,,,
+https://ror.org/03f0apy98,Laboratoire de Physique Subatomique et de Cosmologie,laboratory,1043144,0.0,,,
+https://ror.org/00fwjkb59,Laboratoire d'Economie Appliquée de Grenoble,laboratory,1043256,0.0,,,
+https://ror.org/04axb9j69,Laboratoire d'Electrochimie et de Physico-chimie des Matériaux et des Interfaces,laboratory,1043074,0.0,,,
+https://ror.org/04as3rk94,[GIN] Grenoble Institut des Neurosciences,regrouplaboratory,408885,0.0,,,
+https://ror.org/05rwrfh97,Institut Fourier,laboratory,1043234,0.0,,,
+https://ror.org/036zswm25,Laboratoire des technologies de la microélectronique,laboratory,1043042,0.0,,,
+https://ror.org/023n9q531,Laboratoire Interdisciplinaire de Physique [Saint Martin d’Hères],laboratory,1043294,0.0,,,
+https://ror.org/026j45x50,"Pacte, Laboratoire de sciences sociales",laboratory,1043180,0.0,,,
+https://ror.org/02mc6qk71,Laboratoire de physique et modélisation des milieux condensés,laboratory,1043152,0.0,,,
+https://ror.org/04szabx38,Institut de biologie structurale,department,1043235,0.0,,,
+https://ror.org/02dpnb389,Clinatec - Centre de recherche biomédicale Edmond J.Safra,department,416269,0.0,,,
+https://ror.org/03x1z2w73,Laboratoire d'Ecologie Alpine,laboratory,1043062,0.0,,,
+https://ror.org/045ktmd28,Laboratoire national des champs magnétiques intenses - Grenoble,laboratory,1043255,0.0,,,
+https://ror.org/04qz4qy85,LAboratoire de Recherche Historique Rhône-Alpes - UMR5190,laboratory,1043116,0.0,,,
+https://ror.org/041rhpw39,Centre Hospitalier Universitaire de Grenoble,healthcare,,,4.0,include,
+https://ror.org/000tdrn36,Centre Interuniversitaire de MicroElectronique et Nanotechnologies,facility,,,2.0,include,
+https://ror.org/02wmc6m46,Institut d'Histoire des Représentations et des Idées dans les Modernités,facility,,,6.0,error,old UMR
+https://ror.org/01cf2sz15,Institut des Sciences de la Terre,facility,,,5.0,include,
+https://ror.org/01wwcfa26,Institute of Environmental Geosciences,facility,,,5.0,include,
+https://ror.org/03eqm6y13,LabEx PERSYVAL-Lab,facility,,,5.0,include,
+https://ror.org/0157h5t87,"Langages, Littératures, Sociétés. Études Transfrontalières et Internationales",facility,,,2.0,error,old UMR
+https://ror.org/02f7wz369,Pierre Mendès-France University,education,,,7.0,include,
+https://ror.org/03yppfm65,Stendhal University,education,,,5.0,include,
+https://ror.org/02rmwrd87,"Laboratoire Environnements, Dynamiques et Territoires de Montagne",facility,,,4.0,include,
+https://ror.org/047p7mf25,Institut Carnot PolyNat,facility,,,2.0,include,
+https://ror.org/03vte9x46,Observatoire des Sciences de l'Univers de Grenoble,education,,,5.0,include,
+https://ror.org/05c99vk74,Laboratoire de Recherche sur les Apprentissages en Contexte,facility,,,2.0,include,
+https://ror.org/04fhvpc68,Département de Pharmacochimie Moléculaire,facility,,,2.0,include,
+https://ror.org/03jrb0276,"Laboratoire Inter-universitaire de Psychologie: Personnalité, Cognition, Changement Social",facility,,,2.0,include,
+https://ror.org/01kbr1737,PHotonique ELectronique et Ingénierie QuantiqueS,facility,,,2.0,include,
+https://ror.org/00ndvqf03,Laboratoire Modélisation et Exploration des Matériaux,facility,,,2.0,include,
+https://ror.org/0467x8h16,Maison des Sciences de l'Homme-Alpes,facility,,,2.0,include,
+https://ror.org/02cmt9z73,Agence pour les Mathématiques en Interaction avec l'Entreprise et la Société,facility,,,2.0,exclude,GDR national
+https://ror.org/026m44z54,Laboratoire Nanotechnologies et Nanosystèmes,facility,,,6.0,include,
+https://ror.org/03949e763,Integrated Structural Biology Grenoble,facility,,,3.0,include,
+https://ror.org/04ndt7n58,"Centre d'Etudes et de Recherche sur la diplomatie, l’Administration Publique et le Politique",facility,,,2.0,include,
+https://ror.org/044cfnj78,GRIDCAD - Grenoble Alpes Recherche-Infrastructure de Calcul intensif et de Données,facility,,,4.0,include,
+https://ror.org/03e044190,Spintronique et Technologie des Composants,facility,,,4.0,include,
+https://ror.org/05hb8m595,PHOTOSYNTHESE,other,,,12.0,exclude,GDR national
+https://ror.org/01w1erp60,Couplage Multi-physiques et Multi-échelles en mécanique géo-environnemental,other,,,19.0,exclude,GDR national
+https://ror.org/05be9p317,Micropesanteur Fondamentale et Appliquée,other,,,14.0,exclude,GDR national
+https://ror.org/01x6z5t49,Fédération de Recherche Spectroscopies de Photoémission,other,,,42.0,exclude,FR national
+https://ror.org/01nrtdp55,GDR NBODY : Problème quantique à N corps en chimie et physique,facility,,,38.0,exclude,GDR national
+https://ror.org/00hgbrg14,"Fédération française Matériaux sous hautes vitesses de déformation. Application aux matériaux en conditions extrêmes, Procédés et structures",other,,,16.0,exclude,FR national
+https://ror.org/0459fdx51,Fédération de Recherche sur l'Energie Solaire,other,,,45.0,exclude,FR national
+https://ror.org/04yem5s35,INFRANALYTICS,other,,,16.0,exclude,IR national
+https://ror.org/01sgwka45,Microscopie Fonctionnelle du Vivant,facility,,,31.0,exclude,GDR national
--- a/0-collect-data/uga-find-ror-childs/UGA-ror-childs--2023-12-29.csv
+++ b/0-collect-data/uga-find-ror-childs/UGA-ror-childs--2023-12-29.csv
+ror,name,type,docid,hal_child_nb,ror_relation_nb
+https://ror.org/05588ks88,LInguistique et DIdactique des Langues Étrangères et Maternelles,laboratory,1043147,0.0,
+https://ror.org/01yxtfe92,École nationale supérieure d'architecture de Grenoble,institution,1043352,1.0,
+https://ror.org/05hz99a17,"Ambiances, Architectures, Urbanités",laboratory,1088635,0.0,
+https://ror.org/0509qp208,Centre d'études et de recherches appliquées à la gestion,laboratory,1043181,0.0,
+https://ror.org/05sbt2524,Institut polytechnique de Grenoble - Grenoble Institute of Technology,institution,1043329,15.0,
+https://ror.org/02z8yps18,"Laboratoire de Génie des Procédés pour la Bioraffinerie, les Matériaux Bio-sourcés et l'Impression Fonctionnelle",laboratory,1162332,0.0,
+https://ror.org/04eg25g76,Laboratoire de Conception et d'Intégration des Systèmes,laboratory,1043068,0.0,
+https://ror.org/02wrme198,Grenoble Images Parole Signal Automatique,laboratory,1043333,0.0,
+https://ror.org/043pfpy19,Laboratoire des Écoulements Géophysiques et Industriels [Grenoble],laboratory,1043048,0.0,
+https://ror.org/03985kf35,Translational Innovation in Medicine and Complexity / Recherche Translationnelle et Innovation en Médecine et Complexité - UMR 5525,laboratory,1043049,0.0,
+https://ror.org/03bcdsr62,"Laboratoire sols, solides, structures - risques [Grenoble]",laboratory,1043064,0.0,
+https://ror.org/04dbzz632,Institut Néel,laboratory,1043183,0.0,
+https://ror.org/05hyx5a17,Laboratoire de Génie Electrique de Grenoble,laboratory,1043220,0.0,
+https://ror.org/01c8rcg82,Laboratoire d'Informatique de Grenoble,laboratory,1043301,0.0,
+https://ror.org/05afmzm11,VERIMAG,laboratory,1043148,0.0,
+https://ror.org/04ett5b41,Laboratoire Jean Kuntzmann,laboratory,1043077,0.0,
+https://ror.org/03taa9n66,"Institut de Microélectronique, Electromagnétisme et Photonique - Laboratoire d'Hyperfréquences et Caractérisation",laboratory,1043224,0.0,
+https://ror.org/03f0apy98,Laboratoire de Physique Subatomique et de Cosmologie,laboratory,1043144,0.0,
+https://ror.org/00fwjkb59,Laboratoire d'Economie Appliquée de Grenoble,laboratory,1043256,0.0,
+https://ror.org/04axb9j69,Laboratoire d'Electrochimie et de Physico-chimie des Matériaux et des Interfaces,laboratory,1043074,0.0,
+https://ror.org/04as3rk94,[GIN] Grenoble Institut des Neurosciences,regrouplaboratory,408885,0.0,
+https://ror.org/036zswm25,Laboratoire des technologies de la microélectronique,laboratory,1043042,0.0,
+https://ror.org/05rwrfh97,Institut Fourier,laboratory,1043234,0.0,
+https://ror.org/026j45x50,"Pacte, Laboratoire de sciences sociales",laboratory,1043180,0.0,
+https://ror.org/023n9q531,Laboratoire Interdisciplinaire de Physique [Saint Martin d’Hères],laboratory,1043294,0.0,
+https://ror.org/04szabx38,Institut de biologie structurale,department,1043235,0.0,
+https://ror.org/02mc6qk71,Laboratoire de physique et modélisation des milieux condensés,laboratory,1043152,0.0,
+https://ror.org/02dpnb389,Clinatec - Centre de recherche biomédicale Edmond J.Safra,department,416269,0.0,
+https://ror.org/03x1z2w73,Laboratoire d'Ecologie Alpine,laboratory,1043062,0.0,
+https://ror.org/045ktmd28,Laboratoire national des champs magnétiques intenses - Grenoble,laboratory,1043255,0.0,
+https://ror.org/04qz4qy85,LAboratoire de Recherche Historique Rhône-Alpes - UMR5190,laboratory,1043116,0.0,
+https://ror.org/041rhpw39,Centre Hospitalier Universitaire de Grenoble,healthcare,,,4.0
+https://ror.org/000tdrn36,Centre Interuniversitaire de MicroElectronique et Nanotechnologies,facility,,,2.0
+https://ror.org/02wmc6m46,Institut d'Histoire des Représentations et des Idées dans les Modernités,facility,,,6.0
+https://ror.org/01cf2sz15,Institut des Sciences de la Terre,facility,,,5.0
+https://ror.org/01wwcfa26,Institute of Environmental Geosciences,facility,,,5.0
+https://ror.org/03eqm6y13,LabEx PERSYVAL-Lab,facility,,,5.0
+https://ror.org/0157h5t87,"Langages, Littératures, Sociétés. Études Transfrontalières et Internationales",facility,,,2.0
+https://ror.org/02f7wz369,Pierre Mendès-France University,education,,,7.0
+https://ror.org/03yppfm65,Stendhal University,education,,,5.0
+https://ror.org/02rmwrd87,"Laboratoire Environnements, Dynamiques et Territoires de Montagne",facility,,,4.0
+https://ror.org/047p7mf25,Institut Carnot PolyNat,facility,,,2.0
+https://ror.org/03vte9x46,Observatoire des Sciences de l'Univers de Grenoble,education,,,5.0
+https://ror.org/05c99vk74,Laboratoire de Recherche sur les Apprentissages en Contexte,facility,,,2.0
+https://ror.org/04fhvpc68,Département de Pharmacochimie Moléculaire,facility,,,2.0
+https://ror.org/03jrb0276,"Laboratoire Inter-universitaire de Psychologie: Personnalité, Cognition, Changement Social",facility,,,2.0
+https://ror.org/01kbr1737,PHotonique ELectronique et Ingénierie QuantiqueS,facility,,,2.0
+https://ror.org/00ndvqf03,Laboratoire Modélisation et Exploration des Matériaux,facility,,,2.0
+https://ror.org/0467x8h16,Maison des Sciences de l'Homme-Alpes,facility,,,2.0
+https://ror.org/02cmt9z73,Agence pour les Mathématiques en Interaction avec l'Entreprise et la Société,facility,,,2.0
+https://ror.org/026m44z54,Laboratoire Nanotechnologies et Nanosystèmes,facility,,,6.0
+https://ror.org/03949e763,Integrated Structural Biology Grenoble,facility,,,3.0
+https://ror.org/04ndt7n58,"Centre d'Etudes et de Recherche sur la diplomatie, l’Administration Publique et le Politique",facility,,,2.0
+https://ror.org/044cfnj78,GRIDCAD - Grenoble Alpes Recherche-Infrastructure de Calcul intensif et de Données,facility,,,4.0
+https://ror.org/03e044190,Spintronique et Technologie des Composants,facility,,,4.0
+https://ror.org/05hb8m595,PHOTOSYNTHESE,other,,,12.0
+https://ror.org/01w1erp60,Couplage Multi-physiques et Multi-échelles en mécanique géo-environnemental,other,,,19.0
+https://ror.org/05be9p317,Micropesanteur Fondamentale et Appliquée,other,,,14.0
+https://ror.org/01x6z5t49,Fédération de Recherche Spectroscopies de Photoémission,other,,,42.0
+https://ror.org/01nrtdp55,GDR NBODY : Problème quantique à N corps en chimie et physique,facility,,,38.0
+https://ror.org/00hgbrg14,"Fédération française Matériaux sous hautes vitesses de déformation. Application aux matériaux en conditions extrêmes, Procédés et structures",other,,,16.0
+https://ror.org/0459fdx51,Fédération de Recherche sur l'Energie Solaire,other,,,45.0
+https://ror.org/04yem5s35,INFRANALYTICS,other,,,16.0
+https://ror.org/01sgwka45,Microscopie Fonctionnelle du Vivant,facility,,,31.0
--- a/0-collect-data/uga-find-ror-childs/ror-retrieve-uga-rors.py
+++ b/0-collect-data/uga-find-ror-childs/ror-retrieve-uga-rors.py
+# Récupérer les RORs de l'env. UGA
+## 2024-01-08, Maxence Larrieu
+
+
+"""
+## step
+- récupère les ROR qui ont pour parent le ROR de l'UGA
+- HAL : on réduit aux structures VALID
+- ROR apporte du bruit et demande un nettoyage manuel a posteriori
+- ROR : on retire les UMR passées et les réseaux nationaux type GDR, FR, IR
+- cette dernière étape se fait à la main à partir des tableaux produits
+
+
+## documentation
+
+* HAL
+https://api.archives-ouvertes.fr/ref/structure/?q=parentRor_id:02rx3b187
+
+
+* ROR 
+https://ror.readme.io/docs/ror-schema-api-v2-beta
+https://api.dev.ror.org/v2/organizations/02rx3b187
+"""
+
+
+import requests, json, pandas as pd
+import datetime
+
+
+## ___0____ from HAL structure API get UGA ROR childs
+
+def hal_struct_return_docs(ror_id) :
+	"""
+	Query the HAL structure referential for the structures whose parent is ror_id.
+
+	API fields: https://api.archives-ouvertes.fr/docs/ref/?resource=structure&schema=fields#fields
+
+	ror_id: bare ROR identifier (e.g. "02rx3b187"), matched against parentRor_id.
+	Returns the raw list found under response -> docs in the JSON answer;
+	the query asks only for the fields ror_s, name_s, type_s and docid.
+	"""
+
+	# in plain words: children of the ROR structure that are VALID and have a ROR id.
+	# The filter is assembled from separate pieces so that no indentation
+	# whitespace from this source file leaks into the URL (a backslash line
+	# continuation inside a string literal keeps the next line's leading tab).
+	req_filter = "&".join([
+		f"q=parentRor_id:{ror_id}",
+		"fq=ror_s:[%22%22%20TO%20*]",
+		"fq=valid_s:VALID",
+		"fl=ror_s,name_s,type_s,docid",
+	])
+
+	req = requests.get("https://api.archives-ouvertes.fr/ref/structure/?" + req_filter + "&rows=500")
+	# to debug print(req.url)
+	res = req.json()
+	return res["response"]["docs"]
+
+
+def hal_struct_return_ror_childs(ror_id) :
+	"""
+	boucle récursive
+	à partir d'un résultat de HAL structure sur parent_ror
+	itérer sur chacun des éléments et les ajouter
+	et refaire pareil sur ces éléments
+	"""	
+	hal_results = hal_struct_return_docs(ror_id)
+
+	for item in hal_results : 
+		# in HAL structure API, ror are in list
+		for ror_url in item["ror_s"] :
+
+			## find number of ROR child to the sub element
+			ror_id =  convert_ror_url_to_id(ror_url)
+			new_hal_results = hal_struct_return_docs(ror_id)
+			nb_childs = len(new_hal_results)
+			struct_info = [ror_url, item["name_s"], item["type_s"], item["docid"], nb_childs]
+			
+			ror_childs.append(struct_info)
+
+			## if sub element has ROR child lets iterate on these new results
+			if nb_childs > 0 :
+				new_ror_id = convert_ror_url_to_id(ror_url)
+				hal_struct_return_ror_childs(new_ror_id)
+
+
+def convert_ror_url_to_id (ror_url) : 
+	# convert https://ror.org/05rwrfh97 to 05rwrfh97
+	## car l'API de HAL renoie l'URL pour ces deux champs ror_s et ror_urls :/ 
+	return ror_url[ ror_url.rfind("/") +1 :]
+
+
+ror_childs = []
+hal_struct_return_ror_childs("02rx3b187")
+
+print(f"from HAL, nb of childs raw\t{len(ror_childs)}")
+
+df_hal = pd.DataFrame(ror_childs, columns = ["ror", "name", "type", "docid", "hal_child_nb"])
+df_hal.drop_duplicates(subset = ["ror"], inplace = True)
+print(f"from HAL, nb of unique childs\t{len(df_hal)}")
+
+
+## ____2____  from ROR API get UGA ROR childs
+
+def ror_get_child_info(ror_id) : 
+	req = requests.get("https://api.dev.ror.org/v2/organizations/" + ror_id)
+	res = req.json()
+	types_list = [ item for item in res["types"]]
+	org_type = ",".join(types_list)
+	return {
+		"org_type" : org_type,
+		"relation_nb" : len(res["relationships"])
+		}
+
+# query on ROR w UGA ROR_id
+req = requests.get("https://api.dev.ror.org/v2/organizations/02rx3b187")
+res = req.json()
+# relationships contains all ror "childs"
+ror_list = res["relationships"]
+print(f"from ROR, nb of childs finded\t{len(ror_list)}")
+
+## for each ROR finded enrich with ROR API
+data_from_rors = []
+for item in ror_list :
+	if item["id"] not in df_hal.ror.values : 
+		org_info = ror_get_child_info(item["id"])
+		data_from_rors.append( 
+			[ item["id"], item["label"], org_info["org_type"], org_info["relation_nb"] ]
+		)
+
+
+df_ror = pd.DataFrame(data_from_rors, columns = ["ror", "name", "type", "ror_relation_nb"])
+
+df = pd.concat([df_hal, df_ror], ignore_index = True)
+date = datetime.datetime.now().strftime("%Y-%m-%d") ## format date has 2023-12-29
+df.to_csv(f"UGA-ror-childs--{date}.csv", index = False)
+
+
+print("nb of RORs finded", len(df))
--- a/0-collect-data/z-datacite-demo.ipynb
+++ b/0-collect-data/z-datacite-demo.ipynb
 %% Cell type:markdown id:ce5cebe1-a058-4c0f-a5b5-23a02baa3521 tags:

 # Démo récupération des DOIs affiliés à l'UGA depuis Datacite

 ## Documentation
 * Doc générale API Datacite : https://support.datacite.org/docs/api
 * Recherche d'un DOI : https://support.datacite.org/docs/api-sampling
 * Pagination : https://support.datacite.org/docs/pagination

 ## Code

 %% Cell type:code id:d229201d-4e79-40a8-9472-9ea46b344b1c tags:

 ``` python
 import requests, json, pandas
 ```

 %% Cell type:code id:7ce19b89-d5b7-4dbe-9fab-a15a81b42078 tags:

 ``` python
 # construction de l'url
 url_pre = 'https://api.datacite.org/dois?affiliation=true&page[size]=1000'
 url_query = '&query=(creators.affiliation.affiliationIdentifier:"https://ror.org/02rx3b187") AND (types.resourceTypeGeneral:Dataset)'

 # obtention des résultats de la requête
 req = requests.get(url_pre + url_query)
 results = req.json()
 ```

 %% Cell type:code id:ec73be41-21df-4448-b58e-e21306f6b9fa tags:

 ``` python
 # ajouter les DOIs dans une liste, sans ajouter les résultats qui ne sont pas des DOIs
 # ajouter les autres résultats qui nous intéressent dans des listes afin de construire un DataFrame
 dois = []
 not_dois = []
 titles = []
 dates = []
 authors = []

 # boucler pour ajouter les informations relatives à chaque dépôt
 all_md_list = results["data"]
 nb_dois = len(all_md_list)

 for item in all_md_list :
    doi = item["id"]
    # si l'identifiant n'est pas un doi, on ne le prend pas
    if item.get("type") != "dois":
        print("Le résultat " + str(item) + " est de type " + item.get("type") + " : " + doi)
        not_dois.append(doi)
    # sinon, on récupère les informations dont on a besoin
    else:
        dois.append(doi)
        titles.append(item["attributes"]["titles"][0].get("title"))
        dates.append(item["attributes"]["created"])
        # boucler pour obtenir tous les auteurs
        auts = []
        for aut in item["attributes"]["creators"]:
            auts.append(aut.get("name"))
        authors.append(auts)

 # affichage du résultat
 print("Nombre de résultats trouvés : " + str(nb_dois))
 ```

 %% Output

    Nombre de résultats trouvés : 142

 %% Cell type:code id:e0722b5b-0059-4842-8e8a-2125239a7b7d tags:

 ``` python
 # construction du DataFrame
 df = pandas.DataFrame({'DOI':dois, 'Titre':titles, 'Date':dates, 'Auteurs':authors})

 df.to_csv("z-resultats-demo-datacite.csv")
 df
 ```

 %% Output

                                                  DOI  \
    0                                  10.7280/d11h3x
    1                                  10.7280/d1mm37
-    2                                  10.7280/d1667w
-    3                                  10.7280/d1595v
+    2                                  10.7280/d1595v
+    3                                  10.7280/d1667w
    4                                  10.7280/d1b114
    ..                                            ...
    137                  10.6084/m9.figshare.23488967
    138                               10.18150/wyyjk6
    139                               10.13127/efsm20
    140  10.5285/3ea504d8-41c2-40dc-86dc-284c341badaa
    141  10.5285/634ee206-258f-4b47-9237-efff4ef9eedd
    
                                                     Titre                  Date  \
    0    Annual Ice Velocity of the Greenland Ice Sheet...  2019-03-29T12:53:36Z
    1    Annual Ice Velocity of the Greenland Ice Sheet...  2018-12-14T09:39:45Z
-    2    Greenland Marine-Terminating Glacier Retreat Data  2020-12-01T18:09:19Z
-    3    Annual Ice Velocity of the Greenland Ice Sheet...  2019-03-29T10:37:23Z
+    2    Annual Ice Velocity of the Greenland Ice Sheet...  2019-03-29T10:37:23Z
+    3    Greenland Marine-Terminating Glacier Retreat Data  2020-12-01T18:09:19Z
    4    Dataset for: Fast retreat of Pope, Smith, and ...  2021-11-01T23:46:08Z
    ..                                                 ...                   ...
    137  Additional file 1 of 3DVizSNP: a tool for rapi...  2023-06-10T03:21:52Z
    138  Estimates for recombination coefficients from ...  2022-04-21T14:17:28Z
    139  European Fault-Source Model 2020 (EFSM20): onl...  2022-10-30T16:28:46Z
    140  Ice radar data from Little Dome C, Antarctica,...  2022-03-04T09:26:18Z
    141  Polarimetric ApRES data on a profile across Do...  2021-09-16T11:17:15Z
    
                                                   Auteurs
    0    [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be...
    1    [Mouginot, Jeremie, Rignot, Eric, Millan, Roma...
-    2    [Wood, Michael, Rignot, Eric, Bjørk, Anders, V...
-    3    [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be...
+    2    [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be...
+    3    [Wood, Michael, Rignot, Eric, Bjørk, Anders, V...
    4    [Milillo, Pietro, Rignot, Eric, Rizzoli, Paola...
    ..                                                 ...
    137  [Sierk, Michael, Ratnayake, Shashikala, Wagle,...
    138  [Sakowski, Konrad, Borowik, Lukasz, Rochat, Né...
    139  [Basili, Roberto, Danciu, Laurentiu, Beauval, ...
    140  [Mulvaney, Robert, King, Edward, Martin, Carlo...
    141      [Corr, Hugh, Ritz, Catherine, Martin, Carlos]
    
    [142 rows x 4 columns]

 %% Cell type:code id:0308feea-2560-4e33-836a-285a65db2429 tags:

 ``` python
 ```

 %% Cell type:markdown id:ce5cebe1-a058-4c0f-a5b5-23a02baa3521 tags:

 # Démo récupération des DOIs affiliés à l'UGA depuis Datacite

 ## Documentation
 * Doc générale API Datacite : https://support.datacite.org/docs/api
 * Recherche d'un DOI : https://support.datacite.org/docs/api-sampling
 * Pagination : https://support.datacite.org/docs/pagination

 ## Code

 %% Cell type:code id:d229201d-4e79-40a8-9472-9ea46b344b1c tags:

 ``` python
 import requests, json, pandas
 ```

 %% Cell type:code id:7ce19b89-d5b7-4dbe-9fab-a15a81b42078 tags:

 ``` python
 # construction de l'url
 url_pre = 'https://api.datacite.org/dois?affiliation=true&page[size]=1000'
 url_query = '&query=(creators.affiliation.affiliationIdentifier:"https://ror.org/02rx3b187") AND (types.resourceTypeGeneral:Dataset)'

 # obtention des résultats de la requête
 req = requests.get(url_pre + url_query)
 results = req.json()
 ```

 %% Cell type:code id:ec73be41-21df-4448-b58e-e21306f6b9fa tags:

 ``` python
 # ajouter les DOIs dans une liste, sans ajouter les résultats qui ne sont pas des DOIs
 # ajouter les autres résultats qui nous intéressent dans des listes afin de construire un DataFrame
 dois = []
 not_dois = []
 titles = []
 dates = []
 authors = []

 # boucler pour ajouter les informations relatives à chaque dépôt
 all_md_list = results["data"]
 nb_dois = len(all_md_list)

 for item in all_md_list :
    doi = item["id"]
    # si l'identifiant n'est pas un doi, on ne le prend pas
    if item.get("type") != "dois":
        print("Le résultat " + str(item) + " est de type " + item.get("type") + " : " + doi)
        not_dois.append(doi)
    # sinon, on récupère les informations dont on a besoin
    else:
        dois.append(doi)
        titles.append(item["attributes"]["titles"][0].get("title"))
        dates.append(item["attributes"]["created"])
        # boucler pour obtenir tous les auteurs
        auts = []
        for aut in item["attributes"]["creators"]:
            auts.append(aut.get("name"))
        authors.append(auts)

 # affichage du résultat
 print("Nombre de résultats trouvés : " + str(nb_dois))
 ```

 %% Output

    Nombre de résultats trouvés : 142

 %% Cell type:code id:e0722b5b-0059-4842-8e8a-2125239a7b7d tags:

 ``` python
 # construction du DataFrame
 df = pandas.DataFrame({'DOI':dois, 'Titre':titles, 'Date':dates, 'Auteurs':authors})

 df.to_csv("z-resultats-demo-datacite.csv")
 df
 ```

 %% Output

                                                  DOI  \
    0                                  10.7280/d11h3x
    1                                  10.7280/d1mm37
-    2                                  10.7280/d1667w
-    3                                  10.7280/d1595v
+    2                                  10.7280/d1595v
+    3                                  10.7280/d1667w
    4                                  10.7280/d1b114
    ..                                            ...
    137                  10.6084/m9.figshare.23488967
    138                               10.18150/wyyjk6
    139                               10.13127/efsm20
    140  10.5285/3ea504d8-41c2-40dc-86dc-284c341badaa
    141  10.5285/634ee206-258f-4b47-9237-efff4ef9eedd
    
                                                     Titre                  Date  \
    0    Annual Ice Velocity of the Greenland Ice Sheet...  2019-03-29T12:53:36Z
    1    Annual Ice Velocity of the Greenland Ice Sheet...  2018-12-14T09:39:45Z
-    2    Greenland Marine-Terminating Glacier Retreat Data  2020-12-01T18:09:19Z
-    3    Annual Ice Velocity of the Greenland Ice Sheet...  2019-03-29T10:37:23Z
+    2    Annual Ice Velocity of the Greenland Ice Sheet...  2019-03-29T10:37:23Z
+    3    Greenland Marine-Terminating Glacier Retreat Data  2020-12-01T18:09:19Z
    4    Dataset for: Fast retreat of Pope, Smith, and ...  2021-11-01T23:46:08Z
    ..                                                 ...                   ...
    137  Additional file 1 of 3DVizSNP: a tool for rapi...  2023-06-10T03:21:52Z
    138  Estimates for recombination coefficients from ...  2022-04-21T14:17:28Z
    139  European Fault-Source Model 2020 (EFSM20): onl...  2022-10-30T16:28:46Z
    140  Ice radar data from Little Dome C, Antarctica,...  2022-03-04T09:26:18Z
    141  Polarimetric ApRES data on a profile across Do...  2021-09-16T11:17:15Z
    
                                                   Auteurs
    0    [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be...
    1    [Mouginot, Jeremie, Rignot, Eric, Millan, Roma...
-    2    [Wood, Michael, Rignot, Eric, Bjørk, Anders, V...
-    3    [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be...
+    2    [Mouginot, Jeremie, Rignot, Eric, Scheuchl, Be...
+    3    [Wood, Michael, Rignot, Eric, Bjørk, Anders, V...
    4    [Milillo, Pietro, Rignot, Eric, Rizzoli, Paola...
    ..                                                 ...
    137  [Sierk, Michael, Ratnayake, Shashikala, Wagle,...
    138  [Sakowski, Konrad, Borowik, Lukasz, Rochat, Né...
    139  [Basili, Roberto, Danciu, Laurentiu, Beauval, ...
    140  [Mulvaney, Robert, King, Edward, Martin, Carlo...
    141      [Corr, Hugh, Ritz, Catherine, Martin, Carlos]
    
    [142 rows x 4 columns]

 %% Cell type:code id:0308feea-2560-4e33-836a-285a65db2429 tags:

 ``` python
 ```

--- a/0-collect-data/zenodo-dois.txt
+++ b/0-collect-data/zenodo-dois.txt
--- a/0-collect-data/zenodo.py
+++ b/0-collect-data/zenodo.py
@@ -5,7 +5,6 @@
 """
 ## todo
 - v2 : rechercher UGA comme financeur `grants.funder.doi`
- v2 : passer par les ORCID des `creator.orcid` et `contributors.orcid`

 ## Documentation
 * Liste des métadonnées obligatoires lors du dépôts (upload_type, sub_type, publication_date, titre, creators, ) https://developers.zenodo.org/#representation
@@ -13,12 +12,9 @@
 * Doc dev API champs de requêtes classsiques https://developers.zenodo.org/#records
 * doc champs poussées pour la recherche https://help.zenodo.org/guides/search/
 * typologie des dépôts possiblent : publication: Publication, poster: Poster, presentation: Presentation, Dataset: Dataset, image: Image, video: Video/Audio, software: Software, lesson: Lesson, physicalobject: Physical object, other: Other
+* descendre au niveau des ORCID des `creator.orcid` et `contributors.orcid`


-### Identifier les dépôts : 
- utilisation du champs `creators.affiliation` et contributor.affiliation (multi affiliation)
- utilisation de la forme "grenoble" uniquement, possibilité de bruit
-
 ## Notes sur la récupération
 - exemple résultats de requete : https://zenodo.org/api/records?q=creators.affiliation%3A*grenoble*&type=dataset&page=6&size=100&sort=mostrecent&all_version=False
 - deux DOI identiques sont présents : un à la racine `[hits][doi]` et un autre dans `[hits][metadata][doi]`
@@ -27,13 +23,7 @@

 import requests, json

-print("\n\nzenodo.py is launched")
-
-
-with open("personnal-keys.json") as f : 
-    ## load zenodo keys for requests the API
-    ACCESS_TOKEN = json.load(f)["ZENODO_KEY"]
-
+print("\n\nRunning zenodo.py")

 def req_zenodo_with_page(uga_perimeter, record_type, page_nb) :
    """
@@ -50,7 +40,6 @@ def req_zenodo_with_page(uga_perimeter, record_type, page_nb) :
            "size" : 100,
            "sort" : "mostrecent",
            "all_version" : False,
-            "access_tpoken" : ACCESS_TOKEN
            } 
    )
    # for debugging 
@@ -89,7 +78,7 @@ def req_zenodo(uga_perimeter, record_type) :

        # verifier si la requete a bien fonctionnée
        if not res["continue"] : 
-            print(r"/!\ oups, problem with the query")
+            print(r"\t/!\ oups, problem with the query")
            print(res["content"].status_code ) ## contient la réponse de la requête
            break

@@ -117,20 +106,26 @@ def req_zenodo(uga_perimeter, record_type) :

 all_dois = set() # a set to gather all DOIs

-uga_perimeter = "creators.affiliation:*grenoble* contributors.affiliation:*grenoble*"
+uga_txt_query = "(\"grenoble alpes\" OR \"grenoble alps\" OR \"grenoble INP\" \
+OR \"polytechnique de grenoble\" OR \"Grenoble Institute of Technology\" OR \"univeristé de grenoble\" )"
+
+uga_query = f"creators.affiliation:{uga_txt_query} contributors.affiliation:{uga_txt_query}"
+
+## memo 2024-02: two field clauses separated by a space match in the first field OR in the second field
+## do not build the search with AND, because it then searches across all the affiliations of the authors
+## "SciencesPo Grenoble" brings no result: https://zenodo.org/search?q=creators.affiliation%3A%28sciencespo%20AND%20Grenoble%29&q=type%3Adataset&l=list&p=1&s=10&sort=bestmatch
+
 types_to_req = ["dataset", "image", "video", "software", "other"]

 for record_type in types_to_req : 

-    temp_dois = req_zenodo(uga_perimeter, record_type)
+    temp_dois = req_zenodo(uga_query, record_type)

    ## placer les DOI dans le buffer général qui ne peut contenir que des valeurs uniques
    [all_dois.add(doi) for doi in temp_dois]
    

-print("Zenodo nb of DOIs catched",
-    len(all_dois)
-    )
+print("\n\tnb DOIs finded\t",  len(all_dois) )

 ## ____z____ exporter la liste de DOI au format txt


--- a/1-enrich-with-datacite/all_datacite_clients_for_uga.csv
+++ b/1-enrich-with-datacite/all_datacite_clients_for_uga.csv
+client,count,name,year,url
+cern.zenodo,905,Zenodo,2013,https://zenodo.org/
+inist.sshade,530,Solid Spectroscopy Hosting Architecture of Databases and Expertise,2019,https://www.sshade.eu/
+figshare.ars,380,figshare Academic Research System,2016,http://figshare.com/
+inist.osug,275,Observatoire des Sciences de l'Univers de Grenoble,2014,http://doi.osug.fr
+dryad.dryad,168,DRYAD,2018,https://datadryad.org
+inist.resif,101,Réseau sismologique et géodésique français,2014,https://www.resif.fr/
+rdg.prod,85,Recherche Data Gouv France,2022,https://recherche.data.gouv.fr/en
+inist.humanum,75,NAKALA,2020,https://nakala.fr
+inist.persyval,64,PERSYVAL-Lab : Pervasive Systems and Algorithms Lab,2016,
+fmsh.prod,28,Fondation Maison des sciences de l'homme,2023,
+inist.ccj,22,Centre Camille Jullian – UMR 7299,2020,
+pangaea.repository,18,PANGAEA,2020,https://www.pangaea.de/
+mcdy.dohrmi,14,dggv-e-publications,2020,https://www.dggv.de/publikationen/dggv-e-publikationen.html
+inist.cirm,7,Centre International de Rencontres Mathématiques,2017,
+figshare.sage,6,figshare SAGE Publications,2018,
+iris.iris,5,NSF Seismological Facility for the Advancement of Geoscience (SAGE),2018,http://www.iris.edu/hq/
+tib.repod,4,RepOD,2015,https://repod.icm.edu.pl/
+vqpf.dris,3,Direction des ressources et de l'information scientifique,2021,
+tib.gfzbib,3,GFZpublic,2011,https://gfzpublic.gfz-potsdam.de
+cnic.sciencedb,3,ScienceDB,2022,https://www.scidb.cn/en
+inist.eost,2,Ecole et Observatoire des Sciences de la Terre,2017,https://eost.unistra.fr/en/
+tib.gfz,2,GFZ Data Services,2011,https://dataservices.gfz-potsdam.de/portal/
+bl.mendeley,2,Mendeley Data,2015,https://data.mendeley.com/
+bl.nerc,2,NERC Environmental Data Service,2011,https://eds.ukri.org
+tug.openlib,2,TU Graz OPEN Library,2020,https://openlib.tugraz.at/
+crui.ingv,2,Istituto Nazionale di Geofisica e Vulcanologia (INGV),2013,http://data.ingv.it/
+ugraz.unipub,2,unipub,2019,http://unipub.uni-graz.at
+ethz.sed,2,"Swiss Seismological Service, national earthquake monitoring and hazard center",2013,http://www.seismo.ethz.ch
+inist.opgc,1,Observatoire de Physique du Globe de Clermont-Ferrand,2017,
+ethz.da-rd,1,ETHZ Data Archive - Research Data,2013,http://data-archive.ethz.ch
+ethz.zora,1,"Universität Zürich, ZORA",2013,https://www.zora.uzh.ch/
+estdoi.ttu,1,TalTech,2019,https://digikogu.taltech.ee
+repod.dbuw,1,University of Warsaw Research Data Repository,2023,https://danebadawcze.uw.edu.pl/
+inist.ird,1,IRD,2016,
+inist.omp,1,Observatoire Midi-Pyrénées,2011,
+umass.uma,1,University of Massachusetts (UMass) Amherst,2018,https://scholarworks.umass.edu/
+edi.edi,1,Environmental Data Initiative,2017,https://portal.edirepository.org/nis/home.jsp
+bl.iita,1,International Institute of Tropical Agriculture datasets,2017,http://data.iita.org/
+ardcx.nci,1,National Computational Infrastructure,2020,
+ihumi.pub,1,IHU Méditerranée Infection,2020,
+inist.inrap,1,Institut national de recherches archéologiques préventives,2019,
+tib.mpdl,1,Max Planck Digital Library,2015,
+tudublin.arrow,1,ARROW@TU Dublin,2020,https://arrow.dit.ie/
--- a/1-enrich-with-datacite/concatenate-enrich-dois.py
+++ b/1-enrich-with-datacite/concatenate-enrich-dois.py
+import z_personal_functions as my_functions
+import requests, json, random, pandas as pd
+
+
+print("\n\nRunning concatenate-enrich-dois.py")
+
+# ______0______ load DOIs and remove duplicate
+
+## specifier la liste des entrepôts à importer
+files_to_load = [ "zenodo", "datacite", "rdg", "bso-via-hal", "nakala" ] 
+
+dois_raw = my_functions.from_files_load_dois(files_to_load)
+print("\n\tDOIs loaded\t\t\t", len(dois_raw))
+
+dois = list(set(dois_raw)) ## remove duplicate
+print("\tDOIs to treat\t\t", len(dois))
+
+
+# ______1_____ load DOIs already treater & get md from DataCite for new one
+
+## pour essayer avec un seul DOI
+# temp_doi = dois[random.randint(0, len(dois))]
+# temp_doi = "10.57745/QYIAWX" - 10.25656/01:8509
+# print(temp_doi)
+# raw_metadatas = my_functions.get_md_from_datacite(temp_doi)
+
+doi_error = [] # retrieve doi error
+temp_rows = [] # put data in dict before df
+
+df_old = pd.read_csv("../dois-uga.csv")
+
+print(f"\n\tnb of DOIs already treated\t{len(df_old)}")
+
+# req dataCite and paste data following instructions
+for doi in dois : #[:300]
+
+	## if doi already treated
+	if doi in df_old["doi"].values : 
+		#print(f"\talready treated\t\t{doi}")
+		continue
+
+	## ___n___ get md from datacite
+	raw_md = my_functions.get_md_from_datacite(doi)
+	## to debug print(f"\t{doi}")
+
+	### if doi not in datacite
+	if raw_md == "error" : 
+		doi_error.append(doi)
+		continue
+
+	## ___n___ from manual instructions retrieve appropriate data
+	selected_md = my_functions.parse_datacite_md(raw_md) ## placer les resultats dans un dictionnaire
+	temp_rows.append(selected_md) ## ajouter ce dictionnaire à une liste
+	print(f"\tadded\t\t{doi}")
+
+## if new datasets has been founded
+if temp_rows :
+	df_fresh = pd.DataFrame(temp_rows)
+	dois_added = list(df_old["doi"])
+	to_del = []
+	for i in range(0, len(df_fresh)):
+		result = my_functions.get_origin_version(df_fresh.loc[i, "doi"])
+		if result[0] not in dois_added: 
+			dois_added.append(result[0])
+			df_fresh.loc[i, "doi"] = result[0]
+			if str(result[1]) != "[]": df_fresh.loc[i, "traveled_dois"] = str(result[1])
+			else: df_fresh.loc[i, "traveled_dois"] = ""	
+			if str(result[2]) != "[]": df_fresh.loc[i, "all_relations"] = str(result[2])
+			else: df_fresh.loc[i, "all_relations"] = ""
+		else:
+			to_del.append(i)
+			
+	df_fresh.drop(to_del, inplace=True)
+	print("Nombre de dois supprimés : " + str(len(to_del)))
+	
+	print("Nb dois a garder : " + str(len(dois_added)))
+
+	df_concat = pd.concat([df_old, df_fresh], ignore_index=True)
+
+	## remove not wanted datacite type & clients
+	type_to_explude = ["Book", "ConferencePaper", "ConferenceProceeding", "JournalArticle", "BookChapter", "Service", "Preprint"]
+	clients_to_exclude = ["rg.rg", "inist.epure"]
+
+	df_out = df_concat[ ~df_concat["resourceTypeGeneral"].isin(type_to_explude) & ~df_concat["client"].isin(clients_to_exclude) ].copy()
+	
+	## output main CSV
+	df_out.to_csv("../dois-uga.csv", index = False)
+	print(f"\n\nnb of doi exported \t{len(df_out)}")
+
+
+	# write the number of dois found in a file to display on the website
+	with open("nb-dois.txt", 'w') as outf :
+		outf.write(str(len(df_out)))
+
+
+	## output last 500 DOIs to make it easier to open in web tools
+	df_last_dois = df_out.sort_values(by = "created", ascending = False, inplace = False)[:500]
+	df_last_dois["created"] = df_last_dois["created"].str[:10]
+	df_last_dois[["doi", "client", "resourceTypeGeneral", "created", "publisher", "rights", "sizes"]].to_csv("../dois-uga--last-500.csv", index = False)
+
+
+	## for the website : output another csv with datacite client and number of datasets
+	df_client_raw = df_out["client"].value_counts().to_frame()
+
+	## get informations about each client
+	client_names = []
+	client_years = []
+	client_urls = []
+	for i in range(0, len(df_client_raw)):
+		client = df_client_raw.iloc[i].name
+		req = requests.get('https://api.datacite.org/clients?query=id:%22'+str(client)+'%22')
+		client_names.append(req.json()["data"][0]["attributes"]["name"])
+		client_years.append(req.json()["data"][0]["attributes"]["year"])
+		client_urls.append(req.json()["data"][0]["attributes"]["url"])
+	
+	## add informations to the output csv
+	df_client_raw["name"] = client_names
+	df_client_raw["year"] = client_years
+	df_client_raw["url"] = client_urls
+	df_client_raw.to_csv("all_datacite_clients_for_uga.csv")
+
+
--- a/1-enrich-with-datacite/concatenet-enrich-dois.py
+++ b/1-enrich-with-datacite/concatenet-enrich-dois.py
-import z_personal_functions as my_functions
-import requests, json, random, pandas as pd
-
-
-# ______0______ load DOIs and remove duplicate
-
-## specifier la liste des entrepôts à importer
-repo_list = ["nakala", "bso-via-hal", "datacite", "zenodo", "rdg"] 
-
-dois_raw = my_functions.from_repos_load_dois(repo_list)
-print("DOIs loaded\t\t\t", len(dois_raw))
-
-## remove duplicate
-dois = list(set(dois_raw))
-print("DOIs to treat\t\t\t", len(dois))
-
-
-# ______1_____ load metadata from dataCite and get specified metadatas
-
-## pour essayer avec un seul DOI
-# # random doi 10.25656/01:8509
-# temp_doi = dois[random.randint(0, len(dois))]
-# #temp_doi = "10.57745/QYIAWX"
-# print(temp_doi)
-# raw_metadatas = my_functions.get_md_from_datacite(temp_doi)
-
-doi_error = [] # retrieve doi error
-temp_rows = [] # put data in dict before df
-
-df_old = pd.read_csv("../dois-uga.csv")
-print(f"\nnb of dois already treated\t{len(df_old)}")
-
-# req dataCite and paste data following instructions
-for doi in dois : #[:300]
-
-	## if doi already treated
-	if doi in df_old["doi"].values : 
-		#print(f"\talready treated\t\t{doi}")
-		continue
-
-	## get md from datacite
-	raw_md = my_functions.get_md_from_datacite(doi)
-
-	### if doi not in datacite
-	if raw_md == "error" : 
-		doi_error.append(doi)
-		continue
-
-	## from manual instruction retrieve accurate data
-	selected_md = my_functions.parse_datacite_md(raw_md) ## placer les resultats dans un dictionnaire
-	temp_rows.append(selected_md) ## ajouter ce dictionnaire à une liste
-	print(f"\tadded\t\t{doi}")
-
-
-if temp_rows :
-	df_fresh = pd.DataFrame(temp_rows)
-	df_out = pd.concat([df_old, df_fresh], ignore_index=True)
-	df_out.to_csv("../dois-uga.csv", index = False)
-	print(f"\n\nnb of doi exported \t{len(df_out)}")
-
--- a/1-enrich-with-datacite/datacite-parser-instructions.json
+++ b/1-enrich-with-datacite/datacite-parser-instructions.json
@@ -3,7 +3,7 @@
 	"version" : "2023-11-20",
 	"comentaires" : {
 		"0" : "non prise en compte de l'attribut dates",
-		"1" : "non prise en compte de l'attrbut relatedIdentifiers"
+		"1" : "non prise en compte de l'attribut relatedIdentifiers"
 	},
 	"path-and-fields" : {


--- a/1-enrich-with-datacite/nb-dois.txt
+++ b/1-enrich-with-datacite/nb-dois.txt
+2727
\ No newline at end of file
No results found