diff --git a/1-enrich-with-datacite/all_datacite_clients_for_uga.csv b/1-enrich-with-datacite/all_datacite_clients_for_uga.csv
index 0d97cef126258c747bdd2ba421e05800a09e790c..cabcfb09529b50481a0034176146023cce1d130b 100644
--- a/1-enrich-with-datacite/all_datacite_clients_for_uga.csv
+++ b/1-enrich-with-datacite/all_datacite_clients_for_uga.csv
@@ -1,10 +1,10 @@
 client,count,name,year,url
-cern.zenodo,743,Zenodo,2013,https://zenodo.org/
-inist.sshade,471,Solid Spectroscopy Hosting Architecture of Databases and Expertise,2019,https://www.sshade.eu/
+cern.zenodo,753,Zenodo,2013,https://zenodo.org/
+inist.sshade,472,Solid Spectroscopy Hosting Architecture of Databases and Expertise,2019,https://www.sshade.eu/
 figshare.ars,255,figshare Academic Research System,2016,http://figshare.com/
 inist.osug,238,Observatoire des Sciences de l'Univers de Grenoble,2014,http://doi.osug.fr
-dryad.dryad,156,DRYAD,2018,https://datadryad.org
-inist.resif,79,Réseau sismologique et géodésique français,2014,https://www.resif.fr/
+dryad.dryad,157,DRYAD,2018,https://datadryad.org
+inist.resif,80,Réseau sismologique et géodésique français,2014,https://www.resif.fr/
 inist.persyval,55,PERSYVAL-Lab : Pervasive Systems and Algorithms Lab,2016,
 rdg.prod,43,Recherche Data Gouv France,2022,https://recherche.data.gouv.fr/en
 fmsh.prod,28,Fondation Maison des sciences de l'homme,2023,
diff --git a/1-enrich-with-datacite/nb-dois.txt b/1-enrich-with-datacite/nb-dois.txt
index d37329ca5c23851d01876d9954d013752d8a9613..2e8bf645b7126114fef6d1ba8e442a3196cf6852 100644
--- a/1-enrich-with-datacite/nb-dois.txt
+++ b/1-enrich-with-datacite/nb-dois.txt
@@ -1 +1 @@
-2153
\ No newline at end of file
+2166
\ No newline at end of file
diff --git a/2-produce-graph/hist-evol-datasets-per-repo.png b/2-produce-graph/hist-evol-datasets-per-repo.png
index a86869948b3c08e9a6be2a3a615475d91a54054d..e5118a5af6ecd5a9c29016a4500e1b9f6161dde2 100644
Binary files a/2-produce-graph/hist-evol-datasets-per-repo.png and b/2-produce-graph/hist-evol-datasets-per-repo.png differ
diff --git a/2-produce-graph/hist-last-datasets-by-client.png b/2-produce-graph/hist-last-datasets-by-client.png
index 10e3a0c77f385581f0550ff21d0d1bf3b4b39cad..72f5d57dd5d14971c74b0cf90d9636a3ebfc6c65 100644
Binary files a/2-produce-graph/hist-last-datasets-by-client.png and b/2-produce-graph/hist-last-datasets-by-client.png differ
diff --git a/2-produce-graph/hist-quantity-year-type.png b/2-produce-graph/hist-quantity-year-type.png
index 15f604e16c8c93573ef8668b456ab7658746d36d..e7608cf1cd5e6a3304c9736a47a608f1734b548c 100644
Binary files a/2-produce-graph/hist-quantity-year-type.png and b/2-produce-graph/hist-quantity-year-type.png differ
diff --git a/2-produce-graph/pie--datacite-client.png b/2-produce-graph/pie--datacite-client.png
index e948a803ab06df98edda1a290164de6e8225ffd5..d1117ad93cf00a6d8ad33646959aa6bda21014e2 100644
Binary files a/2-produce-graph/pie--datacite-client.png and b/2-produce-graph/pie--datacite-client.png differ
diff --git a/2-produce-graph/pie--datacite-type.png b/2-produce-graph/pie--datacite-type.png
index 3e982d513de0bc0f4a71069b15beae920415fe84..eeb95003cc16c89ae7dcbd94c7ca58fa20f76a68 100644
Binary files a/2-produce-graph/pie--datacite-type.png and b/2-produce-graph/pie--datacite-type.png differ
diff --git a/dois-uga.csv b/dois-uga.csv
index c6ab5887fe960018d4badc94a05bf484182ff2d0..8d4b7052d82447b4b3664e942108957209f31a5d 100644
--- a/dois-uga.csv
+++ b/dois-uga.csv
@@ -6768,3 +6768,422 @@ The content and use of the test models are described into the pdf document: A se
 10.6084/m9.figshare.c.5114735,Expert consensus-based clinical practice guidelines management of intravascular catheters in the intensive care unit,figshare,2020,,Collection,Creative Commons Attribution 4.0 International,"Abstract The French Society of Intensive Care Medicine (SRLF), jointly with the French-Speaking Group of Paediatric Emergency Rooms and Intensive Care Units (GFRUP) and the French-Speaking Association of Paediatric Surgical Intensivists (ADARPEF), worked out guidelines for the management of central venous catheters (CVC), arterial catheters and dialysis catheters in intensive care unit. For adult patients: Using GRADE methodology, 36 recommendations for an improved catheter management were produced by the 22 experts. Recommendations regarding catheter-related infections’ prevention included the preferential use of subclavian central vein (GRADE 1), a one-step skin disinfection(GRADE 1) using 2% chlorhexidine (CHG)-alcohol (GRADE 1), and the implementation of a quality of care improvement program. Antiseptic- or antibiotic-impregnated CVC should likely not be used (GRADE 2, for children and adults). Catheter dressings should likely not be changed before the 7th day, except when the dressing gets detached, soiled or impregnated with blood (GRADE 2− adults). CHG dressings should likely be used (GRADE 2+). For adults and children, ultrasound guidance should be used to reduce mechanical complications in case of internal jugular access (GRADE 1), subclavian access (Grade 2) and femoral venous, arterial radial and femoral access (Expert opinion). For children, an ultrasound-guided supraclavicular approach of the brachiocephalic vein was recommended to reduce the number of attempts for cannulation and mechanical complications. Based on scarce publications on diagnostic and therapeutic strategies and on their experience (expert opinion), the panel proposed definitions, and therapeutic strategies.",mds,True,findable,0,0,0,0,0,2020-09-08T03:45:47.000Z,2020-09-08T03:45:49.000Z,figshare.ars,otjm,"Medicine,Cell Biology,Environmental Sciences not elsewhere classified,Biological Sciences not elsewhere classified,Marine Biology,Science Policy,Infectious Diseases,FOS: Health sciences","[{'subject': 'Medicine'}, {'subject': 'Cell Biology'}, {'subject': 'Environmental Sciences not elsewhere classified'}, {'subject': 'Biological Sciences not elsewhere classified'}, {'subject': 'Marine Biology'}, {'subject': 'Science Policy'}, {'subject': 'Infectious Diseases'}, {'subject': 'FOS: Health sciences', 'schemeUri': 'http://www.oecd.org/science/inno/38235147.pdf', 'subjectScheme': 'Fields of Science and Technology (FOS)'}]",,
 10.6084/m9.figshare.c.5226946,Impact of advance directives on the variability between intensivists in the decisions to forgo life-sustaining treatment,figshare,2020,,Collection,Creative Commons Attribution 4.0 International,"Abstract Background There is wide variability between intensivists in the decisions to forgo life-sustaining treatment (DFLST). Advance directives (ADs) allow patients to communicate their end-of-life wishes to physicians. We assessed whether ADs reduced variability in DFLSTs between intensivists. Methods We conducted a multicenter, prospective, simulation study. Eight patients expressed their wishes in ADs after being informed about DFLSTs by an intensivist-investigator. The participating intensivists answered ten questions about the DFLSTs of each patient in two scenarios, referring to patients’ characteristics without ADs (round 1) and then with (round 2). DFLST score ranged from 0 (no-DFLST) to 10 (DFLST for all questions). The main outcome was variability in DFLSTs between intensivists, expressed as relative standard deviation (RSD). Results A total of 19,680 decisions made by 123 intensivists from 27 ICUs were analyzed. The DFLST score was higher with ADs than without (6.02 95% CI [5.85; 6.19] vs 4.92 95% CI [4.75; 5.10], p < 0.001). High inter-intensivist variability did not change with ADs (RSD: 0.56 (round 1) vs 0.46 (round 2), p = 0.84). Inter-intensivist agreement on DFLSTs was weak with ADs (intra-class correlation coefficient: 0.28). No factor associated with DFLSTs was identified. A qualitative analysis of ADs showed focus on end-of-life wills, unwanted things and fear of pain. Conclusions ADs increased the DFLST rate but did not reduce variability between the intensivists. In the decision-making process using ADs, the intensivist’s decision took priority. Further research is needed to improve the matching of the physicians’ decision with the patient’s wishes. Trial registration ClinicalTrials.gov Identifier: NCT03013530. Registered 6 January 2017; https://clinicaltrials.gov/ct2/show/NCT03013530 .",mds,True,findable,0,0,0,0,0,2020-12-03T04:34:09.000Z,2020-12-03T04:34:14.000Z,figshare.ars,otjm,"Cell Biology,Biotechnology,Biological Sciences not elsewhere classified,Science Policy,Mental Health","[{'subject': 'Cell Biology'}, {'subject': 'Biotechnology'}, {'subject': 'Biological Sciences not elsewhere classified'}, {'subject': 'Science Policy'}, {'subject': 'Mental Health'}]",,
 10.5281/zenodo.10782398,Bounding the contribution of leads to sea spray aerosol in the high Arctic,Zenodo,2024,,Model,Creative Commons Attribution 4.0 International,,api,True,findable,0,0,0,0,1,2024-03-20T17:38:13.000Z,2024-03-20T17:38:13.000Z,cern.zenodo,cern,,,,
+10.15778/resif.4g2007,EMSO-Azores Ocean Bottom Seismometer Data,RESIF - Réseau Sismologique et géodésique Français,2024,en,Dataset,"OpenAccess,Creative Commons Attribution 4.0 International","This dataset contains seismological data from 5 ocean bottom seismometer stations in a 7-8 km aperture around Lucky Strike volcano from 2007 to present. Stations are collected using yearly deployments of mostly short-period seismometers, but one station per year may be a broadband instrument. The central instrument is collocated with the SeaMoN (Seafloor Monitoring Node) West of the EMSO-Azores facility",mds,True,findable,0,0,0,0,0,2024-03-29T08:11:10.000Z,2024-03-29T08:11:19.000Z,inist.resif,vcob,"(keywords),EMSO-France,EMSO-Azores,Ocean Bottom Seismometer,Mid-Oceanic Ridge Volcano,Hydrothermal Vents,Pressure sensor (hydrophone),Geophone,Broadband seismometer,SeaMoN","[{'subject': '(keywords)'}, {'subject': 'EMSO-France'}, {'subject': 'EMSO-Azores'}, {'subject': 'Ocean Bottom Seismometer'}, {'subject': 'Mid-Oceanic Ridge Volcano'}, {'subject': 'Hydrothermal Vents'}, {'subject': 'Pressure sensor (hydrophone)'}, {'subject': 'Geophone'}, {'subject': 'Broadband seismometer'}, {'subject': 'SeaMoN'}]",,"['MSEED', 'stationXML data']"
+10.5281/zenodo.10868304,Supplementary data to Frasson et al. 2024,Zenodo,2024,en,Dataset,Creative Commons Attribution 4.0 International,"Supplementary data and code for Frasson et al. 2024  accepted for publication in Solid Earth. The data consist of full snapshots of the geoids, topography, CMB heat flux, and composition at the CMB for the six cases of the study; the PCA outputs for the six cases; and movies showing the time evolutions of the outputs in the mantle convection models. The scripts can be ran on the snapshots to compute the principal component analysis (PCA) and the true polar wander (TPW).
+
+ 
+
+The full snapshots are given in HDF5 format, with one file per time step. They can be found in the archives MF.zip and MC.zip for the MF model and the MC model respectively. The files contain:
+
+
+
+The fields in the physical space. The first coordinate is the latitude, the second coordinate is the longitude (dataset ""data"").
+
+The latitudes and longitudes associated with the fields (datasets ""latitude"" and ""longitude"").
+
+Some information regarding the snapshots (attributes ""Time (Myr)"", ""Model"", ""Case"", ""Correction"", and ""Unit"").
+
+
+The files are called ""name_xxxx.h5"", where xxxx stands for the time in Myr before the end of the simulation.The names of the files are:
+
+
+
+qcmb: CMB heat flux
+
+geoid: Total geoid
+
+geoid_nolvv: No LVVs geoid
+
+topo: Topography
+
+prim: Composition at the CMB
+
+
+ 
+
+The PCA outputs are given in HDF5 format. Each file has an ""outputs"" and a ""grid"" group. The ""outputs"" section gives:
+
+
+
+The average heat flux pattern. The first coordinate is the latitude, the second coordinate is the longitude (dataset ""average"").
+
+The list of patterns for each PCA component in physical space. The first coordinate is the latitude, the second coordinate is the longitude (dataset ""pattern"").
+
+The time-dependent weight of the components. The first coordinate is the time, the second coordinate is the component number (dataset ""weights"").
+
+The singular values of the components  (dataset ""singular values"").
+
+The time in Myr (dataset ""Time"").
+
+The component numbers (dataset ""components"").
+
+
+The ""grid"" group gives the latitudes (dataset ""latitude"") and the longitudes (dataset ""longitude"") associated with the patterns.
+
+ 
+
+Description of the movies:
+
+
+
+outputs_MF: As in Fig. 2 in Frasson et al. 2024.
+
+outputs_MC: As in Fig. 3 in Frasson et al. 2024.
+
+qcmb_MF: CMB heat flux in cases MF0, MF1, MF2, and MF* in a Mollweide projection. Black lines delineate the edges of basal chemical piles.
+
+qcmb_MC: CMB heat flux in cases MC0 and MC1 in a Mollweide projection. Black lines delineate the edges of basal chemical piles.
+
+
+ 
+
+Description of the scripts
+
+
+
+PCA.py: Computes the PCA of the field stored in the path provided as argument for the given model.
+
+inertia.py: Computes the successive positions of the inertia axis. This script includes functions to rotate the outputs according to the positions of the inertia axis.",api,True,findable,0,0,0,0,0,2024-03-25T10:49:30.000Z,2024-03-25T10:49:30.000Z,cern.zenodo,cern,,,,
+10.5281/zenodo.10896138,"Dataset for manuscript : ""Weak and shallow frictional faults revealed by a large earthquake""",Zenodo,2024,,Dataset,Creative Commons Attribution 4.0 International,"This archive file contains datafiles used in ""Weak and shallow frictional faults revealed by a large earthquake"".README.txt files describing the datasets are available within the archive.",api,True,findable,0,0,0,0,1,2024-03-30T00:54:35.000Z,2024-03-30T00:54:36.000Z,cern.zenodo,cern,,,,
+10.26302/sshade/experiment_ap_20240312_0001,Visible-near-infrared reflectance spectra of olivine-pyroxene mixtures,SSHADE/UH-ApS (OSUG Data Center),2024,en,Dataset,"Any use of downloaded SSHADE data in a scientific or technical paper or a presentation is free but you should cite both SSHADE and the used data in the text ( 'first author' et al., year) with its full reference (with its DOI) in the main reference section of the paper (or in a special 'data citation' section) and, when available, the original paper(s) presenting the data.",Visible-near-infrared reflectance spectra of olivine-pyroxene mixtures,mds,True,findable,0,0,2,0,0,2024-03-28T11:48:30.000Z,2024-03-28T11:48:30.000Z,inist.sshade,mgeg,"laboratory measurement,diffuse reflection,macroscopic,Vis,Visible,NIR,Near-Infrared,reflectance factor,pyroxene,olivine,mineral,natural terrestrial,inosilicate,nesosilicate","[{'subject': 'laboratory measurement', 'subjectScheme': 'main'}, {'subject': 'diffuse reflection', 'subjectScheme': 'main'}, {'subject': 'macroscopic', 'subjectScheme': 'main'}, {'subject': 'Vis', 'subjectScheme': 'variables'}, {'subject': 'Visible', 'subjectScheme': 'variables'}, {'subject': 'NIR', 'subjectScheme': 'variables'}, {'subject': 'Near-Infrared', 'subjectScheme': 'variables'}, {'subject': 'reflectance factor', 'subjectScheme': 'variables'}, {'subject': 'pyroxene', 'subjectScheme': 'name'}, {'subject': 'olivine', 'subjectScheme': 'name'}, {'subject': 'mineral', 'subjectScheme': 'family'}, {'subject': 'natural terrestrial', 'subjectScheme': 'origin'}, {'subject': 'inosilicate', 'subjectScheme': 'compound type'}, {'subject': 'nesosilicate', 'subjectScheme': 'compound type'}]",['7 spectra'],['ASCII']
+10.5281/zenodo.10896122,"Codes for Muller et al., Fast uplift in the Southern Patagonian Andes due to long and short term deglaciation and the asthenospheric window underneath, EGU Solid Earth, 2024",Zenodo,2024,en,ComputationalNotebook,Creative Commons Attribution 4.0 International,,api,True,findable,0,0,0,0,0,2024-03-30T00:46:29.000Z,2024-03-30T00:46:29.000Z,cern.zenodo,cern,,,,
+10.5281/zenodo.10896121,"Codes for Muller et al., Fast uplift in the Southern Patagonian Andes due to long and short term deglaciation and the asthenospheric window underneath, EGU Solid Earth, 2024",Zenodo,2024,en,ComputationalNotebook,Creative Commons Attribution 4.0 International,,api,True,findable,0,0,0,0,1,2024-03-30T00:46:29.000Z,2024-03-30T00:46:29.000Z,cern.zenodo,cern,,,,
+10.5061/dryad.pg4f4qrxd,Biomass production at 2085 horizon for the Maurienne valley (French Alps) estimated using a Bayesian Belief Network,Dryad,2024,en,Dataset,Creative Commons Zero v1.0 Universal,"In mountains, grasslands managed for livestock production sustain local
+ economies, culture and identity. However, their future fodder production
+ is highly uncertain under climate change: while an extended growing season
+ may be beneficial, more frequent and intense summer droughts could also
+ reduce fodder quantity and quality. Land use and land cover (LULC) changes
+ are another major driver of regional grassland biomass production, but
+ combined effects of future land use transitions and climate change are
+ rarely quantified. We modelled combined climate and LULC scenarios for
+ grassland production of the Maurienne Valley (French Alps) by 2100. We
+ built a Bayesian Belief Network (BBN) from long-term grassland production
+ monitoring data complemented with expert knowledge. We assessed the
+ potential of two candidate adaptations, intensification as an incremental
+ solution, and silvopastoralism as a transformative solution to compensate
+ combined impacts of two climate scenarios and three land use change
+ scenarios. Total biomass production was far more sensitive to LULC than to
+ climate scenarios. Production losses were largest under the Conservation
+ LULC scenario (-28% on average between 2020 and 2085), followed by the
+ Tourism development scenario (-7%) and the Business-as-Usual scenario
+ (+3%). Climate change under RCP 8.5 altered the seasonality of production
+ by increasing potential production from May to July while decreasing
+ summer regrowth. Intensification somewhat compensated effects of climate
+ and LULC changes on biomass production, whereas silvopastoralism offered
+ only marginal gains. The Bayesian network model explicitly captured a
+ future increase in interannual variability in biomass production.
+ Synthesis and application: Changes in LULC are more decisive for global
+ biomass production than climate change. However, under the most extreme
+ climate change scenario (RCP8.5), the seasonal shift in production and
+ increased interannnual variability threaten the current grass-based
+ Protected Designation of Origin production system. Only the
+ intensification adaptation solution showed significant gains in total
+ biomass production. Still, the silvopastoralism would require less
+ investment compared to the intensification and have a similar efficiency
+ when assessing the gains of biomass by the surface concerned with
+ adaptation solutions.  ",mds,True,findable,0,0,1,0,0,2024-03-27T08:35:36.000Z,2024-03-27T08:35:36.000Z,dryad.dryad,dryad,"FOS: Earth and related environmental sciences,FOS: Earth and related environmental sciences,Grasslands,Bayesian Belief Network,Climate change,Drought adaptation","[{'subject': 'FOS: Earth and related environmental sciences', 'subjectScheme': 'fos'}, {'subject': 'FOS: Earth and related environmental sciences', 'schemeUri': 'http://www.oecd.org/science/inno/38235147.pdf', 'subjectScheme': 'Fields of Science and Technology (FOS)'}, {'subject': 'Grasslands', 'schemeUri': 'https://github.com/PLOS/plos-thesaurus', 'subjectScheme': 'PLOS Subject Area Thesaurus'}, {'subject': 'Bayesian Belief Network'}, {'subject': 'Climate change', 'schemeUri': 'https://github.com/PLOS/plos-thesaurus', 'subjectScheme': 'PLOS Subject Area Thesaurus'}, {'subject': 'Drought adaptation', 'schemeUri': 'https://github.com/PLOS/plos-thesaurus', 'subjectScheme': 'PLOS Subject Area Thesaurus'}]",['31171791 bytes'],
+10.5281/zenodo.10882070,HOD-Dependent Systematics for Luminous Red Galaxies in the DESI 2024 BAO Analysis,Zenodo,2024,en,OutputManagementPlan,Creative Commons Attribution 4.0 International,"Supplementary data and code to reproduce the figures of ""HOD-Dependent Systematics for Luminous Red Galaxies in the DESI 2024 BAO Analysis"".",api,True,findable,0,0,0,0,0,2024-03-26T18:27:41.000Z,2024-03-26T18:27:42.000Z,cern.zenodo,cern,,,,
+10.5281/zenodo.10882069,HOD-Dependent Systematics for Luminous Red Galaxies in the DESI 2024 BAO Analysis,Zenodo,2024,en,OutputManagementPlan,Creative Commons Attribution 4.0 International,"Supplementary data and code to reproduce the figures of ""HOD-Dependent Systematics for Luminous Red Galaxies in the DESI 2024 BAO Analysis"".",api,True,findable,0,0,0,0,1,2024-03-26T18:27:42.000Z,2024-03-26T18:27:42.000Z,cern.zenodo,cern,,,,
+10.5281/zenodo.10896139,"Dataset for manuscript : ""Weak and shallow frictional faults revealed by a large earthquake""",Zenodo,2024,,Dataset,Creative Commons Attribution 4.0 International,"This archive file contains datafiles used in ""Weak and shallow frictional faults revealed by a large earthquake"".README.txt files describing the datasets are available within the archive.",api,True,findable,0,0,0,0,0,2024-03-30T00:54:35.000Z,2024-03-30T00:54:35.000Z,cern.zenodo,cern,,,,
+10.5281/zenodo.10895011,Data for 'Mapping and characterization of avalanches on mountain glaciers with Sentinel-1 satellite imagery',Zenodo,2024,,Dataset,Creative Commons Attribution 4.0 International,"This dataset contains avalanche deposit outlines (as shapefiles) derived for the study 'Mapping and characterization of avalanches on mountain glaciers with Sentinel-1 satellite imagery'
+
+ 
+
+They were outlined at three different sites (Mt Blanc, Everest and Hispar regions) for the periods 11/2016-10/2021 (Mt Blanc) and 11/2017-10/2022 (Everest and Hispar). The time period is indicated in the file name.
+
+ 
+
+For each dataset we give the raw outlines (Automated_outlines_dates), the manually updated (Automated_outlines_dates_ManualUpd) and the manually updated after accounting for surface elevation change (Automated_outlines_dates_ManualUpd_shifted). 
+
+ 
+
+In order to know which scenes were used for the mapping (if no avalanche was detected, we did not provide a shapefile, but this doesn't been that there is a gap in the Sentinel-1 time series), we provide a Sentinel1_date file that shows all the Sentinel-1 RGB pairs that we used to detect the avalanches.
+
+ 
+
+We also provide as geotiffs the temporally aggregated outlines (Automated_outlines_dates_ManualUpd_shifted_aggregated; over one specific year yn - from 01/11/yn-1 to 01/11/yn - or the full study period):
+
+- as heatmaps (where the value of each pixel corresponds to the number of avalanches that occured) 
+
+- as binary maps of deposits (where 1 is when an avalanche occured over the time period and 0 is where none were detected).
+
+ 
+
+ 
+
+Finally we provide a csv file for each region with metrics per glacier:
+
+ 
+
+RGI ID
+
+Glacier size (in m^2)
+
+Catchment size (in m^2)
+
+Area of slopes steeper than 30° (in m^2)
+
+The area of total deposits detected (by summing all the pixels of the deposit binary maps) in the ascending obits (in m^2)
+
+The area of total deposits detected (by summing all the pixels of the deposit binary maps) in the descending obits (in m^2)
+
+The avalanche activity detected (by summing all pixels of the heat maps) in the ascending orbits (in m^2)
+
+The avalanche activity detected (by summing all pixels of the heat maps) in the descending orbits (in m^2)
+
+The area of the glacier visible in the ascending orbits (in m^2)
+
+The area of the glacier visible in the descending orbits (in m^2)
+
+ 
+
+ 
+
+The main Google Earth Engine and Matlab scripts used to pre-process the Sentinel-1 GRD images and to map the avalanches are available on GitHub: https://github.com/MarinKneib/S1_avalanches
+
+ ",api,True,findable,0,0,0,0,0,2024-03-29T15:43:58.000Z,2024-03-29T15:43:59.000Z,cern.zenodo,cern,,,,
+10.5281/zenodo.10899175,NeoGeographyToolkit/StereoPipeline: 2024-03-30-daily-build,Zenodo,2024,,Software,Creative Commons Attribution 4.0 International,Recent additions log: https://stereopipeline.readthedocs.io/en/latest/news.html,api,True,findable,0,0,0,0,0,2024-03-30T17:51:40.000Z,2024-03-30T17:51:41.000Z,cern.zenodo,cern,,,,
+10.5281/zenodo.10889881,MedDialog-FR: a French Version of the MedDialog Corpus for Multi-label Classification and Response Generation related to Women's Intimate Health,Zenodo,2024,fr,Dataset,Creative Commons Attribution 4.0 International,"MedDialog-FR: a French Version of the MedDialog Corpus for Multi-label Classification and Response Generation related to Women's Intimate Health
+
+ 
+
+Contributors: Xingyu Liu, Vincent Segonne, Aidan Mannion, Didier Schwab, Lorraine Goeuriot, François Portet
+
+ 
+
+Total Number of Single-Turn Dialogues: 16,149 dialogues of women's intimate health, 7,120 dialogues of general medicine
+
+ 
+
+Given the lack of French dialogue corpora for data-driven dialogue systems and the paucity of available information related to women's intimate health, MedDialog-FR is an annotated corpus of question-and-answer sessions between a patient and a doctor concerning women's intimate health. The corpus is composed of about 20,000 sessions automatically translated from the English version of MedDialog-EN. The corpus test set is composed of 1,400 sessions that have been manually post-edited and annotated with 22 categories from the UMLS ontology.
+
+ 
+
+Overview of the dataset
+
+ 
+
+To construct the French MedDialog Dataset (MedDialog-FR), we initially extracted from MedDialog-EN and automatically translated a total of 16,149 dialogues related to women's intimate health and an additional 7,120 dialogues related to general medicine. MedDialog-EN is composed of textual single-turn dialogues: a medical question by a patient and a response by a physician. From the translated dialogues, we randomly selected 900 dialogues on women's intimate health and 500 dialogues concerning general medicine to be post-edited. Subsequently, we performed multi-label annotation on the 900 questions extracted from these same dialogues focused on women's intimate health. In total, 1,286 labels were annotated, with 1.43 labels per instance in average.
+
+ 
+
+The summary of the statistics of the dataset:
+
+
+
+
+Task
+Women
+General
+
+
+Machine translation (#dialogs)
+16,149
+7,120
+
+
+Post-editing (#dialogs)
+900
+500
+
+
+Multi-label annotation (#questions)
+900
+-
+
+
+
+
+ 
+
+Structure of the dataset
+
+ 
+
+The dataset contains the following elements separated in general medicine domain (MedDialog-FR-general) and women's intimate health domain (MedDialog-FR-women):
+
+```
+
+├── MedDialog-FR-general/
+
+├──── machine_translation/meddialog-fr-general_machine_translation.csv
+
+├──── post-editing/meddialog-fr-general_post-editing.csv
+
+ 
+
+├── MedDialog-FR-women/
+
+├──── machine_translation/meddialog-fr-women_machine_translation.csv
+
+├──── post-editing/meddialog-fr-women_post-editing.csv
+
+├──── multilabel_annotation/dataset_multilabel_meddialog_22labels.csv
+
+├──── response_generation/dataset_response_generation_meddialog.csv
+
+ 
+
+```
+
+All the .csv files contain a column named id, which indicates the original file of *MedDialog-EN* with the id in that file. For example, hm3_96_q or hm3_96_a refers to the session with the id of 96 within the healthcaremaginc3 file. The suffix of '_q' and '_a' indicates question and answer
+
+ 
+
+Machine translation
+
+The .csv file contains 3 columns: id, en and fr
+
+- en: original question and answer in English
+
+- fr: translated question and answer in French
+
+ 
+
+Example lines:
+
+hm4_1121_q \t J'ai 52 ans, mes dernières règles remontent au 6 décembre, je pensais que c'était peut-être le début de la ménopause. J'ai fait un test d'urine pour la grossesse, qui s'est révélé positif, puis j'ai fait un test quantitatif de hcg 45343 (je suis infirmière et je l'ai fait au laboratoire de l'hôpital où je travaille). J'ai des crampes et des saignements (bruns) depuis 2 à 3 mois.
+
+ 
+
+hm4_1121_a \t Bonjour, j'ai compris votre préoccupation. Comme vous avez mentionné que le taux de bêta HCG est plus élevé, je vous suggère de faire une échographie. Cela confirmera l'âge gestationnel et la viabilité de la grossesse. Si vous tenez à poursuivre la grossesse, veuillez discuter des risques encourus avec votre gynécologue traitant. Vous pouvez également opter pour une interruption de grossesse avec des médicaments en toute sécurité jusqu'à 9 semaines de grossesse sous surveillance médicale. J'espère que cette réponse vous aidera.
+
+ 
+
+Post-editing
+
+The .csv file contains 3 columns: id, machine_translation and post-edited
+
+ 
+
+Example line:
+
+hm4_3334_q \t bonjour docteur je suis atteinte de pcos, je me suis mariée en novembre 2011.nous essayons d'avoir une grossesse depuis deux mois ... et ma question est comment savoir la sévérité du pcos et quel est le meilleur moment pour concv \t bonjour docteur, je suis atteinte de SOPK, je me suis mariée en novembre 2011. Nous essayons de concevoir depuis deux mois ... et ma question est comment savoir la sévérité du SOPK et quel est le meilleur moment pour concevoir.
+
+ 
+
+Multi-label annotation
+
+The .csv file contains 5 columns: id, source_file, labels and split.
+
+- source_file: the source file where the text content for classification can be found with id
+
+- labels: UMLS IDs representing expert-validated labels for classification
+
+- split: train, dev or test
+
+ 
+
+Example line:
+
+hm1_33568_q \t '../post-editing/meddialog-fr-women_post-editing.csv' \t ['C0700589', 'C0227791'] \t train
+
+
+
+Partitioning:
+
+We split the MedDialog-FR-women multi-label dataset into a training set of 500 instances, a validation set of 100 instances and a test set of 300 instances. The ratio was chosen to balance the need for maximizing the amount of fine-tuning data available while also ensuring that the test set is large enough for the results to be statistically significant, given the scarcity of some categories. The split statistics are summarized in the following table. To maintain consistent label distribution, we leveraged the iterative stratification algorithm during the data splitting process.
+
+
+
+
+Split
+#Questions
+
+
+Train
+500
+
+
+Validation
+100
+
+
+Test
+300
+
+
+
+
+ 
+
+Response generation
+
+The .csv file contains 3 columns: id, split and source_file
+
+- split: train, dev or test
+
+- source_file: the source file where the text content for response generation can be found with id
+
+Partitioning:
+
+The validation and test data contain the same session ID as the multi-label validation and test, but they include the corresponding answers. for the training set, we use the same ones as multi-label dataset plus the machine translated sessions.
+
+ 
+
+
+
+
+Split
+#Dialogues
+
+
+Train
+15,749
+
+
+Validation
+100
+
+
+Test
+300
+
+
+
+
+ 
+
+
+
+Corpus data cleaning
+
+By examining the MedDialog-EN corpus, we identified data that could potentially leak personal information such as the first and last name, email address, URL, etc. In order to safeguard privacy, we conducted a series of data cleaning procedures, especially anonymization:
+
+1. replace URLs with #URL# (regex pattern: `https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)
+
+`)
+
+2. replace emails with #EMAIL# (regex pattern: `^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}`)
+
+3. replace phone numbers with #TEL# (regex pattern: `^[\+]?[(]?[0-9]{3}[)]?[-\s\.]?[0-9]{3}[-\s\.]?[0-9]{4,6}`)
+
+4. replace dates with #DATE# (regex patterns: `\d{1,2}\/\d{1,2}\/\d{2,4}
+
+`; `(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+(\d{1,2})\s+(\d{4})`)
+
+5. replace hospital or clinic names with #HOSPITAL# (text patterns: `clinic`; `hospital`)
+
+6. replace names in questions with #Person1#, and names in answers with #Person2#. If there is a name in an answer identical to the name in its question, replace it with #Person1#. (text patterns: `I am`; `I'm`;`Dr`; `Doctor`)
+
+7. replace the names of data source forums with coded letters (text patterns: forum names)
+
+
+
+Ethics Statement and Limitations
+
+Access to actual medical data is very restricted and protected in France. We thus used an already publicly available corpus in English. But we did not simply translate it. We first made sure that no personal information could be found in the data. This is why we replaced all names that could have been kept in the original data. We also performed post-edition after automatic translation to adapt the phrasing and medical term to the French culture. All people recruited for annotation were treated fairly. This includes, but is not limited to, compensating them fairly and ensuring that they were voluntary participants. We do not foresee any direct social consequences or ethical issues.
+
+ 
+
+Authors of MedDialog were warned at our project and answered our questions.
+
+ 
+
+Since the original corpus is derived from dialogues in the U.S.A., there might be some cultural differences with French-speaking countries in the way people interact with doctors and which treatments and medical advises can be provided.
+
+ 
+
+Answers to questions should not be applied for self-treatment.
+
+ ",api,True,findable,0,0,0,0,0,2024-03-29T09:25:42.000Z,2024-03-29T09:25:42.000Z,cern.zenodo,cern,"Medical Corpus,Women's Intimate Health,Multi-label Question Classification,Response Generation","[{'subject': 'Medical Corpus'}, {'subject': ""Women's Intimate Health""}, {'subject': 'Multi-label Question Classification'}, {'subject': 'Response Generation'}]",,