Skip to content
Snippets Groups Projects
Commit 28ac4b6d authored by Maxence Larrieu's avatar Maxence Larrieu
Browse files

graph last days added

parent 9c90a434
No related branches found
No related tags found
No related merge requests found
client,count
cern.zenodo,936
inist.sshade,468
inist.osug,238
figshare.ars,183
dryad.dryad,156
inist.resif,78
inist.persyval,55
rdg.prod,45
inist.humanum,28
figshare.sage,16
mcdy.dohrmi,12
rg.rg,4
vqpf.dris,3
tib.gfzbib,3
iris.iris,3
ugraz.unipub,2
inist.epure,2
bl.nerc,2
tib.repod,2
estdoi.ttu,1
inist.omp,1
tib.gfz,1
edi.edi,1
inist.opgc,1
bl.iita,1
ardcx.nci,1
umass.uma,1
crui.ingv,1
bl.mendeley,1
ethz.zora,1
inist.ird,1
inist.eost,1
ihumi.pub,1
tug.openlib,1
2-produce-graph/hist-last-datasets-by-client.png

46.7 KiB

import pandas as pd, matplotlib, matplotlib.pyplot as plt
import z_my_functions as my_fct
from datetime import datetime, timezone
# Produce a stacked bar chart of the datasets registered during the last
# LAST_DAYS days, one bar per day of registration, split by DataCite client.
# The figure is written to "hist-last-datasets-by-client.png" in the current
# working directory.

# Size of the sliding window, in days; used by both the filter and the title
LAST_DAYS = 30

df = my_fct.load_and_treat_csv()

# sort df so the most recent registrations come first
df.sort_values(by="registered", ascending=False, inplace=True, ignore_index=True)

## transform to date format; values that cannot be parsed become NaT
df["registered_date"] = pd.to_datetime(df["registered"], errors="coerce")

## age of deposit is current time - date of registration, in fractional days
# NOTE(review): the subtraction assumes "registered" parses to timezone-aware
# timestamps (e.g. ISO strings ending with "Z") - confirm against the CSV
df["age_of_deposit_days"] = (
    datetime.now(timezone.utc) - df["registered_date"]
).dt.total_seconds() / 60 / 60 / 24

# deposits over the last LAST_DAYS days (rows with an unparseable date have a
# NaN age and are dropped by the comparison)
df_last_weeks = df[df["age_of_deposit_days"] <= LAST_DAYS]

# Count datasets per day and per client. The crosstab is keyed on the full
# "YYYY-MM-DD" prefix so that rows are sorted chronologically even when the
# window spans two years; a bare "MM-DD" key would put January before December.
registration_day = df_last_weeks["registered"].str[:10]
df_graph = pd.crosstab(registration_day, df_last_weeks["client"])
# keep only "MM-DD" for the tick labels, now that the order is fixed
df_graph.index = df_graph.index.str[5:]
df_graph.index.name = "Day of registration"

## ______0______ produce graphs

## a set of colors via plt
### see color palettes https://matplotlib.org/stable/users/explain/colors/colormaps.html
# Set3 is a finite qualitative palette: cycle through it so that clients beyond
# its last entry do not all collapse onto the same color
palette = plt.cm.Set3
colors = [palette(i % palette.N) for i in range(len(df_graph.columns))]

ax = df_graph.plot(
    kind="bar",
    figsize=(10, 7),
    stacked=True,
    width=0.3,
    color=colors,
    rot=80,
)

## _______ configure the display
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.set_ylabel("Number of datasets", labelpad=2)
ax.yaxis.grid(ls=":", alpha=0.2)
ax.tick_params(axis="both", which="major", labelsize=8)

plt.legend(reverse=False)
plt.title(
    f"Datasets registered over the last {LAST_DAYS} days\ndistributed by DataCite client",
    fontsize=18, x=0.5, y=1.03, alpha=0.8,
)
# total number of datasets shown on the graph
plt.suptitle(f"n = {len(df_last_weeks)}", fontsize=12, x=0.5, y=0.89, alpha=0.6)
plt.savefig("hist-last-datasets-by-client.png")
\ No newline at end of file
2-produce-graph/pie--datacite-client.png

43.5 KiB | W: | H:

2-produce-graph/pie--datacite-client.png

45.5 KiB | W: | H:

2-produce-graph/pie--datacite-client.png
2-produce-graph/pie--datacite-client.png
2-produce-graph/pie--datacite-client.png
2-produce-graph/pie--datacite-client.png
  • 2-up
  • Swipe
  • Onion skin
......@@ -5,24 +5,24 @@ df = my_fct.load_and_treat_csv()
#print(df.columns)
df_client_raw = df["client"].value_counts()
df_client_raw.to_csv("all_datacite_clients.csv")
## regroup small values in "other"
treshold = 20
df_client = df_client_raw[df_client_raw > treshold]
## rework client name (eg cern.zenodo)
clients_name = []
for item in df_client.index :
short_name = item[: item.find(".")]
if short_name not in ["inist", "jbru"] :
clients_name.append( short_name.capitalize())
else :
clients_name.append(item)
## used before to rework client name, but finally we used the raw datacite client eg cern.zenodo
# clients_name = []
# for item in df_client.index :
# short_name = item[: item.find(".")]
# if short_name not in ["inist", "jbru"] :
# clients_name.append( short_name.upper())
# else :
# clients_name.append(item)
print(clients_name)
df_client["other"] = df_client_raw[df_client_raw <= treshold].sum()
clients_name.append("other")
# a set of color via plt
......@@ -31,7 +31,7 @@ colors = [plt.cm.Set3(i) for i in range(len(df_client))]
#colors = sns.color_palette('pastel')[0:len(df_client)]
plt.pie(df_client, labels = clients_name, colors = colors, autopct='%.0f%%')
plt.pie(df_client, labels = df_client.index, colors = colors, autopct='%.0f%%')
plt.title(f"Distribution of datasets by DataCite client", fontsize = 20, x = 0.5, y = 1.03, alpha = 0.6)
plt.suptitle(f"n = {len(df)}", fontsize = 11, x = 0.5, y = 0.90, alpha = 0.6)
plt.savefig("pie--datacite-client.png")
......
# UGA Open Research Data monitor
# Codes for the UGA Open research data monitor
Let's describe the open research data produced by Grenoble Alpes University!
View contextualized results on the website: [mlarrieu.gricad-pages.univ-grenoble-alpes.fr/open-research-data-monitor](https://mlarrieu.gricad-pages.univ-grenoble-alpes.fr/open-research-data-monitor)
We call "open research data" any dataset registered with DataCite.
_work in progress_
## Examples
![](2-produce-graph/hist-evol-datasets-per-repo.png)
![](2-produce-graph/hist-quantity-year-type.png)
![](2-produce-graph/pie--datacite-client.png)
![](2-produce-graph/pie--datacite-type.png)
## Sources
(so far)
| |Dataset numbers| UGA perimeter |
|-----------|---------------|---------------|
|RDG |42 |contact, auteurs, producteur et contributeurs avec "UGA" OR "Grenoble" |
|DataCite |1247| creator et contributor avec ROR + clients & publisher |
|Zenodo |1041|creator et contributor avec "grenoble" |
|Nakala |32 |UGA user identifiers |
|BSO via HAL|32 |NA |
|... | |
| | UGA perimeter |
|-----------|---------------|
|RDG |contact, auteurs, producteur et contributeurs avec "UGA" OR "Grenoble" |
|DataCite |creator et contributor avec ROR + clients & publisher |
|Zenodo |creator et contributor avec "grenoble" |
|Nakala |UGA user identifiers |
|BSO via HAL|NA |
|... | |
## Credit
## Credits
* Élias Chetouane: collecting data, program automation
* Maxence Larrieu: collecting data, enrichment & visualisation
as members of the [Cellule Data Grenoble Alpes](https://scienceouverte.univ-grenoble-alpes.fr/donnees/accompagner/cellule-data-grenoble-alpes/)
as members of GRICAD & CDGA
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment