Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd, matplotlib, matplotlib.pyplot as plt
import z_my_functions as my_fct
from matplotlib import colormaps
## load the data frame through the project helper module
df = my_fct.load_and_treat_csv()
print(df.columns)

## debugging aid: uncomment to inspect the raw client counts before adapting the graph
# print(df.client.value_counts())

## add a "yyyy-mm" column: the first seven characters of the "registered" string
df["year-month"] = df["registered"].str.slice(stop=7)
# print(df["year-month"][:1])
## produce a col with datacite clients limited to top 10 in quantity
def reduce_client(client) :
"""
réduire la liste de client
fait le 2024-01-12
matching à surveiller régulièrement
"""
if client == "cern.zenodo" :
return "Zenodo"
if client.startswith("figshare") :
return "Figshare"
if client == "inist.osug" or client == "inist.persyval" or client == "inist.sshade" :
return "OSUG"
if client == "dryad.dryad" :
return "Dryad"
if client == "jbru.aau" :
return "AAU"
if client == "rdg.prod" :
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
if client == "inist.humanum" :
return "Nakala"
else :
return "other"
## label every record with its reduced client name.
## Series.map walks the single "client" column instead of materialising one
## row object per record (DataFrame.apply with axis=1). Each value is still
## passed through str() first because reduce_client uses string methods
## (startswith) that a non-string value would not have.
df["client_reduced"] = df["client"].map(lambda client: reduce_client(str(client)))

## count records per "year-month" (rows) and per reduced client (columns)
df_evol_linear = pd.crosstab(df["year-month"], df["client_reduced"])
df_evol_linear.index.rename("year-month", inplace=True)

## make the counts cumulative down the rows ("cumsum")
df_evol = df_evol_linear.cumsum(axis="index")
## ______0______ produce graphs
fig, ax = plt.subplots(figsize=(10, 7), dpi=100, facecolor="w", edgecolor="k")

## one Set3 colour per column of df_evol
### colormap catalogue: https://matplotlib.org/stable/users/explain/colors/colormaps.html
colors = list(map(plt.cm.Set3, range(df_evol.shape[1])))

## stacked areas drawn on the axes created above: one layer per column,
## x positions taken from the "year-month" index
ax.stackplot(
    df_evol.index,
    [df_evol[name].tolist() for name in df_evol.columns],
    labels=df_evol.columns,
    colors=colors,
    baseline="zero",
)
ax.legend(loc="center", reverse=True, bbox_to_anchor=(0.49, 0.65), fontsize=11)

# ______0______ configure the rendering
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
ax.set_ylabel("Total of datasets", labelpad=10)
#ax.set_xlabel("Date of DOI registration", labelpad = 10)
ax.yaxis.grid(ls=":", alpha=0.5)

## x tick labels only for January:
## collect the positions of the index entries ending in "-01" and, for each,
## the entry without its last three characters (the "-01" suffix) as label
x_idx_toshow = [
    position
    for position, month in enumerate(df_evol.index)
    if month.endswith("-01")
]
x_label_toshow = [month[:-3] for month in df_evol.index if month.endswith("-01")]
ax.set_xticks(x_idx_toshow)
ax.set_xticklabels(x_label_toshow, rotation=70, fontsize=10)

## axes title, plus the number of rows of df as a lighter figure-level subtitle
ax.set_title(
    "Evolution of the quantity of UGA open datasets\n and distribution per repository",
    fontsize=18,
    x=0.5,
    y=1.03,
    alpha=0.8,
)
fig.suptitle(f"n = {len(df)}", fontsize=12, x=0.5, y=0.87, alpha=0.6)