"""Plot the cumulative number of datasets per repository over time.

Loads the treated CSV through ``z_my_functions``, groups DataCite clients
into a short list of repositories, counts registrations per month, and
saves a stacked cumulative area chart to ``hist-evol-datasets-per-repo.png``.
"""
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import colormaps

import z_my_functions as my_fct

df = my_fct.load_and_treat_csv()
print(df.columns)

## print this to see raw data & adapt graph
# print(df.client.value_counts())

## create a col with yyyy-mm (first 7 characters of the registration date)
df["year-month"] = df["registered"].str[:7]

## exact DataCite client id -> repository label shown in the legend
_CLIENT_LABELS = {
    "cern.zenodo": "Zenodo",
    "inist.osug": "OSUG",
    "inist.persyval": "OSUG",
    "inist.sshade": "OSUG",
    "dryad.dryad": "Dryad",
    "jbru.aau": "AAU",
    "rdg.prod": "RDG",
    "inist.humanum": "Nakala",
}


def reduce_client(client):
    """Return the repository label for a DataCite client id.

    Mapping established on 2024-01-12; the matching should be reviewed
    regularly.

    Args:
        client: DataCite client identifier as a string.

    Returns:
        The repository label, or ``"other"`` when the client is not listed.
    """
    # Figshare is matched by prefix; every other client by exact id.
    if client.startswith("figshare"):
        return "Figshare"
    return _CLIENT_LABELS.get(client, "other")


## produce a col with datacite clients limited to the main repositories
# Map the single column instead of applying a function row by row; str()
# keeps the original coercion of missing or non-string values to text.
df["client_reduced"] = df["client"].map(lambda client: reduce_client(str(client)))

df_evol_linear = pd.crosstab(df["year-month"], df["client_reduced"])

# crosstab only produces rows for months that have at least one dataset.
# The x axis is categorical, so a missing month would silently compress the
# time axis and a missing January would drop that year's tick label.
# Reindex on a continuous monthly range so every month is present.
if not df_evol_linear.empty:
    all_months = pd.period_range(
        start=df_evol_linear.index.min(),
        end=df_evol_linear.index.max(),
        freq="M",
    ).strftime("%Y-%m")
    df_evol_linear = df_evol_linear.reindex(all_months, fill_value=0)
df_evol_linear.index.name = "year-month"

## make count values cumulative
df_evol = df_evol_linear.cumsum(axis="index")


## ______0______ produce graphs
fig, ax = plt.subplots(figsize=(10, 7), dpi=100, facecolor="w", edgecolor="k")

## a set of colors via plt
### see color palettes https://matplotlib.org/stable/users/explain/colors/colormaps.html
colors = [plt.cm.Set3(i) for i, _ in enumerate(df_evol.columns)]

plt.stackplot(
    df_evol.index,
    [df_evol[col].tolist() for col in df_evol.columns],
    labels=df_evol.columns,
    colors=colors,
    baseline="zero",
)

# reverse=True lists the legend entries in the same top-to-bottom order as
# the stacked areas
plt.legend(loc="center", reverse=True, bbox_to_anchor=(0.49, 0.65), fontsize=11)


# ______0______ configure the rendering
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.set_ylabel("Total of datasets", labelpad=10)
ax.yaxis.grid(ls=":", alpha=0.5)

## x labels only for January: collect the positions of the January rows
## and use the year as the label
january_ticks = [
    (position, month[:-3])
    for position, month in enumerate(df_evol.index)
    if month.endswith("-01")  # only if it is the month of January
]
x_idx_toshow = [position for position, _ in january_ticks]
x_label_toshow = [year for _, year in january_ticks]

ax.set_xticks(x_idx_toshow)
ax.set_xticklabels(x_label_toshow, rotation=70, fontsize=10)

plt.title(
    "Evolution of the quantity of UGA open datasets\n and distribution per repository",
    fontsize=18,
    x=0.5,
    y=1.03,
    alpha=0.8,
)
plt.suptitle(f"n = {len(df)}", fontsize=12, x=0.5, y=0.87, alpha=0.6)

# plt.show()
plt.savefig("hist-evol-datasets-per-repo.png")