"""Draw a pie chart of the number of records per DataCite client.

The script reads the table returned by ``my_fct.load_and_treat_csv()``,
counts the values of its ``client`` column, dumps the full counts to a CSV
file, groups the small clients into a single "other" slice and saves the
resulting pie chart as a PNG.

NOTE(review): the table is presumably one row per dataset (the chart title
and the ``n = len(df)`` subtitle suggest so) -- confirm against
``z_my_functions.load_and_treat_csv``.
"""
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd

import z_my_functions as my_fct

# Clients with at most this many records are merged into the "other" slice.
SMALL_CLIENT_THRESHOLD = 20

ALL_CLIENTS_CSV = "all_datacite_clients.csv"
PIE_CHART_PNG = "pie--datacite-client.png"

df = my_fct.load_and_treat_csv()

# Full, ungrouped counts per client, kept on disk for reference.
df_client_raw = df["client"].value_counts()
df_client_raw.to_csv(ALL_CLIENTS_CSV)

# Keep the big clients as they are. The explicit copy guarantees that adding
# the "other" entry below can never write back into the raw counts.
df_client = df_client_raw[df_client_raw > SMALL_CLIENT_THRESHOLD].copy()

# NOTE: client names used to be shortened (e.g. "cern.zenodo" -> "CERN");
# the raw DataCite client identifiers are now used as labels instead.

# Regroup the small clients in "other". Skip the slice entirely when it would
# be empty, otherwise the pie shows a zero-width wedge labelled "other 0%".
other_total = df_client_raw[df_client_raw <= SMALL_CLIENT_THRESHOLD].sum()
if other_total > 0:
    df_client["other"] = other_total

# One colour per slice, taken from a qualitative matplotlib palette
# (see https://matplotlib.org/stable/users/explain/colors/colormaps.html).
# The palette is finite: cycle through it, because an out-of-range integer
# index would be clipped and give every extra slice the same colour.
palette = plt.cm.Set3
colors = [palette(i % palette.N) for i in range(len(df_client))]

plt.subplots(figsize=(10, 7))
plt.pie(df_client, labels=df_client.index, colors=colors, autopct='%.0f%%')
plt.title(
    "Distribution of datasets by DataCite client",
    fontsize=20,
    x=0.5,
    y=1.03,
    alpha=0.6,
)
# The subtitle reports the total number of rows in the source table.
plt.suptitle(f"n = {len(df)}", fontsize=11, x=0.5, y=0.90, alpha=0.6)
plt.savefig(PIE_CHART_PNG)
print(f"\ngraph produced {PIE_CHART_PNG}")