Newer
Older
import pandas as pd, matplotlib, matplotlib.pyplot as plt
import z_my_functions as my_fct
# Pie chart of the share of datasets per DataCite client. Clients at or below
# the threshold are merged into a single "other" wedge.

# Load the dataset table through the project helper.
df = my_fct.load_and_treat_csv()

# NOTE(review): `df_client_raw` and `treshold` are not defined in the visible
# part of this script. `df_client_raw` is presumably a per-client count Series
# derived from `df`, and `treshold` a numeric cut-off -- confirm both are
# defined before this point.
# Keep only the clients above the threshold; copy so that adding the "other"
# entry below works on an independent object.
df_client = df_client_raw[df_client_raw > treshold].copy()

# Client names were previously reworked (part before the first "." upper-cased,
# except for "inist" and "jbru", whose full names were kept). The raw DataCite
# client id (e.g. "cern.zenodo") is now used as-is for the labels.

# Sum every client at or below the threshold into one "other" entry.
df_client["other"] = df_client_raw[df_client_raw <= treshold].sum()

# One colour per wedge, taken from matplotlib's qualitative Set3 colormap.
# See the colour palettes at
# https://matplotlib.org/stable/users/explain/colors/colormaps.html
# Set3 has a limited number of entries (plt.cm.Set3.N); an integer index past
# the end is clipped to the last colour, so wrap around with a modulo to avoid
# giving every extra wedge the same colour.
colors = [plt.cm.Set3(i % plt.cm.Set3.N) for i in range(len(df_client))]

# Draw the pie with the client ids as labels and rounded percentages.
plt.pie(df_client, labels=df_client.index, colors=colors, autopct="%.0f%%")
plt.title(
    "Distribution of datasets by DataCite client",
    fontsize=20,
    x=0.5,
    y=1.03,
    alpha=0.6,
)
# The subtitle reports the number of rows in the loaded table.
plt.suptitle(f"n = {len(df)}", fontsize=11, x=0.5, y=0.90, alpha=0.6)