Skip to content
Snippets Groups Projects
pie-datacite-client.py 1.37 KiB
Newer Older
Maxence Larrieu's avatar
Maxence Larrieu committed
import pandas as pd, matplotlib, matplotlib.pyplot  as plt
import z_my_functions as my_fct

# Load the working table through the project helper module. The rest of this
# script only relies on it exposing a "client" column and a length (row count).
# NOTE(review): presumably a pandas DataFrame -- confirm in z_my_functions.
df = my_fct.load_and_treat_csv()
Maxence Larrieu's avatar
Maxence Larrieu committed
#print(df.columns)
Maxence Larrieu's avatar
Maxence Larrieu committed

# Number of rows per DataCite client (assuming df is a pandas DataFrame:
# value_counts() orders the tally from most to least frequent and, by default,
# leaves out rows whose client is missing).
df_client_raw = df["client"].value_counts()
# Keep the complete, ungrouped tally as a CSV in the working directory.
df_client_raw.to_csv(path_or_buf="all_datacite_clients.csv")
Maxence Larrieu's avatar
Maxence Larrieu committed

## group the clients with few rows into a single "other" slice
Maxence Larrieu's avatar
Maxence Larrieu committed
# Cut-off row count: clients strictly above it get their own pie wedge, the
# ones at or below it are summed into "other". The (misspelt) name is kept
# because the filtering statements further down refer to it.
treshold = 20
Maxence Larrieu's avatar
Maxence Larrieu committed
# Keep only the clients whose row count is strictly above the cut-off.
is_large_client = df_client_raw.gt(treshold)
df_client = df_client_raw.loc[is_large_client]

Maxence Larrieu's avatar
Maxence Larrieu committed
## client names are kept exactly as DataCite provides them (e.g. cern.zenodo)
## the commented-out code below used to shorten them; it is no longer used
# clients_name = []
# for item in df_client.index :
# 	short_name = item[: item.find(".")]
# 	if short_name not in ["inist", "jbru"] : 
# 		clients_name.append( short_name.upper())
# 	else : 
# 		clients_name.append(item)
Maxence Larrieu's avatar
Maxence Larrieu committed

# Total of every client at or below the cut-off, shown as one "other" wedge.
small_clients_total = df_client_raw[df_client_raw <= treshold].sum()
# Skip the wedge when nothing falls under the cut-off: a zero-size slice would
# only add a stray "other" / "0%" label to the pie.
if small_clients_total > 0:
    df_client["other"] = small_clients_total


Maxence Larrieu's avatar
Maxence Larrieu committed
# One colour per wedge, taken from matplotlib's qualitative "Set3" palette.
### palette overview: https://matplotlib.org/stable/users/explain/colors/colormaps.html
# The palette has a fixed number of entries (palette.N) and an integer index
# past the end is clipped to the last entry, so wrap the index to keep cycling
# through the palette instead of painting every extra wedge the same colour.
palette = plt.cm.Set3
colors = [palette(i % palette.N) for i in range(len(df_client))]

#colors = sns.color_palette('pastel')[0:len(df_client)]
Maxence Larrieu's avatar
Maxence Larrieu committed
# Create a 10 x 7 inch figure with a single Axes; it becomes pyplot's current
# figure, which the later pyplot calls (titles, savefig) act on.
pie_figure, pie_axes = plt.subplots(figsize=(10, 7))
# One wedge per entry of df_client, labelled with the client name and
# annotated with its share rounded to a whole percent.
pie_axes.pie(
    df_client,
    labels=df_client.index,
    colors=colors,
    autopct="%.0f%%",
)
Maxence Larrieu's avatar
Maxence Larrieu committed
# Both headings are horizontally centred and slightly faded.
heading_style = {"x": 0.5, "alpha": 0.6}
# Main heading, placed just above the Axes.
plt.title(
    "Distribution of datasets by DataCite client",
    fontsize=20,
    y=1.03,
    **heading_style,
)
# Smaller figure-level line giving the total number of rows in df.
plt.suptitle(f"n = {len(df)}", fontsize=11, y=0.90, **heading_style)
Maxence Larrieu's avatar
Maxence Larrieu committed
# Write the current figure to a PNG file (relative path, so it lands in the
# working directory the script is run from).
plt.savefig("pie--datacite-client.png")
Maxence Larrieu's avatar
Maxence Larrieu committed
# Tell the user which file was written (leading blank line kept for spacing).
print("\ngraph produced pie--datacite-client.png")
Maxence Larrieu's avatar
Maxence Larrieu committed


# print(len(df))