Skip to content
Snippets Groups Projects
Commit 046c0432 authored by Maxence Larrieu's avatar Maxence Larrieu
Browse files

add dataset type graph

parent 8c3677f3
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
10.34847/nkl.5bcck3cz
10.34847/nkl.ca709965
10.34847/nkl.a0fe865m 10.34847/nkl.a0fe865m
10.34847/nkl.76abr599 10.34847/nkl.76abr599
10.34847/nkl.6caam3dp 10.34847/nkl.6caam3dp
10.34847/nkl.5bcck3cz
10.34847/nkl.ca709965
10.34847/nkl.ca8dmbdh 10.34847/nkl.ca8dmbdh
10.34847/nkl.a5ae8y33 10.34847/nkl.a5ae8y33
10.34847/nkl.748eqz51 10.34847/nkl.748eqz51
...@@ -22,5 +22,11 @@ ...@@ -22,5 +22,11 @@
10.34847/nkl.3dbc2mtb 10.34847/nkl.3dbc2mtb
10.34847/nkl.bc2b1071 10.34847/nkl.bc2b1071
10.34847/nkl.81dcdekj 10.34847/nkl.81dcdekj
10.34847/nkl.ef903o6v 10.34847/nkl.b1cb3arm
10.34847/nkl.ae94a74k 10.34847/nkl.c9e92or4
10.34847/nkl.bf5f263z
10.34847/nkl.9f85iol5
10.34847/nkl.345bf9i7
10.34847/nkl.9cd8hi4k
10.34847/nkl.e1e41vdi
10.34847/nkl.deb655as
...@@ -15,4 +15,6 @@ egreslou ...@@ -15,4 +15,6 @@ egreslou
troulet troulet
mbeligne mbeligne
acarbonnelle acarbonnelle
annegf annegf
\ No newline at end of file tleduc
abey
\ No newline at end of file
...@@ -104,8 +104,9 @@ for user in nakala_uga_users : ...@@ -104,8 +104,9 @@ for user in nakala_uga_users :
with open("nakala-dois.txt", 'w') as fh : with open("nakala-dois.txt", 'w') as fh :
[fh.write(f"{line}\n") for line in all_dois] [fh.write(f"{line}\n") for line in all_dois]
## print les autres utilisateurs trouvés7 ## print les autres utilisateurs trouvés
print("\n\n nakala new user finded ") if other_user_finded :
for elem in other_user_finded : print("\n\n nakala new user finded ")
print("\t\telem") for elem in other_user_finded :
print(f"\t\t{elem}")
10.57745/QOA1QO 10.57745/52HT2L
10.57745/GZKUZS 10.57745/7RFNNP
10.57745/J2A44Q 10.57745/UOGRPY
10.15454/M7OK9E 10.57745/GRHRZJ
10.57745/QOA1QO 10.57745/7HF7KG
10.57745/GZKUZS
10.57745/NOHRHJ
10.57745/JOZ1NA
10.57745/BYWEA3
10.57745/J2A44Q
10.57745/QOA1QO
10.57745/B6PSX0
10.57745/BYWEA3
10.57745/TVAHUQ
10.57745/BYWEA3
10.57745/QCVYG3
10.57745/NGC4J0
10.57745/HZDPTT
10.57745/69UNAM
10.57745/ENJADK
10.57745/GZKUZS
10.57745/ENJADK 10.57745/ENJADK
10.57745/LUTMNE 10.57745/69UNAM
10.57745/NZFWP9 10.57745/NZFWP9
10.57745/LXTWNG 10.57745/R1NIKK
10.57745/QCVYG3
10.57745/KTFZQD
10.57745/B6PSX0
10.15454/M7OK9E
10.57745/CM2WOI
10.57745/GZKUZS 10.57745/GZKUZS
10.57745/NGC4J0
10.57745/IZHDPC
10.57745/LPJ2S2
10.57745/W9N5Z9 10.57745/W9N5Z9
10.57745/RUQLJL 10.57745/TVAHUQ
10.15454/8UIA76 10.57745/BYWEA3
10.57745/OVCWQN 10.15454/O93984
10.57745/QOA1QO
10.57745/YWBDQQ
10.57745/JOZ1NA
10.57745/XHQ7TL
10.57745/ID1LS6
10.57745/3VMB3Y
10.57745/MXEMI4 10.57745/MXEMI4
10.57745/NOHRHJ
10.57745/OVCWQN
10.57745/5O6QIH 10.57745/5O6QIH
10.57745/KTFZQD 10.57745/RUQLJL
10.57745/R1NIKK 10.57745/OT1IFB
10.57745/IZHDPC
10.57745/TOR3SF
10.57745/Z3BG2U 10.57745/Z3BG2U
10.57745/7HF7KG
10.57745/3D4DFW 10.57745/3D4DFW
10.57745/OT1IFB 10.57745/LXTWNG
10.57745/XHQ7TL 10.57745/LUTMNE
10.57745/ID1LS6 10.15454/8UIA76
10.57745/52HT2L 10.57745/TOR3SF
10.57745/YWBDQQ 10.57745/J2A44Q
10.57745/LPJ2S2 10.57745/HZDPTT
10.57745/CM2WOI
10.57745/3VMB3Y
10.15454/O93984
10.57745/GRHRZJ
10.57745/7RFNNP
10.57745/UOGRPY
This diff is collapsed.
2-produce-graph/hist--datasets-by-year.png

25.6 KiB | W: | H:

2-produce-graph/hist--datasets-by-year.png

25.5 KiB | W: | H:

2-produce-graph/hist--datasets-by-year.png
2-produce-graph/hist--datasets-by-year.png
2-produce-graph/hist--datasets-by-year.png
2-produce-graph/hist--datasets-by-year.png
  • 2-up
  • Swipe
  • Onion skin
2-produce-graph/pie--datacite-client.png

36.2 KiB | W: | H:

2-produce-graph/pie--datacite-client.png

40.1 KiB | W: | H:

2-produce-graph/pie--datacite-client.png
2-produce-graph/pie--datacite-client.png
2-produce-graph/pie--datacite-client.png
2-produce-graph/pie--datacite-client.png
  • 2-up
  • Swipe
  • Onion skin
2-produce-graph/pie--datacite-type.png

39 KiB

import pandas as pd, matplotlib, matplotlib.pyplot as plt
import z_my_functions as my_fct
import seaborn as sns
import random
# Draw a pie chart of the "resourceTypeGeneral" values and save it as a PNG.
df = my_fct.load_and_treat_csv()
print(df.columns)

# Number of rows for each distinct "resourceTypeGeneral" value.
df_type = df["resourceTypeGeneral"].value_counts()

# Disabled draft: regroup the small categories into a single "other" slice.
# print(df_type_raw)
# treshold = 20
# df_type = df_type_raw[df_type_raw > treshold]
# df_type["other"] = df_type[df_type <= treshold].sum()


def _pct_label(p):
    """Return the rounded percentage label for a wedge, or '' when p <= 1."""
    if p > 1:
        return '{:.0f}%'.format(round(p))
    return ''


# One pastel colour per category, taken from the Seaborn palette.
pastel_palette = sns.color_palette('pastel')
colors = pastel_palette[:len(df_type)]
# Shuffle in place so that blue is not always the first colour.
random.shuffle(colors)

plt.pie(
    df_type,
    colors=colors,
    autopct=_pct_label,  # percentage is printed only on wedges above 1 %
    startangle=160,
)
plt.legend(df_type.index, loc=(0.7, -0.1))

# Main title, plus a smaller subtitle giving the row count of df.
plt.title("Type of datasets", fontsize=20, x=0.5, y=1.03, alpha=0.6)
plt.suptitle(f"n = {len(df)}", fontsize=11, x=0.5, y=0.9, alpha=0.6)

plt.savefig("pie--datacite-type.png")
# print(len(df))
\ No newline at end of file
...@@ -17,7 +17,7 @@ clients_name = [] ...@@ -17,7 +17,7 @@ clients_name = []
for item in df_client.index : for item in df_client.index :
short_name = item[: item.find(".")] short_name = item[: item.find(".")]
if short_name not in ["inist", "jbru"] : if short_name not in ["inist", "jbru"] :
clients_name.append( short_name) clients_name.append( short_name.capitalize())
else : else :
clients_name.append(item) clients_name.append(item)
...@@ -32,6 +32,7 @@ colors = sns.color_palette('pastel')[0:len(df_client)] ...@@ -32,6 +32,7 @@ colors = sns.color_palette('pastel')[0:len(df_client)]
plt.pie(df_client, labels = clients_name, colors = colors, autopct='%.0f%%') plt.pie(df_client, labels = clients_name, colors = colors, autopct='%.0f%%')
plt.title(f"Distribution of datasets by DataCite client", fontsize = 20, x = 0.5, y = 1.03, alpha = 0.6) plt.title(f"Distribution of datasets by DataCite client", fontsize = 20, x = 0.5, y = 1.03, alpha = 0.6)
plt.suptitle(f"n = {len(df)}", fontsize = 11, x = 0.5, y = 0.90, alpha = 0.6)
plt.savefig("pie--datacite-client.png") plt.savefig("pie--datacite-client.png")
......
...@@ -6,7 +6,7 @@ def load_and_treat_csv() : ...@@ -6,7 +6,7 @@ def load_and_treat_csv() :
df_raw = pd.read_csv("../dois-uga.csv", index_col=False) df_raw = pd.read_csv("../dois-uga.csv", index_col=False)
## remove datacite type that are not "research data" ## remove datacite type that are not "research data"
type_to_explude = ["Book", "ConferencePaper", "JournalArticle", "BookChapter", "Service", "Preprint"] type_to_explude = ["Book", "ConferencePaper", "ConferenceProceeding", "JournalArticle", "BookChapter", "Service", "Preprint"]
df = df_raw[ ~df_raw["resourceTypeGeneral"].isin(type_to_explude) ].copy() df = df_raw[ ~df_raw["resourceTypeGeneral"].isin(type_to_explude) ].copy()
return df return df
......
source diff could not be displayed: it is too large. Options to address this: view the blob.
...@@ -19,4 +19,9 @@ file_names = [ ...@@ -19,4 +19,9 @@ file_names = [
"rdg.py" "rdg.py"
] ]
execute_python_file(file_names[1]) for file in file_names :
execute_python_file(file)
# execute_python_file(file_names[1])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment