# Section of 1-enrich-with-datacite/concatenate-enrich-dois.py, reconstructed
# from the post-image of the hunk "@@ -56,14 +56,23 @@" (the patch had been
# collapsed onto a single line, so indentation here is re-derived from syntax).

# If new datasets have been found: resolve each fresh DOI to the DOI returned
# by my_functions.get_origin_version, keep only rows whose resolved DOI is not
# already known, then append the surviving rows to df_old.
if temp_rows:
    df_fresh = pd.DataFrame(temp_rows)

    # DOIs already present in df_old, plus every resolved DOI accepted below.
    # A set keeps the membership test O(1) instead of rescanning a growing list.
    # NOTE(review): assumes DOI values are hashable (strings) -- confirm.
    known_dois = set(df_old["doi"])

    # Index *labels* of the rows to discard. DataFrame.drop works on labels,
    # so labels (not positions) are collected; this stays correct even if
    # df_fresh ever stops having a default 0..n-1 index.
    labels_to_drop = []

    for label in df_fresh.index:
        result = my_functions.get_origin_version(df_fresh.loc[label, "doi"])
        origin_doi = result[0]

        if origin_doi in known_dois:
            # Resolved DOI already in df_old, or already produced by an
            # earlier fresh row: this row is a duplicate.
            labels_to_drop.append(label)
            continue

        known_dois.add(origin_doi)
        # result[1] and result[2] are stored as-is; their meaning is defined by
        # get_origin_version (presumably instance / citation counts, going by
        # the column names -- TODO confirm).
        df_fresh.loc[label, "doi"] = origin_doi
        df_fresh.loc[label, "relation_nbInstances"] = result[1]
        df_fresh.loc[label, "relation_nbCitation"] = result[2]

    df_fresh.drop(labels_to_drop, inplace=True)
    print("Nombre de dois supprimés : " + str(len(labels_to_drop)))

    # Same number the original list-based counter reported: every DOI entry of
    # df_old (duplicates included) plus one per fresh row that was kept.
    nb_dois_kept = len(df_old["doi"]) + len(df_fresh)
    print("Nb dois a garder : " + str(nb_dois_kept))

    df_concat = pd.concat([df_old, df_fresh], ignore_index=True)