{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "79d91740", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import networkx as nx\n", "import math\n", "import matplotlib.pyplot as plt\n", "from sklearn.metrics import confusion_matrix\n", "from scipy.optimize import linear_sum_assignment as linear_assignment\n", "import scipy.special as ss\n", "import pandas as pd\n", "import matplotlib.colors as mcolors\n", "import random\n", "import collections\n", "\n", "def structural_pattern_int(statistics):\n", " \"\"\"return the structural pattern of a graph for integer nodal statistics:\n", " # input is the dictionary of values (node, measure value)\n", " # return a dictionary having as key the values in statistics codomain and as values the orbit/class (as set of nodes) \n", " \"\"\"\n", " \n", " measure=dict(statistics)\n", " # input is the dictionary of values (node, measure value)\n", " # compute the orbits, return a dictionary with key all taken measure value and object the set of nodes\n", " \n", " orbit_set={}\n", " for m in set(measure.values()):\n", " orbit_set[m]=set([k for k,v in measure.items() if v==m])\n", " \n", " return orbit_set\n", "\n", "def structural_pattern(statistics,epsilon):\n", " \"\"\"return the structural pattern of a graph for dense nodal statistics:\n", " # input is the dictionary of values (node, measure value) and epsilon which corresponds to the number of significativ digit to be used for the values comparison\n", " # return a dictionary having as key the values in statistics codomain and as values the orbit/class (as set of nodes) \n", " \"\"\"\n", " \n", " epsilon = \"{:.\"+str(epsilon)+\"}\"\n", " measure=dict(statistics)\n", " #input is the dictionary of values (node, measure value)\n", " #compute the orbits, return a dictionary with key all taken measure value and object the set of nodes\n", " orbit_set={}\n", " for m in set(measure.values()):\n", " orbit_set[epsilon.format(float(m))]=set([k for k,v in measure.items() if epsilon.format(float(v))==epsilon.format(float(m))])\n", " return orbit_set\n", "\n", "def nodes_in_trivial_classes(structural_pattern):\n", " \"\"\"\"return a list of nodes in trivial class of the given structural pattern of a graph\n", " #input is the structural pattern of a graph (as set of classes)\n", " \"\"\"\n", " #taken a dictionary of orbits\n", " #return a list of fixed point\n", " fixed_point=[]\n", " for (k,o) in structural_pattern.items():\n", " if len(o)==1:\n", " fixed_point.append(list(o)[0])\n", " \n", " return fixed_point\n", "\n", "def intersection_structural_patterns(orbits_set_1,orbits_set_2):\n", " \"\"\"given two structural pattern associated with different statistics, return their intersection\n", " #input are two structural patterns on the same graph (as orbits set)\n", " #output it the intersection of structural patterns\n", " \"\"\"\n", " #for each orbit set in orbits1 compute the intersection with each orbitset in orbits2\n", " #keep only the non-empty set\n", " #repeat for every orbitset in orbits1\n", " intersection_orbits={}\n", " for (k,o1) in orbits_set_1.items():\n", " for (c,o2) in orbits_set_2.items():\n", " f=o1.intersection(o2)\n", " if len(f)!=0:\n", " intersection_orbits[(k,c)]=f\n", " return intersection_orbits\n", " \n", "def count_nodes_in_non_trivial_class(structural_pattern,num_nodes):\n", " \"\"\"return the number of nodes in non-trivial class for a given orbits set and the total number of nodes\"\"\"\n", " \n", " return num_nodes-len(nodes_in_trivial_classes(structural_pattern))\n", "\n", "def graph_structural_pattern(G, statistics=[\"d\",\"b\",\"cc\",\"cs\",\"s\"],precision=6):\n", " \n", " \"\"\" return the structural pattern of the graph associated to each single statistics in statistics list\n", " implemented only for the following nodal statistics:\n", " \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n", " \n", " #output is in the format of dictionary with keys the statistics identifier\n", " \n", " \"\"\"\n", " orbits_G={}\n", " error_case = {\"0\":set(G.nodes())}\n", " if \"d\" in statistics:\n", " \n", " try:\n", " degrees=G.degree()\n", " orbits_G[\"d\"] = structural_pattern_int(degrees) \n", " except:\n", " orbits_G[\"d\"] = error_case\n", " \n", " if \"b\" in statistics:\n", " try:\n", " betw=nx.betweenness_centrality(G)\n", " orbits_G[\"b\"] = structural_pattern(betw,precision)\n", " except:\n", " orbits_G[\"b\"] = error_case\n", " \n", " if \"cc\" in statistics:\n", " try:\n", " cc=nx.clustering(G)\n", " orbits_G[\"cc\"] = structural_pattern(cc,precision)\n", " except:\n", " orbits_G[\"cc\"] = error_case\n", " if \"cs\" in statistics:\n", " try:\n", " cs = nx.closeness_centrality(G)\n", " orbits_G[\"cs\"] = structural_pattern(cs,precision)\n", " except:\n", " orbits_G[\"cs\"] = error_case\n", " if \"s\" in statistics:\n", " try:\n", " s=nx.second_order_centrality(G)\n", " orbits_G[\"s\"] = structural_pattern(s,precision)\n", " except:\n", " orbits_G[\"s\"] = error_case\n", " \n", " return orbits_G\n", "\n", "def orthogonality_in_G(G,statistics=[\"d\",\"b\",\"cc\",\"cs\",\"s\"],precision=6):\n", " \"\"\"\n", " return a measure of orthogonality of a collection of nodal statistics, \n", " implemented only for the following nodal statistics:\n", " \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n", " \n", " 0 means perfectely orthogonal (all nodes of the intersection are in trivial orbits)\n", " 1 means all nodes in the intersection have at least one equivalent node\"\"\"\n", " \n", " N = len(G.nodes())\n", " orbit_set_of_G=graph_structural_pattern(G,statistics=statistics,precision=precision)\n", " \n", " inter=orbit_set_of_G.get(measure[0])\n", " for m in measure:\n", " o = orbit_set_of_G.get(m)\n", " inter = intersection_structural_patterns(inter,o)\n", " \n", " return count_nodes_in_non_trivial_class(inter,N)/N\n", "\n", "def orthogonal_score(orbits_set_1,orbits_set_2,N):\n", " \"\"\"return a measure of orthogonality of two orbits sets, \n", " 0 means perfectely orthogonal (all nodes of the intersection are in trivial class)\n", " 1 means all nodes have at least one equivalent node\"\"\"\n", " \n", " inter = intersection_structural_patterns(orbit_set_1,orbit_set_2)\n", " \n", " return count_nodes_in_non_trivial_orbits(inter,N)/N\n", " \n", " \n", "import scipy.special as ss\n", "\n", "def open_data(path, dtype=float, skiprows=0, usecols=range(90)):\n", " \"\"\"open correlation matrices file given its path, check usecols to be range(number of nodes)\"\"\"\n", " #open a single csv file and convert into a numpy matrix \n", " #in our case it'd be a 90x90 matrix, for gin-chuga files need to skip a row and usecols from 1-91\n", " corr_matrix=np.loadtxt(fname=path,dtype=dtype,skiprows=skiprows,usecols=usecols)\n", " return corr_matrix\n", "\n", "import math\n", "from networkx import tree\n", "\n", "def adj_matrix_connected(corr_matrix,sparsity_value):\n", " \"\"\"\n", " given the correlation matrix and the expected sparsity coefficient it can \n", " happen that the corresponding thresholded matrix results in a disconnected graph\n", " here we force the graph to be fully connected by the computation of the minimum\n", " spanning tree and adding the required edges in order to have a unique connected component \n", " \"\"\"\n", " if sparsity_value == 1.0:\n", " adj_matrix=np.ones(corr_matrix.shape)\n", " np.fill_diagonal(adj_matrix,0)\n", " return adj_matrix\n", " \n", " \n", " corr_matrix =abs(corr_matrix)\n", "\n", " max_num_edges = ss.comb(corr_matrix.shape[0],2)\n", " num_edges = int(max_num_edges*sparsity_value)\n", " \n", " num_regions=corr_matrix.shape[0]\n", " #total number of regions in the graph\n", " \n", " totalgraph=nx.from_numpy_matrix(1-abs(corr_matrix))\n", " #extraction of a complete graph having has weight 1-abs(correlation)\n", " #we need to take 1-abs since the mst is taking the minimum weight graph and we want the most correlated edges to be there\n", " \n", " MST=nx.adjacency_matrix(tree.minimum_spanning_tree(totalgraph).to_undirected()).todense()\n", " MST_adj_mat=MST\n", " MST_adj_mat[MST>0]==1\n", " MST_adj_mat=np.triu(MST_adj_mat) #put zeros in the inferior triangular matrix\n", " \n", " #put zeros in the diagonal of the corr matrix\n", " for i in range(num_regions):\n", " corr_matrix[i,i]=0\n", " \n", " values_corr=abs(np.triu(corr_matrix))\n", " \n", " cor_wo_MST=values_corr[np.triu(MST_adj_mat)==0]\n", " #we do not consider the correlation values which do not involve edges that are already in the MST\n", " \n", " values=list(cor_wo_MST.flatten())\n", " values.sort(reverse=True)\n", " \n", " #we select the maximum value of correlation to have the expected num of edges - num of edges in the mst (num regions-1)\n", " value_thresh=values[num_edges-(num_regions-1)-1] #-1 index start at 0\n", " \n", " adj_matrix=np.zeros(corr_matrix.shape) \n", " \n", " #we put an edge if the value of correlation is higher than the found threshold or if the edges is required by the mst\n", " adj_matrix[values_corr>=value_thresh]=1\n", " adj_matrix[MST_adj_mat!=0]=1\n", " \n", " adj_matrix=np.triu(adj_matrix)+np.transpose(np.triu(adj_matrix)) #simmetry of the adj matrix\n", " \n", " return adj_matrix\n", "\n", "def adj_matrix_connected_num_edges(corr_matrix,num_edges):\n", " \"\"\"given the correlation matrix and the expected # edges it can \n", " happen that the corresponding thresholded matrix results in a disconnected graph\n", " here we force the graph to be fully connected by the computation of the minimum\n", " spanning tree and adding the required edges in order to have a single connected component \n", " \"\"\"\n", " max_num_edges = ss.comb(corr_matrix.shape[0],2)\n", " \n", " if num_edges == max_num_edges:\n", " adj_matrix=np.ones(corr_matrix.shape)\n", " np.fill_diagonal(adj_matrix,0)\n", " return adj_matrix\n", " \n", " \n", " \n", " num_regions=corr_matrix.shape[0]\n", " #total number of regions in the graph\n", " \n", " totalgraph=nx.from_numpy_matrix(1-abs(corr_matrix))\n", " #extraction of a complete graph having has weight 1-abs(correlation)\n", " #we need to take 1-abs since the mst is taking the minimum weight graph and we want the most correlated edges to be there\n", " \n", " MST=nx.adjacency_matrix(tree.minimum_spanning_tree(totalgraph).to_undirected()).todense()\n", " MST_adj_mat=MST\n", " MST_adj_mat[MST>0]==1\n", " MST_adj_mat=np.triu(MST_adj_mat) #put zeros in the inferior triangular matrix\n", " \n", " #put zeros in the diagonal of the corr matrix\n", " for i in range(num_regions):\n", " corr_matrix[i,i]=0\n", " \n", " values_corr=abs(np.triu(corr_matrix))\n", " \n", " cor_wo_MST=values_corr[np.triu(MST_adj_mat)==0]\n", " #we do not consider the correlation values which do not involve edges that are already in the MST\n", " \n", " values=list(cor_wo_MST.flatten())\n", " values.sort(reverse=True)\n", " \n", " #we select the maximum value of correlation to have the expected num of edges - num of edges in the mst (num regions-1)\n", " value_thresh=values[num_edges-(num_regions-1)-1] #-1 index start at 0\n", " \n", " adj_matrix=np.zeros(corr_matrix.shape) \n", " \n", " #we put an edge if the value of correlation is higher than the found threshold or if the edges is required by the mst\n", " adj_matrix[values_corr>=value_thresh]=1\n", " adj_matrix[MST_adj_mat!=0]=1\n", " \n", " adj_matrix=np.triu(adj_matrix)+np.transpose(np.triu(adj_matrix)) #simmetry of the adj matrix\n", " \n", " return adj_matrix\n", "\n", "def adj_converstion_to_structural_patterns(dataset_adj,statistics,precision=6):\n", " \"\"\" convert a dataset of adjacency matrices to one of structural pattern associated to the collection of statistics in statistics,\n", " available statistics are \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n", "\n", " \"\"\"\n", " dataset_orbits = {}\n", " for key,adj in dataset_adj.items():\n", " G = nx.from_numpy_array(adj)\n", " orbits_of_G = graph_structural_pattern(G, statistics=statistics, precision=precision)\n", " intersection = orbits_of_G.get(statistics[0])\n", " if len(statistics) > 1:\n", " for s in statistics[1:]: \n", " intersection = intersection_structural_patterns(intersection,orbits_of_G.get(s))\n", " dataset_orbits[key] = intersection\n", " \n", " return dataset_orbits\n", "\n", "\n", "def graphs_converstion_to_structural_patterns(dataset_graph,statistics,precision=6):\n", " \"\"\" convert a dataset of graphs to one of structural pattern associated to the given list of statistics\n", " available statistics are \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n", " \"\"\"\n", " \n", " dataset_orbits = {}\n", " for key,G in dataset_graph.items():\n", " orbits_of_G = graph_structural_pattern(G, statistics=statistics, precision=precision)\n", " intersection = orbits_of_G.get(statistics[0])\n", " if len(statistics) > 1:\n", " for s in statistics[1:]: \n", " intersection = intersection_structural_patterns(intersection,orbits_of_G.get(s))\n", " dataset_orbits[key] = intersection\n", " \n", " return dataset_orbits\n", "\n", "def orthogonality_of_dataset(dataset, statistics =[\"d\",\"b\",\"cc\",\"cs\",\"s\"],precision = 6): \n", " \"\"\" evaluate the orthogonality of the collection of statistics on a given dataset, dataset can be either adjacency dataset or graph dataset\n", " available statistics are \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n", " \"\"\"\n", " \n", " #check the dataset type\n", " \n", " \n", " sample = list(dataset.values())[0]\n", " if isinstance(sample,np.ndarray):\n", " #the dataset contains adjacency matrix\n", " dataset_converted = adj_converstion_to_structural_patterns(dataset,statistics,precision=precision)\n", " N = sample.shape[0]\n", " elif isinstance(sample,nx.Graph):\n", " #the dataset contains graphs\n", " dataset_converted = graphs_converstion_to_structural_patterns(dataset,statistics,precision=precision)\n", " N = len(sample.nodes())\n", " else:\n", " print(\"the dataset is not in a good format,check it is composed either of nx.Graph or np.ndarray \")\n", " return None\n", " \n", " ortho = []\n", " for key,str_pat in dataset_converted.items() :\n", " ortho.append((N-len(nodes_in_trivial_orbits(str_pat)))/N)\n", " \n", " return ortho\n", "\n", "\n", "def orthogonality_of_dataset_over_sparsity(dataset, sparsity =np.linspace(0.1,1,10),statistics =[\"d\",\"b\",\"cc\",\"cs\",\"s\"],precision = 6) \n", " \"\"\" evaluate the orthogonality of the given collection of statistics at different sparsity level, dataset is a correlation matrices dataset, return the mean orthogonality score and the std orthogonality \n", " available statistics are \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n", " return a tuple of mean value and std\n", " \"\"\"\n", " \n", " ortho_mean={}\n", " ortho_std={}\n", " \n", " for s in sparsity:\n", " adj_dataset = {}\n", " for k,corr_matrix in dataset.items():\n", " adj_dataset[k] = adj_matrix_connected(corr_matrix,s)\n", " \n", " dataset_converted = adj_converstion_to_structural_patterns(dataset,statistics,precision=precision)\n", "\n", " orth_s = orthogonality_of_dataset(adj_dataset, statistics = statistics, precision = precision)\n", " \n", " ortho_mean[s] = np.mean(orth_s)\n", " ortho_std[s] = np.std(orth_s)\n", " \n", " return ortho_mean, ortho_std \n", "\n", "def nodal_percentage_participation_no_class_distinction(structural_patterns_of_a_group,num_region):\n", " \"\"\"input is a dictionary with keys (class,subject) and values the structural pattern, \n", " return a dictionary whose keys are the nodes and values are the nodal percentage participation in non trivial class \n", " with no class_distiction!\n", " \"\"\"\n", " \n", " counting_classe = {}\n", " \n", " \n", " for i in range(num_region):\n", " counting_classe[i]=0\n", "\n", "\n", " for o in structural_patterns_of_a_group.values():\n", " n = nodes_in_trivial_class(o)\n", " for i in range(num_region):\n", " if i not in n:\n", " counting_classe[i]=counting_classe[i]+1\n", "\n", " for i in range(num_region): \n", " counting_classe[i] = counting_classe[i]/len(list(orbits_dataset.keys()))\n", "\n", " return counting_classe\n", "\n", "def nodal_percentage_participation_dataset(structural_pattern_dataset,num_region):\n", " \"\"\"dataset is dataset of structural_pattern_dataset at an already fixed sparsity and associated with previously given collection of statistics,\n", " input is a dictionary with keys (class,subject) and values the structural pattern, \n", " return a dictionary whose keys are the class in the dataset and values is a dictionary keyd by nodes and values are nodal percentage participation in non trivial class \n", " \"\"\"\n", " \n", " counting_dataset = {}\n", " \n", " for classe in set([k[0] for k in orbits_dataset.keys()]):\n", " counting_classe={}\n", " \n", " for i in range(num_region):\n", " counting_classe[i]=0\n", " \n", " all_classe=[orb for k,orb in list(structural_pattern_dataset.items()) if k[0]==classe]\n", " \n", " \n", " for o in all_classe:\n", " n = nodes_in_trivial_class(o)\n", " for i in range(num_region):\n", " if i not in n:\n", " counting_classe[i]=counting_classe[i]+1\n", " \n", " for i in range(num_region): \n", " counting_classe[i] = counting_classe[i]/len(all_classe)\n", " counting_dataset[classe] = counting_classe\n", " \n", " return counting_dataset \n", "\n", "def help_to_structural_patterns(dataset_adj,statistics,precision=6):\n", " \"\"\" convert a dataset of adjacency matrices to one of structural pattern associated to all statistics in the given list of statistics\n", " implemented only for the following nodal statistics:\n", " \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n", " \n", " \"\"\"\n", " dataset_orbits = {}\n", " for key,adj in dataset_adj.items():\n", " G = nx.from_numpy_array(adj)\n", " orbits_of_G = graph_structural_pattern(G, statistics=statistics, precision=precision)\n", " dataset_orbits[key] = orbits_of_G\n", " \n", " return dataset_orbits\n", "\n", "from itertools import combinations\n", "def orthogonality_of_dataset_over_pairs(dataset, statistics = [\"d\",\"b\",\"cc\",\"cs\",\"s\"],precision = 6): \n", " \"\"\" evaluate the orthogonality of all possible pairs of nodal statistics in the input list on a given dataset,\n", " dataset can be either adjacencies dataset or graph dataset (class,subjectID):adj_mat/(class,subjectID):graph\n", " implemented only for the following nodal statistics:\n", " \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n", " return a dictionary keys by statistics pairs with values a list of orthogonality score of the elements in dataset\n", " \"\"\"\n", " \n", " dataset_converted = help_to_structural_patterns(dataset,statistics,precision=precision)\n", " sample = list(dataset.values())[0]\n", " N = sample.shape[0]\n", " \n", " ortho = {}\n", "\n", " couple_statistics = list(combinations(statistics,2))\n", " for k in range(len(couple_statistics)):\n", " couple = couple_statistics[k]\n", " ortho[couple] = []\n", " for k in range(len(couple_statistics)):\n", " couple = couple_statistics[k]\n", " for key,str_pat in dataset_converted.items() :\n", " intersection = str_pat.get(couple[0])\n", " intersection = intersection_structural_patterns(intersection,str_pat.get(couple[1]))\n", " o = (N-len(nodes_in_trivial_class(intersection)))/N\n", " ortho[couple].append(o) \n", " return ortho" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 5 }