Skip to content
Snippets Groups Projects
Commit 8aeaa5cc authored by Lucrezia Carboni's avatar Lucrezia Carboni
Browse files

Delete structural-pattern-functions.py

parent 86f5e6a8
No related branches found
No related tags found
No related merge requests found
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "79d91740",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import networkx as nx\n",
"import math\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import confusion_matrix\n",
"from scipy.optimize import linear_sum_assignment as linear_assignment\n",
"import scipy.special as ss\n",
"import pandas as pd\n",
"import matplotlib.colors as mcolors\n",
"import random\n",
"import collections\n",
"\n",
"def structural_pattern_int(statistics):\n",
" \"\"\"return the structural pattern of a graph for integer nodal statistics:\n",
" # input is the dictionary of values (node, measure value)\n",
" # return a dictionary having as key the values in statistics codomain and as values the orbit/class (as set of nodes) \n",
" \"\"\"\n",
" \n",
" measure=dict(statistics)\n",
" # input is the dictionary of values (node, measure value)\n",
" # compute the orbits, return a dictionary with key all taken measure value and object the set of nodes\n",
" \n",
" orbit_set={}\n",
" for m in set(measure.values()):\n",
" orbit_set[m]=set([k for k,v in measure.items() if v==m])\n",
" \n",
" return orbit_set\n",
"\n",
"def structural_pattern(statistics,epsilon):\n",
" \"\"\"return the structural pattern of a graph for dense nodal statistics:\n",
" # input is the dictionary of values (node, measure value) and epsilon which corresponds to the number of significativ digit to be used for the values comparison\n",
" # return a dictionary having as key the values in statistics codomain and as values the orbit/class (as set of nodes) \n",
" \"\"\"\n",
" \n",
" epsilon = \"{:.\"+str(epsilon)+\"}\"\n",
" measure=dict(statistics)\n",
" #input is the dictionary of values (node, measure value)\n",
" #compute the orbits, return a dictionary with key all taken measure value and object the set of nodes\n",
" orbit_set={}\n",
" for m in set(measure.values()):\n",
" orbit_set[epsilon.format(float(m))]=set([k for k,v in measure.items() if epsilon.format(float(v))==epsilon.format(float(m))])\n",
" return orbit_set\n",
"\n",
"def nodes_in_trivial_classes(structural_pattern):\n",
" \"\"\"\"return a list of nodes in trivial class of the given structural pattern of a graph\n",
" #input is the structural pattern of a graph (as set of classes)\n",
" \"\"\"\n",
" #taken a dictionary of orbits\n",
" #return a list of fixed point\n",
" fixed_point=[]\n",
" for (k,o) in structural_pattern.items():\n",
" if len(o)==1:\n",
" fixed_point.append(list(o)[0])\n",
" \n",
" return fixed_point\n",
"\n",
"def intersection_structural_patterns(orbits_set_1,orbits_set_2):\n",
" \"\"\"given two structural pattern associated with different statistics, return their intersection\n",
" #input are two structural patterns on the same graph (as orbits set)\n",
" #output it the intersection of structural patterns\n",
" \"\"\"\n",
" #for each orbit set in orbits1 compute the intersection with each orbitset in orbits2\n",
" #keep only the non-empty set\n",
" #repeat for every orbitset in orbits1\n",
" intersection_orbits={}\n",
" for (k,o1) in orbits_set_1.items():\n",
" for (c,o2) in orbits_set_2.items():\n",
" f=o1.intersection(o2)\n",
" if len(f)!=0:\n",
" intersection_orbits[(k,c)]=f\n",
" return intersection_orbits\n",
" \n",
"def count_nodes_in_non_trivial_class(structural_pattern,num_nodes):\n",
" \"\"\"return the number of nodes in non-trivial class for a given orbits set and the total number of nodes\"\"\"\n",
" \n",
" return num_nodes-len(nodes_in_trivial_classes(structural_pattern))\n",
"\n",
"def graph_structural_pattern(G, statistics=[\"d\",\"b\",\"cc\",\"cs\",\"s\"],precision=6):\n",
" \n",
" \"\"\" return the structural pattern of the graph associated to each single statistics in statistics list\n",
" implemented only for the following nodal statistics:\n",
" \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n",
" \n",
" #output is in the format of dictionary with keys the statistics identifier\n",
" \n",
" \"\"\"\n",
" orbits_G={}\n",
" error_case = {\"0\":set(G.nodes())}\n",
" if \"d\" in statistics:\n",
" \n",
" try:\n",
" degrees=G.degree()\n",
" orbits_G[\"d\"] = structural_pattern_int(degrees) \n",
" except:\n",
" orbits_G[\"d\"] = error_case\n",
" \n",
" if \"b\" in statistics:\n",
" try:\n",
" betw=nx.betweenness_centrality(G)\n",
" orbits_G[\"b\"] = structural_pattern(betw,precision)\n",
" except:\n",
" orbits_G[\"b\"] = error_case\n",
" \n",
" if \"cc\" in statistics:\n",
" try:\n",
" cc=nx.clustering(G)\n",
" orbits_G[\"cc\"] = structural_pattern(cc,precision)\n",
" except:\n",
" orbits_G[\"cc\"] = error_case\n",
" if \"cs\" in statistics:\n",
" try:\n",
" cs = nx.closeness_centrality(G)\n",
" orbits_G[\"cs\"] = structural_pattern(cs,precision)\n",
" except:\n",
" orbits_G[\"cs\"] = error_case\n",
" if \"s\" in statistics:\n",
" try:\n",
" s=nx.second_order_centrality(G)\n",
" orbits_G[\"s\"] = structural_pattern(s,precision)\n",
" except:\n",
" orbits_G[\"s\"] = error_case\n",
" \n",
" return orbits_G\n",
"\n",
"def orthogonality_in_G(G,statistics=[\"d\",\"b\",\"cc\",\"cs\",\"s\"],precision=6):\n",
" \"\"\"\n",
" return a measure of orthogonality of a collection of nodal statistics, \n",
" implemented only for the following nodal statistics:\n",
" \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n",
" \n",
" 0 means perfectely orthogonal (all nodes of the intersection are in trivial orbits)\n",
" 1 means all nodes in the intersection have at least one equivalent node\"\"\"\n",
" \n",
" N = len(G.nodes())\n",
" orbit_set_of_G=graph_structural_pattern(G,statistics=statistics,precision=precision)\n",
" \n",
" inter=orbit_set_of_G.get(measure[0])\n",
" for m in measure:\n",
" o = orbit_set_of_G.get(m)\n",
" inter = intersection_structural_patterns(inter,o)\n",
" \n",
" return count_nodes_in_non_trivial_class(inter,N)/N\n",
"\n",
"def orthogonal_score(orbits_set_1,orbits_set_2,N):\n",
" \"\"\"return a measure of orthogonality of two orbits sets, \n",
" 0 means perfectely orthogonal (all nodes of the intersection are in trivial class)\n",
" 1 means all nodes have at least one equivalent node\"\"\"\n",
" \n",
" inter = intersection_structural_patterns(orbit_set_1,orbit_set_2)\n",
" \n",
" return count_nodes_in_non_trivial_orbits(inter,N)/N\n",
" \n",
" \n",
"import scipy.special as ss\n",
"\n",
"def open_data(path, dtype=float, skiprows=0, usecols=range(90)):\n",
" \"\"\"open correlation matrices file given its path, check usecols to be range(number of nodes)\"\"\"\n",
" #open a single csv file and convert into a numpy matrix \n",
" #in our case it'd be a 90x90 matrix, for gin-chuga files need to skip a row and usecols from 1-91\n",
" corr_matrix=np.loadtxt(fname=path,dtype=dtype,skiprows=skiprows,usecols=usecols)\n",
" return corr_matrix\n",
"\n",
"import math\n",
"from networkx import tree\n",
"\n",
"def adj_matrix_connected(corr_matrix,sparsity_value):\n",
" \"\"\"\n",
" given the correlation matrix and the expected sparsity coefficient it can \n",
" happen that the corresponding thresholded matrix results in a disconnected graph\n",
" here we force the graph to be fully connected by the computation of the minimum\n",
" spanning tree and adding the required edges in order to have a unique connected component \n",
" \"\"\"\n",
" if sparsity_value == 1.0:\n",
" adj_matrix=np.ones(corr_matrix.shape)\n",
" np.fill_diagonal(adj_matrix,0)\n",
" return adj_matrix\n",
" \n",
" \n",
" corr_matrix =abs(corr_matrix)\n",
"\n",
" max_num_edges = ss.comb(corr_matrix.shape[0],2)\n",
" num_edges = int(max_num_edges*sparsity_value)\n",
" \n",
" num_regions=corr_matrix.shape[0]\n",
" #total number of regions in the graph\n",
" \n",
" totalgraph=nx.from_numpy_matrix(1-abs(corr_matrix))\n",
" #extraction of a complete graph having has weight 1-abs(correlation)\n",
" #we need to take 1-abs since the mst is taking the minimum weight graph and we want the most correlated edges to be there\n",
" \n",
" MST=nx.adjacency_matrix(tree.minimum_spanning_tree(totalgraph).to_undirected()).todense()\n",
" MST_adj_mat=MST\n",
" MST_adj_mat[MST>0]==1\n",
" MST_adj_mat=np.triu(MST_adj_mat) #put zeros in the inferior triangular matrix\n",
" \n",
" #put zeros in the diagonal of the corr matrix\n",
" for i in range(num_regions):\n",
" corr_matrix[i,i]=0\n",
" \n",
" values_corr=abs(np.triu(corr_matrix))\n",
" \n",
" cor_wo_MST=values_corr[np.triu(MST_adj_mat)==0]\n",
" #we do not consider the correlation values which do not involve edges that are already in the MST\n",
" \n",
" values=list(cor_wo_MST.flatten())\n",
" values.sort(reverse=True)\n",
" \n",
" #we select the maximum value of correlation to have the expected num of edges - num of edges in the mst (num regions-1)\n",
" value_thresh=values[num_edges-(num_regions-1)-1] #-1 index start at 0\n",
" \n",
" adj_matrix=np.zeros(corr_matrix.shape) \n",
" \n",
" #we put an edge if the value of correlation is higher than the found threshold or if the edges is required by the mst\n",
" adj_matrix[values_corr>=value_thresh]=1\n",
" adj_matrix[MST_adj_mat!=0]=1\n",
" \n",
" adj_matrix=np.triu(adj_matrix)+np.transpose(np.triu(adj_matrix)) #simmetry of the adj matrix\n",
" \n",
" return adj_matrix\n",
"\n",
"def adj_matrix_connected_num_edges(corr_matrix,num_edges):\n",
" \"\"\"given the correlation matrix and the expected # edges it can \n",
" happen that the corresponding thresholded matrix results in a disconnected graph\n",
" here we force the graph to be fully connected by the computation of the minimum\n",
" spanning tree and adding the required edges in order to have a single connected component \n",
" \"\"\"\n",
" max_num_edges = ss.comb(corr_matrix.shape[0],2)\n",
" \n",
" if num_edges == max_num_edges:\n",
" adj_matrix=np.ones(corr_matrix.shape)\n",
" np.fill_diagonal(adj_matrix,0)\n",
" return adj_matrix\n",
" \n",
" \n",
" \n",
" num_regions=corr_matrix.shape[0]\n",
" #total number of regions in the graph\n",
" \n",
" totalgraph=nx.from_numpy_matrix(1-abs(corr_matrix))\n",
" #extraction of a complete graph having has weight 1-abs(correlation)\n",
" #we need to take 1-abs since the mst is taking the minimum weight graph and we want the most correlated edges to be there\n",
" \n",
" MST=nx.adjacency_matrix(tree.minimum_spanning_tree(totalgraph).to_undirected()).todense()\n",
" MST_adj_mat=MST\n",
" MST_adj_mat[MST>0]==1\n",
" MST_adj_mat=np.triu(MST_adj_mat) #put zeros in the inferior triangular matrix\n",
" \n",
" #put zeros in the diagonal of the corr matrix\n",
" for i in range(num_regions):\n",
" corr_matrix[i,i]=0\n",
" \n",
" values_corr=abs(np.triu(corr_matrix))\n",
" \n",
" cor_wo_MST=values_corr[np.triu(MST_adj_mat)==0]\n",
" #we do not consider the correlation values which do not involve edges that are already in the MST\n",
" \n",
" values=list(cor_wo_MST.flatten())\n",
" values.sort(reverse=True)\n",
" \n",
" #we select the maximum value of correlation to have the expected num of edges - num of edges in the mst (num regions-1)\n",
" value_thresh=values[num_edges-(num_regions-1)-1] #-1 index start at 0\n",
" \n",
" adj_matrix=np.zeros(corr_matrix.shape) \n",
" \n",
" #we put an edge if the value of correlation is higher than the found threshold or if the edges is required by the mst\n",
" adj_matrix[values_corr>=value_thresh]=1\n",
" adj_matrix[MST_adj_mat!=0]=1\n",
" \n",
" adj_matrix=np.triu(adj_matrix)+np.transpose(np.triu(adj_matrix)) #simmetry of the adj matrix\n",
" \n",
" return adj_matrix\n",
"\n",
"def adj_converstion_to_structural_patterns(dataset_adj,statistics,precision=6):\n",
" \"\"\" convert a dataset of adjacency matrices to one of structural pattern associated to the collection of statistics in statistics,\n",
" available statistics are \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n",
"\n",
" \"\"\"\n",
" dataset_orbits = {}\n",
" for key,adj in dataset_adj.items():\n",
" G = nx.from_numpy_array(adj)\n",
" orbits_of_G = graph_structural_pattern(G, statistics=statistics, precision=precision)\n",
" intersection = orbits_of_G.get(statistics[0])\n",
" if len(statistics) > 1:\n",
" for s in statistics[1:]: \n",
" intersection = intersection_structural_patterns(intersection,orbits_of_G.get(s))\n",
" dataset_orbits[key] = intersection\n",
" \n",
" return dataset_orbits\n",
"\n",
"\n",
"def graphs_converstion_to_structural_patterns(dataset_graph,statistics,precision=6):\n",
" \"\"\" convert a dataset of graphs to one of structural pattern associated to the given list of statistics\n",
" available statistics are \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n",
" \"\"\"\n",
" \n",
" dataset_orbits = {}\n",
" for key,G in dataset_graph.items():\n",
" orbits_of_G = graph_structural_pattern(G, statistics=statistics, precision=precision)\n",
" intersection = orbits_of_G.get(statistics[0])\n",
" if len(statistics) > 1:\n",
" for s in statistics[1:]: \n",
" intersection = intersection_structural_patterns(intersection,orbits_of_G.get(s))\n",
" dataset_orbits[key] = intersection\n",
" \n",
" return dataset_orbits\n",
"\n",
"def orthogonality_of_dataset(dataset, statistics =[\"d\",\"b\",\"cc\",\"cs\",\"s\"],precision = 6): \n",
" \"\"\" evaluate the orthogonality of the collection of statistics on a given dataset, dataset can be either adjacency dataset or graph dataset\n",
" available statistics are \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n",
" \"\"\"\n",
" \n",
" #check the dataset type\n",
" \n",
" \n",
" sample = list(dataset.values())[0]\n",
" if isinstance(sample,np.ndarray):\n",
" #the dataset contains adjacency matrix\n",
" dataset_converted = adj_converstion_to_structural_patterns(dataset,statistics,precision=precision)\n",
" N = sample.shape[0]\n",
" elif isinstance(sample,nx.Graph):\n",
" #the dataset contains graphs\n",
" dataset_converted = graphs_converstion_to_structural_patterns(dataset,statistics,precision=precision)\n",
" N = len(sample.nodes())\n",
" else:\n",
" print(\"the dataset is not in a good format,check it is composed either of nx.Graph or np.ndarray \")\n",
" return None\n",
" \n",
" ortho = []\n",
" for key,str_pat in dataset_converted.items() :\n",
" ortho.append((N-len(nodes_in_trivial_orbits(str_pat)))/N)\n",
" \n",
" return ortho\n",
"\n",
"\n",
"def orthogonality_of_dataset_over_sparsity(dataset, sparsity =np.linspace(0.1,1,10),statistics =[\"d\",\"b\",\"cc\",\"cs\",\"s\"],precision = 6) \n",
" \"\"\" evaluate the orthogonality of the given collection of statistics at different sparsity level, dataset is a correlation matrices dataset, return the mean orthogonality score and the std orthogonality \n",
" available statistics are \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n",
" return a tuple of mean value and std\n",
" \"\"\"\n",
" \n",
" ortho_mean={}\n",
" ortho_std={}\n",
" \n",
" for s in sparsity:\n",
" adj_dataset = {}\n",
" for k,corr_matrix in dataset.items():\n",
" adj_dataset[k] = adj_matrix_connected(corr_matrix,s)\n",
" \n",
" dataset_converted = adj_converstion_to_structural_patterns(dataset,statistics,precision=precision)\n",
"\n",
" orth_s = orthogonality_of_dataset(adj_dataset, statistics = statistics, precision = precision)\n",
" \n",
" ortho_mean[s] = np.mean(orth_s)\n",
" ortho_std[s] = np.std(orth_s)\n",
" \n",
" return ortho_mean, ortho_std \n",
"\n",
"def nodal_percentage_participation_no_class_distinction(structural_patterns_of_a_group,num_region):\n",
" \"\"\"input is a dictionary with keys (class,subject) and values the structural pattern, \n",
" return a dictionary whose keys are the nodes and values are the nodal percentage participation in non trivial class \n",
" with no class_distiction!\n",
" \"\"\"\n",
" \n",
" counting_classe = {}\n",
" \n",
" \n",
" for i in range(num_region):\n",
" counting_classe[i]=0\n",
"\n",
"\n",
" for o in structural_patterns_of_a_group.values():\n",
" n = nodes_in_trivial_class(o)\n",
" for i in range(num_region):\n",
" if i not in n:\n",
" counting_classe[i]=counting_classe[i]+1\n",
"\n",
" for i in range(num_region): \n",
" counting_classe[i] = counting_classe[i]/len(list(orbits_dataset.keys()))\n",
"\n",
" return counting_classe\n",
"\n",
"def nodal_percentage_participation_dataset(structural_pattern_dataset,num_region):\n",
" \"\"\"dataset is dataset of structural_pattern_dataset at an already fixed sparsity and associated with previously given collection of statistics,\n",
" input is a dictionary with keys (class,subject) and values the structural pattern, \n",
" return a dictionary whose keys are the class in the dataset and values is a dictionary keyd by nodes and values are nodal percentage participation in non trivial class \n",
" \"\"\"\n",
" \n",
" counting_dataset = {}\n",
" \n",
" for classe in set([k[0] for k in orbits_dataset.keys()]):\n",
" counting_classe={}\n",
" \n",
" for i in range(num_region):\n",
" counting_classe[i]=0\n",
" \n",
" all_classe=[orb for k,orb in list(structural_pattern_dataset.items()) if k[0]==classe]\n",
" \n",
" \n",
" for o in all_classe:\n",
" n = nodes_in_trivial_class(o)\n",
" for i in range(num_region):\n",
" if i not in n:\n",
" counting_classe[i]=counting_classe[i]+1\n",
" \n",
" for i in range(num_region): \n",
" counting_classe[i] = counting_classe[i]/len(all_classe)\n",
" counting_dataset[classe] = counting_classe\n",
" \n",
" return counting_dataset \n",
"\n",
"def help_to_structural_patterns(dataset_adj,statistics,precision=6):\n",
" \"\"\" convert a dataset of adjacency matrices to one of structural pattern associated to all statistics in the given list of statistics\n",
" implemented only for the following nodal statistics:\n",
" \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n",
" \n",
" \"\"\"\n",
" dataset_orbits = {}\n",
" for key,adj in dataset_adj.items():\n",
" G = nx.from_numpy_array(adj)\n",
" orbits_of_G = graph_structural_pattern(G, statistics=statistics, precision=precision)\n",
" dataset_orbits[key] = orbits_of_G\n",
" \n",
" return dataset_orbits\n",
"\n",
"from itertools import combinations\n",
"def orthogonality_of_dataset_over_pairs(dataset, statistics = [\"d\",\"b\",\"cc\",\"cs\",\"s\"],precision = 6): \n",
" \"\"\" evaluate the orthogonality of all possible pairs of nodal statistics in the input list on a given dataset,\n",
" dataset can be either adjacencies dataset or graph dataset (class,subjectID):adj_mat/(class,subjectID):graph\n",
" implemented only for the following nodal statistics:\n",
" \"d\" degree,\"b\" betweenness centrality,\"cc\" clustering coefficient,cs\" closeness centrality,\"s\" second order centrality\n",
" return a dictionary keys by statistics pairs with values a list of orthogonality score of the elements in dataset\n",
" \"\"\"\n",
" \n",
" dataset_converted = help_to_structural_patterns(dataset,statistics,precision=precision)\n",
" sample = list(dataset.values())[0]\n",
" N = sample.shape[0]\n",
" \n",
" ortho = {}\n",
"\n",
" couple_statistics = list(combinations(statistics,2))\n",
" for k in range(len(couple_statistics)):\n",
" couple = couple_statistics[k]\n",
" ortho[couple] = []\n",
" for k in range(len(couple_statistics)):\n",
" couple = couple_statistics[k]\n",
" for key,str_pat in dataset_converted.items() :\n",
" intersection = str_pat.get(couple[0])\n",
" intersection = intersection_structural_patterns(intersection,str_pat.get(couple[1]))\n",
" o = (N-len(nodes_in_trivial_class(intersection)))/N\n",
" ortho[couple].append(o) \n",
" return ortho"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment