Association Rules Graph Analysis

  • 10% confidence

In [6]:
import matplotlib
%matplotlib inline

In [7]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [8]:
# Load the 10%-confidence association-rule adjacency matrix. The CSV's first
# column (read in as 'Unnamed: 0') carries the petition ids; the remaining
# columns hold the matrix itself.
# NOTE(review): absolute, machine-specific path -- consider a path relative to
# the notebook so the cell runs on other machines.
petitions = pd.read_csv('/Users/catherinedumas/data/dissertaion/petitions/adjacency_matrix_10_percent_confidence.csv')
# Transpose so every CSV column becomes a row, then drop the first row (the ids).
adjacency_matrix = petitions.T.values[1:]
petition_ids = petitions['Unnamed: 0']
# Row index -> petition id, used later as node labels. Built with zip() rather
# than Series.iteritems(), which was removed in pandas 2.0; zip() over the index
# and the values yields the same pairs on every pandas / Python version.
petition_labels = dict(zip(petition_ids.index, petition_ids))

In [9]:
# Show the petition ids. print() with a single argument prints the same text
# on Python 2 and Python 3, whereas the bare print statement is Python 2 only.
print(petition_ids)


0      975
1      980
2      981
3      982
4      983
5      984
6      985
7      987
8      990
9      996
10    1003
11    1006
12    1008
13    1009
14    1010
15    1013
16    1016
17    1025
18    1029
19    1043
20    1052
Name: Unnamed: 0, dtype: int64

In [10]:
# Show the index -> petition id mapping. print() with a single argument prints
# the same text on Python 2 and Python 3; the bare print statement is Python 2 only.
print(petition_labels)


{0: 975, 1: 980, 2: 981, 3: 982, 4: 983, 5: 984, 6: 985, 7: 987, 8: 990, 9: 996, 10: 1003, 11: 1006, 12: 1008, 13: 1009, 14: 1010, 15: 1013, 16: 1016, 17: 1025, 18: 1029, 19: 1043, 20: 1052}

In [11]:
# Show the adjacency matrix. print() with a single argument prints the same
# text on Python 2 and Python 3; the bare print statement is Python 2 only.
print(adjacency_matrix)


[[0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 1 1 0 0 0 1 1 0 0 0 0 0 0]
 [1 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 1 0 0]
 [0 1 1 0 1 0 0 1 0 1 0 1 0 1 1 0 0 0 1 0 0]
 [0 1 0 0 0 0 1 0 1 0 0 1 0 1 1 0 0 0 1 0 0]
 [1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 1 0 1 0 1 1 1 1 1 0 0 1 1 0 0 0 1 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 1 0 0 0 0 1 1 1 0 1 0 0 1 0 0 0 1 0 0]
 [0 1 0 0 0 0 0 1 1 1 0 0 0 1 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0]]

In [12]:
# Build the graph from the adjacency matrix. nx.from_numpy_matrix() was removed
# in networkx 3.0, so prefer nx.from_numpy_array() when the installed networkx
# provides it and fall back to the old name on releases that predate it.
_graph_from_adjacency = getattr(nx, 'from_numpy_array', None) or nx.from_numpy_matrix
graph = _graph_from_adjacency(adjacency_matrix)

In [13]:
# Read the colour definitions for the petition groups from CSV
# (column layout is not visible here).
categories_colors = pd.read_csv(filepath_or_buffer='../data/petitions_groups_colors.csv')

In [14]:
# One three-element list per row of the colour table, taken from its first
# three columns. .iloc makes the positional access explicit: an integer key on
# a label-indexed row (row[0]) only works through a positional fallback that
# pandas has deprecated.
color_table = [
    [row.iloc[0], row.iloc[1], row.iloc[2]]
    for i, row in categories_colors.iterrows()
]

In [15]:
# Lookup from the first column of the grouping CSV to its second column
# (presumably petition id -> category; confirm against the CSV header).
petitions_categories_table = pd.read_csv('../data/petitions_grouping.csv')
# Zip the two columns directly instead of iterrows() + row[0]/row[1]: each
# column keeps its own dtype (iterrows() coerces a row to one common dtype) and
# no deprecated positional fallback on a label-indexed row is involved.
petitions_categories = dict(zip(petitions_categories_table.iloc[:, 0],
                                petitions_categories_table.iloc[:, 1]))

In [16]:
# Resolve a colour for every petition, in petition_ids order:
# petition id -> category (petitions_categories) -> colour (color_table).
node_colors_array = []
for pid in petition_ids:
    category = petitions_categories[pid]
    node_colors_array.append(color_table[category])

In [18]:
# Draw the graph with a spring layout, labelling nodes with their petition ids
# and colouring them with the per-petition colours computed above.
nx.draw_spring(
    graph,
    labels=petition_labels,
    node_color=node_colors_array
)