Association Rules Graph Analysis

  • 50% confidence

In [1]:
import matplotlib
%matplotlib inline

In [2]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [3]:
# Load the petition adjacency matrix. The first CSV column ('Unnamed: 0')
# holds the petition ids; every other column is used as matrix data.
petitions = pd.read_csv('../data/adjacency_matrix_50_percent_confidence.csv')
petition_ids = petitions['Unnamed: 0']

# Drop the id column *before* extracting the values so its dtype cannot
# leak into the matrix, then transpose (same orientation as the previous
# `petitions.T.values[1:]`).
adjacency_matrix = petitions.iloc[:, 1:].values.T

# Row position -> petition id, used later as node labels.
# Series.to_dict() replaces the Series.iteritems() comprehension, which no
# longer works on pandas >= 2.0.
petition_labels = petition_ids.to_dict()

In [4]:
# Show the petition ids; the call form of print runs on Python 2 and 3.
print(petition_ids)


0      975
1      980
2      981
3      982
4      983
5      984
6      985
7      987
8      990
9      996
10    1003
11    1006
12    1008
13    1009
14    1010
15    1013
16    1016
17    1025
18    1029
19    1043
20    1052
Name: Unnamed: 0, dtype: int64

In [5]:
# Show the node-index -> petition-id mapping; print() runs on Python 2 and 3.
print(petition_labels)


{0: 975, 1: 980, 2: 981, 3: 982, 4: 983, 5: 984, 6: 985, 7: 987, 8: 990, 9: 996, 10: 1003, 11: 1006, 12: 1008, 13: 1009, 14: 1010, 15: 1013, 16: 1016, 17: 1025, 18: 1029, 19: 1043, 20: 1052}

In [6]:
# Show the adjacency matrix; print() runs on Python 2 and 3.
print(adjacency_matrix)


[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 1 1 0 0 0 1 1 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]

In [7]:
# Build the graph from the adjacency matrix.
# Newer networkx releases expose from_numpy_array and have dropped
# from_numpy_matrix; older releases only have from_numpy_matrix. Pick
# whichever constructor the installed version provides.
build_graph = getattr(nx, 'from_numpy_array', None)
if build_graph is None:
    build_graph = nx.from_numpy_matrix
graph = build_graph(adjacency_matrix)

In [8]:
# Colour lookup table, one row per petition group.
# NOTE(review): the first three columns are presumably colour components
# (e.g. RGB) -- confirm against the CSV.
categories_colors = pd.read_csv(
    '../data/petitions_groups_colors.csv'
)

In [9]:
# One [c0, c1, c2] triple per row of the colour table, in row order.
# .iloc makes the positional access explicit: plain row[0] on a Series with
# string labels relies on a deprecated integer-position fallback.
color_table = [list(row.iloc[:3]) for _, row in categories_colors.iterrows()]

In [10]:
# Petition id -> category lookup, taken from the first two CSV columns.
petitions_categories_table = pd.read_csv('../data/petitions_grouping.csv')

# Zip the two columns directly instead of going through iterrows():
#  * positional row[0] / row[1] on a string-labelled Series is deprecated;
#  * iterrows() upcasts each row to one common dtype, which could turn the
#    category into a float that is later unusable as a list index.
petitions_categories = dict(zip(
    petitions_categories_table.iloc[:, 0],
    petitions_categories_table.iloc[:, 1],
))

In [11]:
# Resolve each petition to its category, then each category to its colour
# entry, keeping the order of petition_ids (which is also the node order).
node_categories = (petitions_categories[pid] for pid in petition_ids)
node_colors_array = [color_table[category] for category in node_categories]

In [12]:
# Compute a force-directed layout. spring_layout starts from random
# positions, so the coordinates differ between runs.
pos = nx.spring_layout(graph, scale=10)

# Persist the layout with one row per node (index = node, columns = x, y).
posframe = pd.DataFrame(pos)
posframe.T.to_csv('../data/graph_positions.csv')

# Then read the coordinates back.
# At some point this will be the code to be retained,
# and the .csv file will be put under revision control.
# index_col=0 restores the node ids as the index, so each row holds exactly
# the two coordinates of one node.
posframe2 = pd.read_csv('../data/graph_positions.csv', index_col=0)

# Build the node -> coordinate-array mapping from the *reloaded* frame.
# (Previously this iterated `posframe`, so the file was never actually used,
# and it called an `array` name that is not imported anywhere.)
pos2 = {node: row.values for node, row in posframe2.iterrows()}

In [13]:
# Render the graph in three passes on the current axes: coloured nodes,
# petition-id labels, then edges. The trailing semicolon keeps the notebook
# from echoing the return value of the last call.
nx.draw_networkx_nodes(
    graph,
    pos,
    node_size=1000,
    node_color=node_colors_array,
)
nx.draw_networkx_labels(graph, pos, labels=petition_labels)
nx.draw_networkx_edges(graph, pos);