In [1]:
import sys

sys.path.append('../../code/')
import os
import json
from datetime import datetime
import time
from math import *

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats


import igraph as ig

from load_data import load_citation_network, case_info

%load_ext autoreload
%autoreload 2
%matplotlib inline

# Root data directory; the load cell reads its CSVs from data_dir + 'clean/...'.
data_dir = '../../data/'
# Network to load: 'all' selects the *_master.csv files, any other value
# selects the per-court directory data_dir + 'clean/<court_name>/'.
court_name = 'scotus'

Load the citation network into igraph


In [2]:
# Build the directed citation graph `g` for `court_name`.
#
# Reads the case metadata and the citation edgelist from disk, creates one
# vertex per metadata row, and adds one directed edge per edgelist row whose
# two endpoints both appear in the metadata.  Rows with an unknown endpoint
# are skipped and tallied in `missing_cases`.
if court_name == 'all':
    case_metadata = pd.read_csv(data_dir + 'clean/case_metadata_master.csv')

    edgelist = pd.read_csv(data_dir + 'clean/edgelist_master.csv')
else:
    net_dir = data_dir + 'clean/' + court_name + '/'
    if not os.path.exists(net_dir):
        os.makedirs(net_dir)
        # NOTE(review): make_court_subnetwork is not among the names imported
        # in the visible import cell -- confirm where it is defined and add
        # the import, otherwise this branch raises NameError.
        make_court_subnetwork(court_name, data_dir)

    case_metadata = pd.read_csv(net_dir + 'case_metadata.csv')

    edgelist = pd.read_csv(net_dir + 'edgelist.csv')
    # drop the stray index column written alongside the CSV
    edgelist = edgelist.drop('Unnamed: 0', axis=1)

# create a dictionary that maps court listener ids to igraph ids
# (the igraph id of a case is its row position in case_metadata)
cl_ids = case_metadata['id'].tolist()
cl_to_ig_id = {cl_id: i for i, cl_id in enumerate(cl_ids)}

# add nodes
V = case_metadata.shape[0]
g = ig.Graph(n=V, directed=True)
g.vs['name'] = cl_ids

# create igraph edgelist
cases_w_metadata = set(cl_to_ig_id.keys())
ig_edgelist = []
missing_cases = 0
# the timer covers edge construction and attribute assignment, not the CSV reads
start = time.time()
for row in edgelist.itertuples():
    # row[0] is the DataFrame index; row[1] and row[2] are the first two
    # columns -- presumably the citing and the cited case id (TODO confirm
    # the column order of the edgelist files).
    cl_ing = row[1]
    cl_ed = row[2]

    if (cl_ing in cases_w_metadata) and (cl_ed in cases_w_metadata):
        ing = cl_to_ig_id[cl_ing]
        ed = cl_to_ig_id[cl_ed]
        # only append when both endpoints resolved; appending unconditionally
        # would re-add the previous edge (or fail on the first row)
        ig_edgelist.append((ing, ed))
    else:
        missing_cases += 1

# add edges to graph
g.add_edges(ig_edgelist)

# add vertex attributes
g.vs['court'] = case_metadata['court'].tolist()
# the year is the text before the first '-' of the date string
g.vs['year'] = [int(d.split('-')[0]) for d in case_metadata['date'].tolist()]

end = time.time()

print('%d seconds for %d edges' % (end - start, len(g.es)))


0 seconds for 250465 edges

In [12]:
# Sanity check: igraph's one-line summary of g (directedness, vertex and edge
# counts, attribute names).
g.summary()


Out[12]:
'IGRAPH DN-- 33248 250465 -- \n+ attr: court (v), name (v), year (v)'

community detection


In [29]:
# make graph undirected
# as_undirected() returns a new undirected graph and leaves g untouched,
# so an explicit g.copy() beforehand only duplicates the graph in memory.
gu = g.as_undirected()

Modularity-based clustering (fast greedy)


In [51]:
# Fast-greedy modularity clustering on the undirected graph, timed end to end
# (dendrogram construction plus the cut into a flat clustering).
start = time.time()
fastgreedy_dendrogram = gu.community_fastgreedy()
mod_clusters = fastgreedy_dendrogram.as_clustering()
end = time.time()

# NOTE(review): the node/edge counts reported here come from the directed
# graph g, while the algorithm ran on gu -- confirm that is intended.
timing_args = (end - start, len(g.vs), len(g.es))
print('fastgreedy modularity took %d seconds with %d nodes and %d edges' % timing_args)

# Histogram of cluster sizes: {cluster size: number of clusters of that size}
mod_cl_sizes = mod_clusters.sizes()
dict((size, mod_cl_sizes.count(size)) for size in set(mod_cl_sizes))


fastgreedy modularity took 215 seconds with 33248 nodes and 250465 edges
Out[51]:
{1: 4477,
 2: 596,
 3: 32,
 4: 21,
 5: 9,
 6: 6,
 7: 2,
 8: 4,
 9: 1,
 10: 1,
 11: 1,
 12: 1,
 16: 1,
 18: 1,
 25: 1,
 28: 1,
 51: 1,
 65: 1,
 80: 1,
 104: 1,
 368: 1,
 4355: 1,
 6666: 1,
 7114: 1,
 8340: 1}

Walktrap clustering


In [52]:
# Walktrap community detection on the undirected graph.
# The computation has to live in this cell: with it commented out,
# walktrap_clusters only exists as leftover kernel state and the cell raises
# NameError after Restart & Run All.
start = time.time()
walktrap = gu.community_walktrap(steps=4)
end = time.time()
print('walktrap took %d seconds with %d nodes and %d edges' % (end - start, len(g.vs), len(g.es)))

# cut the dendrogram into a flat clustering
walktrap_clusters = walktrap.as_clustering()

# Histogram of cluster sizes: {cluster size: number of clusters of that size}
walktrap_cl_sizes = walktrap_clusters.sizes()
{s: walktrap_cl_sizes.count(s) for s in set(walktrap_cl_sizes)}


Out[52]:
{1: 5686,
 2: 1085,
 3: 185,
 4: 90,
 5: 61,
 6: 32,
 7: 32,
 8: 21,
 9: 19,
 10: 15,
 11: 14,
 12: 13,
 13: 7,
 14: 4,
 15: 6,
 16: 5,
 17: 4,
 18: 1,
 19: 4,
 20: 5,
 21: 4,
 23: 1,
 24: 1,
 25: 5,
 26: 5,
 27: 2,
 28: 1,
 29: 2,
 30: 3,
 31: 1,
 32: 2,
 33: 3,
 34: 1,
 35: 1,
 36: 1,
 38: 3,
 39: 1,
 40: 1,
 42: 2,
 43: 2,
 44: 1,
 46: 2,
 47: 1,
 49: 1,
 51: 1,
 52: 1,
 54: 1,
 66: 2,
 75: 1,
 76: 1,
 77: 1,
 85: 1,
 86: 1,
 97: 1,
 107: 1,
 121: 1,
 133: 1,
 134: 1,
 135: 1,
 139: 1,
 151: 1,
 153: 1,
 155: 1,
 168: 1,
 182: 1,
 195: 1,
 211: 1,
 286: 1,
 321: 1,
 322: 1,
 355: 1,
 393: 1,
 412: 1,
 417: 1,
 431: 1,
 471: 1,
 513: 1,
 581: 1,
 631: 1,
 1102: 1,
 1293: 1,
 2257: 1,
 3674: 1,
 4640: 1}

In [ ]:
# Tabulate the modularity clusters by size: {cluster size: number of clusters}.
# NOTE(review): this repeats the tabulation at the end of the fastgreedy cell;
# consider removing one of the two.
mod_cl_sizes = mod_clusters.sizes()
dict((size, mod_cl_sizes.count(size)) for size in set(mod_cl_sizes))

In [3]:
# NOTE(review): scratch assignment; x is not used by any other visible cell.
x = 2

In [4]:
# NOTE(review): scratch cell that only echoes x; candidate for removal.
x


Out[4]:
2

In [ ]: