In [1]:
repo_directory = '/Users/iaincarmichael/Dropbox/Research/law/law-net/'
data_dir = '/Users/iaincarmichael/data/courtlistener/'
import numpy as np
import sys
import matplotlib.pyplot as plt
from scipy.stats import rankdata
import cPickle as pickle
from collections import Counter
import pandas as pd
# graph package
import igraph as ig
# our code
sys.path.append(repo_directory + 'code/')
from setup_data_dir import setup_data_dir, make_subnetwork_directory
from pipeline.download_data import download_bulk_resource, download_master_edgelist, download_scdb
from helpful_functions import case_info
from viz import print_describe
sys.path.append(repo_directory + 'vertex_metrics_experiment/code/')
from custom_vertex_metrics import *
from results import *
# which network to download data for
network_name = 'federal' # 'federal', 'ca1', etc
# some sub directories that get used
raw_dir = data_dir + 'raw/'
subnet_dir = data_dir + network_name + '/'
text_dir = subnet_dir + 'textfiles/'
# jupyter notebook settings
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
G = ig.Graph.Read_GraphML(subnet_dir + network_name +'_network.graphml')
In [23]:
jurisdictions = set(G.vs['court'])
plt.figure(figsize=[20, 20])
k = 0
for court in jurisdictions:
    k += 1
    plt.subplot(4, 4, k)

    cases = G.vs.select(court_eq=court)

    # count cases per year and plot them in year order
    year_counts = Counter(cases['year'])
    years_sorted = sorted(year_counts.keys())
    plt.plot(years_sorted,
             [year_counts[y] for y in years_sorted])

    plt.xlabel('year')
    plt.xlim([1750, 2016])
    plt.ylabel('counts')
    plt.title(court)
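The 4 x 4 grid above assumes the federal network has at most 16 jurisdictions; a quick sanity check with nothing repo-specific:

print 'number of jurisdictions: %d' % len(jurisdictions)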
In [25]:
H_seq = [1, 2, 5, 10, 20]
n_H = len(H_seq)

plt.figure(figsize=[8, 4 * n_H])
k = 0
for h in H_seq:
    k += 1

    cr = get_CiteRank(G, h)

    years = G.vs['year']
    cr_year_mean = get_year_aggregate(years, cr, np.mean)
    cr_year_median = get_year_aggregate(years, cr, np.median)

    # mean CiteRank by year
    plt.subplot(n_H, 2, 2 * k - 1)
    plt.plot(cr_year_mean.index, cr_year_mean)
    plt.ylabel('CiteRank_%d mean' % h)
    plt.xlabel('year')

    # median CiteRank by year
    plt.subplot(n_H, 2, 2 * k)
    plt.plot(cr_year_median.index, cr_year_median)
    plt.ylabel('CiteRank_%d median' % h)
    plt.xlabel('year')

    corr = np.corrcoef(cr, years)[0, 1]
    print "h: %d, corr: %f" % (h, corr)
In [72]:
outdegree = G.outdegree()
In [77]:
print 'mean outdegree: %f' % np.mean(outdegree)
print 'median outdegree: %f' % np.median(outdegree)
In [78]:
# citation age: year of the citing case (source) minus year of the cited case (target)
diffs = [G.vs[e[0]]['year'] - G.vs[e[1]]['year'] for e in G.get_edgelist()]
In [86]:
print_describe(diffs)
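print_describe is a small helper from the repo's viz module; presumably it prints summary statistics of a vector. A stand-in with the same effect, using only pandas (an assumption about its behavior, not its actual code):

def print_describe_sketch(values):
    # count, mean, std, min, quartiles, max
    print pd.Series(values).describe()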
In [87]:
bins = np.linspace(-40, 300, 100)
plt.hist(diffs, bins=bins)
plt.xlim(-40, 300)
plt.xlabel('citation age')
In [21]:
scotus_cases = G.vs.select(court_eq='scotus')
scotus_indegree = scotus_cases.indegree()
scotus_outdegree = scotus_cases.outdegree()
scotus_years = scotus_cases['year']
In [22]:
out_by_year = get_year_aggregate(scotus_years, scotus_outdegree, np.median)
in_by_year = get_year_aggregate(scotus_years, scotus_indegree, np.median)
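get_year_aggregate is another repo helper. From the way its result is used here (an .index attribute plotted against the values), it evidently returns a pandas Series indexed by year; a minimal stand-in under that assumption:

def year_aggregate_sketch(years, values, agg_fun):
    # group the values by year and apply the aggregation function
    return pd.Series(values, index=years).groupby(level=0).agg(agg_fun)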
In [25]:
plt.scatter(in_by_year.index, in_by_year,
label='indegree', color='red')
plt.scatter(out_by_year.index, out_by_year,
label='outdegree', color='blue')
plt.ylabel('median')
plt.xlabel('year')
plt.legend(loc='upper right')
plt.xlim([1800, 2017])
plt.ylim([0, 200])
plt.title('SCOTUS cases in federal network')
In [4]:
snap_dir = subnet_dir + 'snapshots/'
year = 2015
S = pd.read_csv(snap_dir + 'vertex_metrics_%d.csv' % year, index_col=0)
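The snapshot file is produced elsewhere in the experiment pipeline; a quick look at its shape and columns (standard pandas, the column names themselves depend on the pipeline):

print S.shape
print S.columns.tolist()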
In [9]:
h = 5
cr = get_CiteRank(G, h)
In [10]:
top_cr = pd.Series(cr, index=G.vs['name'])
In [16]:
for op_id in top_cr.sort_values(ascending=False)[:10].index:
    case_info(op_id)
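The CiteRank values for the same top ten opinions can also be printed directly, without the repo's case_info helper:

print top_cr.sort_values(ascending=False)[:10]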
In [ ]:
diffs = [G.vs[e[0]]['year'] - G.vs[e[1]]['year'] for e in G.get_edgelist()]
# edges where the cited case is *newer* than the citing case
backward = np.where(np.array(diffs) < 0)
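Before inspecting individual edges, it helps to see how many edges point backward in time at all (G.ecount() is the standard igraph edge count):

n_backward = len(backward[0])
print 'backward edges: %d of %d (%.4f%%)' % (n_backward, G.ecount(),
                                             100.0 * n_backward / G.ecount())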
In [71]:
i = 10
bad_edge = G.es[backward[0][i]]
source = G.vs[bad_edge.source]
target = G.vs[bad_edge.target]
print source
case_info(source['name'])
print
print target
case_info(target['name'])