In [1]:
import os

# Locations of the law-net repository checkout and of the CourtListener data directory.
# Both default to the original author's machine; set the LAW_NET_REPO_DIR and/or
# LAW_NET_DATA_DIR environment variables to run the notebook somewhere else.
# os.path.join(path, '') guarantees the trailing separator that the string
# concatenations further down (e.g. repo_directory + 'code/') rely on.
repo_directory = os.path.join(
    os.environ.get('LAW_NET_REPO_DIR')
    or '/Users/iaincarmichael/Dropbox/Research/law/law-net/',
    '')

data_dir = os.path.join(
    os.environ.get('LAW_NET_DATA_DIR')
    or '/Users/iaincarmichael/Data/courtlistener/',
    '')

import numpy as np
import sys
import matplotlib.pyplot as plt
from scipy.stats import rankdata
from collections import Counter
import time
from math import *

from scipy.sparse import csr_matrix

# graph package
import igraph as ig

# our code
sys.path.append(repo_directory + 'code/')
from setup_data_dir import setup_data_dir, make_subnetwork_directory
from pipeline.download_data import download_bulk_resource, download_master_edgelist, download_scdb
from helpful_functions import case_info

sys.path.append(repo_directory + 'vertex_metrics_experiment/code/')
from rankscore_experiment_sort import *
from rankscore_experiment_LR import *
from rankscore_experiment_search import *
from time_aware_pagerank import *


from make_tr_edge_df import *

# network whose data this notebook works with: 'federal', 'ca1', etc
network_name = 'federal'


# sub directories of the data directory that get used
raw_dir = ''.join([data_dir, 'raw/'])
subnet_dir = ''.join([data_dir, network_name, '/'])
text_dir = ''.join([subnet_dir, 'textfiles/'])


# jupyter notebook settings:
# - autoreload (mode 2) so that edits to the imported project modules are picked up
#   without restarting the kernel
# - render matplotlib figures inline, below the cell that creates them
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
# load the citation network selected by network_name from its GraphML file
network_graphml_path = subnet_dir + network_name + '_network.graphml'
G = ig.Graph.Read_GraphML(network_graphml_path)

small subgraph


In [3]:
# # get a small subgraph to work with
# np.random.seed(754) # 234, 754
# v = G.vs[np.random.choice(range(len(G.vs)))]

# subset_ids = G.neighborhood(v.index, order=2)
# g = G.subgraph(subset_ids)

# print '%d nodes' % len(g.vs)

# A = np.array(g.get_adjacency().data)
# years = np.array(g.vs['year']).astype(int)

In [ ]:

SCOTUS


In [ ]:
%%time
# dense adjacency matrix of G (one row and one column per vertex)
# NOTE(review): this is dense, so memory grows with the square of the vertex count --
# confirm it is feasible for the selected network
A = np.array(G.get_adjacency().data)

# 'year' attribute of every vertex, in vertex order. This used to be commented out,
# which left `years` undefined on a fresh kernel even though the time-aware
# PageRank and plotting cells below all read it.
years = np.array(G.vs['year']).astype(int)

# np.save('scotus_adjmat', A)
# A = np.load('scotus_adjmat.npy')

Time-aware PageRank


In [ ]:
# parameters handed to get_time_aware_pagerank in the next cell
# NOTE(review): presumably p is the PageRank damping factor and qtv / qvt are the
# time->vertex / vertex->time transition weights -- confirm in time_aware_pagerank
p, qtv, qvt = .85, .2, .8

In [ ]:
%%time
# time-aware PageRank for the parameters set above. ta_pr holds one score per vertex
# (it is plotted against `years` below) and pr_year one score per calendar year from
# min(years) to max(years) (it is plotted against that year range below).
ta_pr, pr_year =  get_time_aware_pagerank(A, years, p, qtv, qvt)

In [ ]:
%%time
# igraph's PageRank with its default arguments: the baseline that the time-aware
# scores are compared against in the plots below
pr = G.pagerank()

In [ ]:
# 2x2 comparison of igraph PageRank (pr) with time-aware PageRank (ta_pr, pr_year)
plt.figure(figsize=[10, 10])
n = len(ta_pr)
m = len(pr_year)

# the year range is shared by three of the four panels, so compute it once
first_year, last_year = min(years), max(years)

# panel 1: pr and ta pr of every vertex, by vertex index
plt.subplot(2,2,1)
plt.scatter(range(n), pr, color='blue', label='pr')
plt.scatter(range(n), ta_pr, color='red', label='ta pr')
plt.xlim([0, n])
plt.ylim([0, 1.2 * max(max(ta_pr), max(pr))])
plt.legend(loc='upper right')
plt.xlabel('vertex')
plt.ylabel('pr value')

# panel 2: score of each year
plt.subplot(2,2,2)
plt.scatter(range(first_year, last_year + 1), pr_year)
plt.xlim([first_year, last_year])
plt.ylim([0, 1.2 * max(pr_year)])
plt.ylabel('pr value')
plt.xlabel('year')

# panel 3: pr of each vertex against the vertex's year
plt.subplot(2,2,3)
plt.scatter(years, pr)
plt.xlim([first_year, last_year])
plt.ylim([0, max(pr)])
plt.ylabel('pr value')
plt.xlabel('year')


# panel 4: ta pr of each vertex against the vertex's year
plt.subplot(2,2,4)
plt.scatter(years, ta_pr)
plt.xlim([first_year, last_year])
# The upper limit used to be max(pr) alone, which cut off every ta pr value
# larger than the largest pr value. Covering both series keeps the panel at least
# as tall as panel 3 (so the two remain comparable) without clipping ta_pr.
plt.ylim([0, max(max(pr), max(ta_pr))])
plt.ylabel('ta pr value')
plt.xlabel('year')

Vary qvt


In [ ]:
# sweep qvt while holding p and qtv fixed
p = .85
qtv = .3
qvts = [.1, .3, .5, .7, .9]

num_values = len(qvts)

# One (per-vertex scores, per-year scores) pair for every qvt value.
# The results are held under their own names so that the ta_pr / pr_year / qvt
# of the single run above are not overwritten (re-running the comparison plot
# after this cell would otherwise silently show the qvt=.9 run), and so that the
# result arrays are sized from this sweep rather than from an earlier cell.
sweep_results = [get_time_aware_pagerank(A, years, p, qtv, qvt_value)
                 for qvt_value in qvts]

# column i of each array holds the result for qvts[i]
ta_prs = np.zeros((len(sweep_results[0][0]), num_values))
pr_years = np.zeros((len(sweep_results[0][1]), num_values))

for i, (vertex_scores, year_scores) in enumerate(sweep_results):
    pr_years[:, i] = year_scores
    ta_prs[:, i] = vertex_scores

In [ ]:
plt.figure(figsize=[10, 5])

# left panel: ta pr as a function of qvt for a random sample of vertices
plt.subplot(1,2,1)
# Sample from a private, seeded RandomState so the same vertices are drawn on every
# run (the figure is reproducible) without touching numpy's global random state.
# Vertices are still drawn with replacement, but in a single call instead of
# rebuilding range(A.shape[0]) on each of the 500 iterations.
num_sampled_vertices = 500
vertex_sampler = np.random.RandomState(754)
sampled_vertices = vertex_sampler.choice(A.shape[0], size=num_sampled_vertices)
for node in sampled_vertices:
    plt.plot(qvts,
             ta_prs[node,: ],
             alpha=.4)

plt.xlabel('qvt')
plt.ylabel('pr value')
plt.xlim([min(qvts), max(qvts)])
# plt.ylim([ta_prs.min(), ta_prs.max()])


# right panel: per-year scores, one line for each qvt value
plt.subplot(1,2,2)
for i in range(num_values):
    plt.plot(range(min(years), max(years) + 1),
             pr_years[:, i],
             label='qvt: %1.2f' % qvts[i])

plt.xlim([min(years), max(years) ])
plt.ylim([0, 1.2 * pr_years.max()])
plt.ylabel('pr value')
plt.xlabel('year')
plt.legend(loc='upper right')

In [ ]:
# seaborn.apionly (seaborn without its matplotlib restyling) was deprecated in
# seaborn 0.8 and removed in 0.9. From 0.8 on, a plain `import seaborn` no longer
# changes matplotlib's defaults, so it is an equivalent fallback on newer versions.
try:
    import seaborn.apionly as sns
except ImportError:
    import seaborn as sns

In [ ]:
# ta pr of every vertex against its year, one colour per qvt value;
# the run for the last qvt value is left out of this figure
num_shown = ta_prs.shape[1] - 1
colors = sns.color_palette("Blues", num_shown)
for column, shade in enumerate(colors):
    plt.scatter(years, ta_prs[:, column],
                label='qvt: %1.2f' % qvts[column], color=shade)

year_limits = [min(years), max(years)]
plt.xlim(year_limits)
plt.ylim([0, .02])
plt.ylabel('pr value')
plt.xlabel('year')
plt.legend(loc='upper right')

In [ ]: