In [1]:
import os

# Machine-specific locations of the law-net repository and the courtlistener
# data. The defaults are the original hard-coded paths; set the environment
# variables LAW_NET_REPO_DIR / LAW_NET_DATA_DIR to run the notebook on another
# machine without editing this cell.
# os.path.join(path, '') guarantees the trailing separator that the string
# concatenations further down (e.g. repo_directory + 'code/') rely on.
repo_directory = os.path.join(
    os.environ.get('LAW_NET_REPO_DIR',
                   '/Users/iaincarmichael/Dropbox/Research/law/law-net/'),
    '')
data_dir = os.path.join(
    os.environ.get('LAW_NET_DATA_DIR',
                   '/Users/iaincarmichael/Data/courtlistener/'),
    '')
import numpy as np
import sys
import matplotlib.pyplot as plt
from scipy.stats import rankdata
from collections import Counter
import time
from math import *
from scipy.sparse import csr_matrix
# graph package
import igraph as ig
# our code
sys.path.append(repo_directory + 'code/')
from setup_data_dir import setup_data_dir, make_subnetwork_directory
from pipeline.download_data import download_bulk_resource, download_master_edgelist, download_scdb
from helpful_functions import case_info
sys.path.append(repo_directory + 'vertex_metrics_experiment/code/')
from rankscore_experiment_sort import *
from rankscore_experiment_LR import *
from rankscore_experiment_search import *
from time_aware_pagerank import *
from make_tr_edge_df import *
# Name of the network whose files this notebook reads: 'federal', 'ca1', etc.
network_name = 'federal'

# Sub-directories derived from data_dir (data_dir must end with '/').
raw_dir = '{}raw/'.format(data_dir)
subnet_dir = '{}{}/'.format(data_dir, network_name)
text_dir = '{}textfiles/'.format(subnet_dir)
# jupyter notebook settings
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
# Load the citation network selected by network_name from its GraphML file
# (<subnet_dir>/<network_name>_network.graphml).
G = ig.Graph.Read_GraphML('%s%s_network.graphml' % (subnet_dir, network_name))
In [3]:
# # get a small subgraph to work with
# np.random.seed(754) # 234, 754
# v = G.vs[np.random.choice(range(len(G.vs)))]
# subset_ids = G.neighborhood(v.index, order=2)
# g = G.subgraph(subset_ids)
# print '%d nodes' % len(g.vs)
# A = np.array(g.get_adjacency().data)
# years = np.array(g.vs['year']).astype(int)
In [ ]:
In [ ]:
%%time
# Dense adjacency matrix of G as a numpy array. Building it is the expensive
# step timed by this cell; the commented lines below save / reload a cached copy.
A = np.array(G.get_adjacency().data)
# np.save('scotus_adjmat', A)
# A = np.load('scotus_adjmat.npy')

# Integer year of every vertex, read from the 'year' vertex attribute.
# This must be defined here: the time-aware PageRank call and every plot
# below use `years`, and nothing else in the notebook assigns it.
years = np.array(G.vs['year']).astype(int)
In [ ]:
# Parameters passed (in this order) to get_time_aware_pagerank in the next cell.
# NOTE(review): presumably p is the damping factor and qtv / qvt are the
# time->vertex and vertex->time transition weights -- confirm against
# time_aware_pagerank.py.
p, qtv, qvt = .85, .2, .8
In [ ]:
%%time
# Run time-aware PageRank on adjacency matrix A with the vertex years and the
# parameters set above. Returns two sequences: ta_pr is plotted below with one
# value per vertex, pr_year with one value per calendar year from min(years)
# to max(years).
ta_pr, pr_year = get_time_aware_pagerank(A, years, p, qtv, qvt)
In [ ]:
%%time
# Ordinary PageRank from igraph with its default arguments; this is the
# baseline that ta_pr is compared against in the plots below.
pr = G.pagerank()
In [ ]:
# 2x2 summary figure comparing ordinary PageRank (pr) with time-aware
# PageRank (ta_pr), plus the per-year values (pr_year).
plt.figure(figsize=[10, 10])
n = len(ta_pr)
m = len(pr_year)

# the year range is shared by three of the panels, so compute it once
first_year, last_year = min(years), max(years)

# panel 1: pr and ta_pr for every vertex, overlaid on a common y-axis
plt.subplot(2, 2, 1)
plt.scatter(range(n), pr, color='blue', label='pr')
plt.scatter(range(n), ta_pr, color='red', label='ta pr')
plt.xlim([0, n])
plt.ylim([0, 1.2 * max(max(ta_pr), max(pr))])
plt.legend(loc='upper right')
plt.xlabel('vertex')
plt.ylabel('pr value')

# panel 2: pr_year against each calendar year
plt.subplot(2, 2, 2)
plt.scatter(range(first_year, last_year + 1), pr_year)
plt.xlim([first_year, last_year])
plt.ylim([0, 1.2 * max(pr_year)])
plt.ylabel('pr value')
plt.xlabel('year')

# panel 3: ordinary pr of each vertex against its year
plt.subplot(2, 2, 3)
plt.scatter(years, pr)
plt.xlim([first_year, last_year])
plt.ylim([0, max(pr)])
plt.ylabel('pr value')
plt.xlabel('year')

# panel 4: ta_pr of each vertex against its year
plt.subplot(2, 2, 4)
plt.scatter(years, ta_pr)
plt.xlim([first_year, last_year])
# scale the axis to the data shown in this panel; using max(pr) here would
# clip any ta_pr value larger than the largest ordinary PageRank
plt.ylim([0, max(ta_pr)])
plt.ylabel('ta pr value')
plt.xlabel('year')
In [ ]:
# Sweep qvt over a grid with p and qtv held fixed, storing one column of
# results per qvt value.
p, qtv = .85, .3
qvts = [.1, .3, .5, .7, .9]
num_values = len(qvts)

# Output arrays are sized from the ta_pr / pr_year results of the earlier run.
ta_prs = np.zeros((len(ta_pr), num_values))
pr_years = np.zeros((len(pr_year), num_values))

for i, qvt in enumerate(qvts):
    # note: overwrites the ta_pr / pr_year / qvt variables of the earlier cells
    ta_pr, pr_year = get_time_aware_pagerank(A, years, p, qtv, qvt)
    ta_prs[:, i] = ta_pr
    pr_years[:, i] = pr_year
In [ ]:
# Two-panel figure for the qvt sweep.
plt.figure(figsize=[10, 5])

# left panel: ta_pr as a function of qvt for 500 vertices drawn at random
# (with replacement; the draw is unseeded, so the sample differs between runs)
plt.subplot(1, 2, 1)
n_vertices = A.shape[0]
for i in range(500):
    # An int population is sampled as if it were np.arange(n_vertices), so
    # this is the same draw as np.random.choice(range(n_vertices)) without
    # rebuilding the full index list on every iteration.
    node = np.random.choice(n_vertices)
    plt.plot(qvts, ta_prs[node, :], alpha=.4)
plt.xlabel('qvt')
plt.ylabel('pr value')
plt.xlim([min(qvts), max(qvts)])
# plt.ylim([ta_prs.min(), ta_prs.max()])

# right panel: pr_year against calendar year, one line per qvt value
plt.subplot(1, 2, 2)
first_year, last_year = min(years), max(years)
for i in range(num_values):
    plt.plot(range(first_year, last_year + 1),
             pr_years[:, i],
             label='qvt: %1.2f' % qvts[i])
plt.xlim([first_year, last_year])
plt.ylim([0, 1.2 * pr_years.max()])
plt.ylabel('pr value')
plt.xlabel('year')
plt.legend(loc='upper right')
In [ ]:
# seaborn.apionly gives access to seaborn without restyling matplotlib, but
# the module does not exist in newer seaborn releases. Fall back to the plain
# package import so this cell keeps working there.
try:
    import seaborn.apionly as sns
except ImportError:
    import seaborn as sns
In [ ]:
# Scatter ta_pr against year, one colour per qvt value. Only the first
# ta_prs.shape[1] - 1 columns are drawn, i.e. the last qvt value is left out.
num_shown = ta_prs.shape[1] - 1
colors = sns.color_palette("Blues", num_shown)

for i, shade in zip(range(num_shown), colors):
    plt.scatter(years, ta_prs[:, i], color=shade, label='qvt: %1.2f' % qvts[i])

plt.xlim([min(years), max(years)])
plt.ylim([0, .02])
plt.xlabel('year')
plt.ylabel('pr value')
plt.legend(loc='upper right')
In [ ]: