In [15]:
repo_directory = '/Users/iaincarmichael/Dropbox/Research/law/law-net/'
data_dir = '/Users/iaincarmichael/Data/courtlistener/'
import sys
import numpy as np
import igraph as ig
sys.path.append(repo_directory + 'vertex_metrics_experiment/code/')
from experiment_helper_functions import get_test_cases
from run_exper import *
# which network to download data for
network_name = 'scotus' # 'federal', 'ca1', etc
# some sub directories that get used
subnet_dir = data_dir + network_name + '/'
results_dir = subnet_dir + 'results/'
# jupyter notebook settings
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [6]:
# load the citation network for this subnet from its GraphML export
G = ig.Graph.Read_GraphML(subnet_dir + network_name +'_network.graphml')
In [7]:
# Full candidate set of vertex metrics, kept for reference.
# NOTE(review): in the original cell this list was built and then immediately
# overwritten by the small subset below, so it was dead code. It is preserved
# here under a different name; swap it in if the full sweep is intended.
all_vertex_metrics = ['indegree', 'outdegree', 'degree',
                      'd_pagerank', 'u_pagerank',
                      'authorities', 'hubs',
                      # 'd_eigen',  # d_eigen is being problematic
                      'u_eigen',
                      'd_betweenness', 'u_betweenness',
                      'd_closeness', 'u_closeness']
# citation counts over trailing windows of 1..10 and selected longer windows
all_vertex_metrics += ['recentcite_' + str(t) for t in np.arange(1, 10 + 1)]
all_vertex_metrics += ['recentcite_' + str(t) for t in [15, 20, 25, 30, 35, 40]]
all_vertex_metrics += ['age', 'similarity']

# metrics actually used in this run (same final value as the original cell)
vertex_metrics = ['age', 'similarity']
vertex_metrics += ['indegree', 'outdegree']

# years from which test cases may be drawn
active_years = range(1900, 2015 + 1)
In [10]:
# FIX: the original line ended with a trailing comma (`test_seed = 4332,`),
# which made test_seed the tuple (4332,) rather than the integer seed.
test_seed = 4332
num_test_cases = 10
# draw a reproducible sample of test cases from the active years
test_cases = get_test_cases(G, active_years, num_test_cases, seed=test_seed)
In [11]:
# label for this experiment run; used to build results sub-directories below
name = 'test'
In [14]:
%%time
test_case_ids = [v.index for v in test_cases]
sort_params = {}
exper_params_sort = {'vertex_metrics': vertex_metrics,
'active_years': active_years,
'test_case_ids': test_case_ids,
'sort_params': sort_params}
run_sort(G, exper_params_sort, subnet_dir, name)
In [ ]:
# how many candidate cases the matcher retains per test case
num_to_keep = 5000
match_params = dict(num_to_keep=num_to_keep)

# bundle everything the match experiment needs
exper_params_match = dict(vertex_metrics=vertex_metrics,
                          active_years=active_years,
                          test_case_ids=test_case_ids,
                          match_params=match_params)

run_match(G, exper_params_match, subnet_dir, name)
In [ ]:
# NOTE(review): despite the name, this is the number of *present* edges in G --
# presumably run_logreg samples this many absent edges; confirm against run_exper.
num_absent_edges = len(G.es)

# seed for sampling the edge data frame
seed_edge_df = 32432

# feature preprocessing choices for the logistic regression
metric_normalization = 'mean'
feature_transform = 'poly2'

# whether to regenerate the training data from scratch
make_tr_data = False

logreg_params = dict(num_absent_edges=num_absent_edges,
                     seed_edge_df=seed_edge_df,
                     metric_normalization=metric_normalization,
                     feature_transform=feature_transform,
                     make_tr_data=make_tr_data)

# bundle everything the logistic-regression experiment needs
exper_params_logreg = dict(vertex_metrics=vertex_metrics,
                           active_years=active_years,
                           test_case_ids=test_case_ids,
                           logreg_params=logreg_params)

run_logreg(G, exper_params_logreg, subnet_dir, name)
In [18]:
import pickle  # explicit import; originally only available via `from run_exper import *`

# load the rank-loss results written by run_sort above.
# FIX: use a context manager -- the original open() leaked the file handle.
# NOTE(review): pickle.load can execute arbitrary code; fine here since these
# files are locally generated results, but never unpickle untrusted data.
with open(results_dir + 'sort/%s/rankloss_sort.p' % name, 'rb') as f:
    rankloss_sort = pickle.load(f)
# rankloss_match = pickle.load(open( results_dir + 'match/%s/rankloss_match.p' % name, "rb" ) )
# rankloss_logreg = pickle.load(open( results_dir + 'logreg/%s/rankloss_logreg.p' % name, "rb" ) )
In [22]:
def _mean_loss_sorted(metric):
    """Average the given rank-loss metric over test cases, sorted ascending."""
    return rankloss_sort[metric].mean().sort_values()

MRS_sort = _mean_loss_sorted('MRS')
RR_sort = _mean_loss_sorted('RR')
PAK100_sort = _mean_loss_sorted('PAK100')
PAK1000_sort = _mean_loss_sorted('PAK1000')

# display mean rank score, best vertex metric first
MRS_sort
Out[22]:
In [ ]: