In [1]:
# NOTE(review): hardcoded absolute local paths — portable only on the
# original author's machine; consider a configurable DATA_DIR
repo_directory = '/Users/iaincarmichael/Dropbox/Research/law/law-net/'
data_dir = '/Users/iaincarmichael/data/courtlistener/'
import numpy as np
import pandas as pd
import sys
import matplotlib.pyplot as plt
from scipy.stats import rankdata, ttest_ind, ttest_rel
# cPickle is Python 2 only (Python 3 merged it into pickle)
import cPickle as pickle
# graph package
import igraph as ig
# our code
sys.path.append(repo_directory + 'code/')
from setup_data_dir import setup_data_dir, make_subnetwork_directory
from pipeline.download_data import download_bulk_resource, download_master_edgelist, download_scdb
from helpful_functions import case_info
sys.path.append(repo_directory + 'vertex_metrics_experiment/code/')
# NOTE(review): wildcard imports pollute the namespace — plot_scores (and
# presumably copy, used below) come in via these; prefer explicit imports
from results import *
from make_tr_edge_df import *
# which network to download data for
network_name = 'scotus' # 'federal', 'ca1', etc
# some sub directories that get used
raw_dir = data_dir + 'raw/'
subnet_dir = data_dir + network_name + '/'
text_dir = subnet_dir + 'textfiles/'
results_dir = subnet_dir + 'results/'
# jupyter notebook settings
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
# load the citation network for the chosen subnetwork from GraphML
graph_path = subnet_dir + network_name + '_network.graphml'
G = ig.Graph.Read_GraphML(graph_path)
In [3]:
# quick sanity check: vertex/edge counts and attributes of the loaded graph
G.summary()
Out[3]:
In [8]:
# results subdirectory to load; earlier candidate runs kept for reference
# name = 'scotus_test'
# name = 'scotus_citerank_words'
# name = 'eigen'
name = '1_16_17'

# pd.read_pickle takes a file path directly; wrapping the path in open(...)
# leaked the file handle (and older pandas versions expect a path string)
sort_path = results_dir + 'sort/%s/rankloss_sort.p' % name
rankloss_sort = pd.read_pickle(sort_path)

match_path = results_dir + 'match/%s/rankloss_match.p' % name
rankloss_match = pd.read_pickle(match_path)

# logreg results not available for this run
# logreg_path = results_dir + 'logreg/%s/rankloss_LR.p' % name
# rankloss_logreg = pd.read_pickle(logreg_path)

# experiment name -> {metric -> DataFrame of rank losses per vertex metric}
rankloss = {'sort': rankloss_sort,
            'match': rankloss_match}#,
            #'logreg': rankloss_logreg}
In [9]:
# which vertex metrics have results for the sort experiment / MRS loss
rankloss['sort']['MRS'].columns
Out[9]:
In [14]:
# mean rank loss for the two headline metrics (text length vs. in-degree)
exper = 'sort'
metric = 'MRS'
headline_metrics = ['indegree', 'num_words']
rankloss[exper][metric][headline_metrics].mean()
Out[14]:
In [11]:
# (removed dev-only `?plot_scores` introspection; run help(plot_scores)
# interactively if the signature is needed)
In [51]:
exper = 'sort'
metric = 'MRS'

# large canvas: this plots every metric at once
plt.figure(figsize=(10, 10))

# pass network_name for consistency with the other plot_scores calls below
plot_scores(rankloss[exper][metric], exper=exper, metric=metric,
            network_name=network_name)
In [79]:
# experiment / loss metric used by all the plotting cells below
exper = 'sort'
metric = 'MRS'
In [80]:
# every vertex metric with results for this experiment/metric pair
# (referenced by the commented-out time-aware cells below)
all_metrics = list(rankloss[exper][metric].columns)
In [56]:
# directed, time-agnostic network metrics
dir_time_agnostic_metrics = [
    'indegree', 'outdegree', 'd_pagerank', 'authorities', 'd_betweenness',
]

plot_scores(rankloss[exper][metric][dir_time_agnostic_metrics],
            exper=exper, metric=metric, network_name=network_name)
In [58]:
# undirected variants stacked on top of the directed, time-agnostic metrics.
# list() copies the list; the original used copy.copy, but `copy` is never
# imported in this notebook (presumably leaked in via a wildcard import)
undir_time_agnostic_metrics = list(dir_time_agnostic_metrics)
undir_time_agnostic_metrics += ['u_pagerank', 'u_eigen', 'u_betweenness', 'degree']
plot_scores(rankloss[exper][metric][undir_time_agnostic_metrics], exper=exper, metric=metric, network_name=network_name)
In [ ]:
In [63]:
# directed time-agnostic metrics plus the text-length baseline.
# list() copies the list; the original used copy.copy, but `copy` is never
# imported in this notebook (presumably leaked in via a wildcard import)
dir_time_agnostic_metrics_text = list(dir_time_agnostic_metrics)
dir_time_agnostic_metrics_text += ['num_words']
plot_scores(rankloss[exper][metric][dir_time_agnostic_metrics_text], exper=exper, metric=metric, network_name=network_name)
In [47]:
# NOTE(review): commented-out experiment (time-aware directed metrics),
# kept for provenance; relies on all_metrics defined above
# dir_time_aware_metrics = [m for m in all_metrics if 'recentcite' in m]
# dir_time_aware_metrics += [m for m in all_metrics if 'citerank' in m]
# dir_time_aware_metrics += ['age']
In [48]:
# NOTE(review): plot for the commented-out time-aware experiment above
# plot_scores(rankloss[exper][metric][dir_time_aware_metrics], exper=exper, metric=metric)
In [59]:
# full set of directed metrics: time-agnostic + citerank/recentcite
# variants + age. list() copies the list; the original used copy.copy,
# but `copy` is never imported in this notebook
all_dir_metrics = list(dir_time_agnostic_metrics)
# all_dir_metrics += ['polyrank_2', 'polyrank_5', 'polyrank_10']
all_dir_metrics += ['citerank_2', 'citerank_5', 'citerank_10', 'citerank_50']
all_dir_metrics += ['recentcite_2', 'recentcite_5', 'recentcite_10', 'recentcite_20']
all_dir_metrics += ['age']
In [61]:
# compare every directed metric (time-agnostic and time-aware) in one figure
plot_scores(rankloss[exper][metric][all_dir_metrics], exper=exper, metric=metric, network_name=network_name)
In [15]:
# head-to-head comparison of two metrics via a paired two-sided t-test
# on the per-snapshot rank losses; earlier pairings kept for reference
# to_compare = ['outdegree', 'hubs']
# to_compare = ['recentcite_10', 'citerank_2']
to_compare = ['num_words', 'indegree']

exper = 'sort'
metric = 'MRS'
data = rankloss[exper][metric][to_compare]

# parenthesized single-argument print: identical output under Python 2,
# and also valid Python 3 (original used Py2-only print statements)
print('%s vs. %s' % (to_compare[0], to_compare[1]))
print('%s experiment, %s' % (exper, metric))
print('two sided t-test for equal means')
print('')
print('dependent paired samples')
# ttest_rel: samples are paired (same snapshots scored by both metrics)
print(ttest_rel(data[to_compare[0]], data[to_compare[1]]))
In [ ]: