In [17]:
top_directory = '/Users/iaincarmichael/Dropbox/Research/law/law-net/'

from __future__ import division

import os
import sys
import time
from math import *
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import igraph as ig
import copy

# our code
sys.path.append(top_directory + 'code/')
from load_data import load_and_clean_graph, case_info

from make_snapshots import *
from make_edge_df import *
from attachment_model_inference import *
from compute_ranking_metrics import *
from pipeline_helper_functions import *


# directory set up
data_dir = top_directory + 'data/'
experiment_data_dir = top_directory + 'explore/vertex_metrics_experiment/experiment_data/'

court_name = 'scotus'

# jupyter notebook settings
%load_ext autoreload
%autoreload 2
%matplotlib inline


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [2]:
G = load_and_clean_graph(data_dir, court_name)
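
A quick size check on the loaded citation network (a sanity-check cell added here for orientation, not part of the original pipeline; vcount and ecount are standard igraph calls):

In [ ]:
# how many cases and citations did we load?
print 'loaded %d cases and %d citations' % (G.vcount(), G.ecount())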

Make snapshots


In [ ]:
vertex_metrics = ['indegree', 's_pagerank']
columns_to_use = ['indegree', 's_pagerank', 'age', 'similarity']

snapshot_year_list = np.array([year for year in range(1760, 2021) if year % 10 == 0])
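
make_snapshot_vertex_metrics (from make_snapshots.py) computes the chosen vertex metrics on the citation network as it stood at each snapshot year and writes the results to experiment_data_dir. For a single snapshot year the idea looks roughly like the sketch below, assuming each vertex carries a 'year' attribute and using igraph's built-in pagerank as a stand-in for s_pagerank (both are assumptions, not the actual implementation):

In [ ]:
# illustrative only: vertex metrics on the network as of one snapshot year
year = 1950
past = G.vs.select(year_le=year)              # cases decided by `year` (assumes a 'year' attribute)
G_snap = G.subgraph([v.index for v in past])  # citation network at that time

snap_metrics = pd.DataFrame({'indegree': G_snap.indegree(),
                             's_pagerank': G_snap.pagerank()})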

In [9]:
start = time.time()
make_snapshot_vertex_metrics(G, snapshot_year_list, vertex_metrics,
                                 experiment_data_dir)
runtime = time.time() - start
print 'make_snapshot_vertex_metrics took %d seconds' % runtime


make_snapshot_vertex_metrics took 4 seconds

Make edge dataframe


In [11]:
num_non_edges_to_add = len(G.es())
seed_edgedf = 432
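
make_edge_df (from the make_edge_df module) assembles the training data for the attachment model: the observed citations are positive examples and num_non_edges_to_add randomly sampled absent edges are negatives, each labeled with the metrics in columns_to_use. Stripped of the metric columns, the sampling idea is roughly the following (illustrative only, not the actual implementation):

In [ ]:
# illustrative only: label observed citations 1 and sampled non-edges 0
import random
random.seed(seed_edgedf)

pos = [(e.source, e.target, 1) for e in G.es]
neg = []
n = G.vcount()
while len(neg) < num_non_edges_to_add:
    i, j = random.randrange(n), random.randrange(n)
    if i != j and G.get_eid(i, j, error=False) == -1:   # skip existing citations
        neg.append((i, j, 0))

edge_df_sketch = pd.DataFrame(pos + neg,
                              columns=['citing', 'cited', 'is_edge'])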

In [22]:
start = time.time()
make_edge_df(G, experiment_data_dir, snapshot_year_list,
              num_non_edges_to_add, columns_to_use, seed=seed_edgedf)
runtime = time.time() - start
print 'make_edge_df took %d seconds' % runtime


make_edge_df took 474 seconds

Run inference


In [24]:
start = time.time()
LogReg = fit_logistic_regression(experiment_data_dir, columns_to_use)
runtime = time.time() - start
print 'fit_logistic_regression took %d seconds' % runtime


fit_logistic_regression took 2 seconds
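
fit_logistic_regression (from attachment_model_inference) fits the attachment model: the columns in columns_to_use predict whether a candidate edge is a real citation. On a toy edge table the equivalent scikit-learn fit would look like this (made-up values, purely illustrative; the actual function reads the edge data written by make_edge_df):

In [ ]:
# illustrative only: logistic attachment model on a toy edge table
from sklearn.linear_model import LogisticRegression

toy_edges = pd.DataFrame({'indegree':   [25, 0, 3, 1],
                          's_pagerank': [0.004, 0.0001, 0.001, 0.0002],
                          'age':        [5, 60, 12, 30],
                          'similarity': [0.8, 0.05, 0.3, 0.1],
                          'is_edge':    [1, 0, 1, 0]})

toy_model = LogisticRegression()
toy_model.fit(toy_edges[columns_to_use], toy_edges['is_edge'])
print toy_model.coef_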

Compute ranking metrics


In [29]:
R = 100

seed_ranking = 654242

In [33]:
start = time.time()
test_case_rank_scores = compute_ranking_metrics(G,
                                                LogReg,
                                                columns_to_use,
                                                experiment_data_dir,
                                                snapshot_year_list,
                                                R,
                                                seed=seed_ranking)
runtime = time.time() - start 
print 'compute_ranking_metrics took %d seconds for %d test cases (%1.3f seconds per case)' % (runtime, R, runtime/R)


compute_ranking_metrics took 20 seconds for 100 test cases (0.203 seconds per case)
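
compute_ranking_metrics scores the fitted model on R held-out test cases: for each test case, the candidate precedents are ranked by predicted citation probability and the ranks of the actually cited cases are recorded (smaller is better). For a single test case the scoring idea is roughly this (hypothetical case names and probabilities, not the function's actual output):

In [ ]:
# illustrative only: rank-based score for one test case
probs = pd.Series({'caseA': 0.9, 'caseB': 0.2, 'caseC': 0.6, 'caseD': 0.1})
cited = ['caseC']                           # cases the test case actually cites

ranks = probs.rank(ascending=False)         # 1 = most likely to be cited
score = ranks[cited].mean() / len(probs)    # mean rank, normalized to (0, 1]
print score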
