In [67]:
# modify these for your own computer
repo_directory = '/Users/iaincarmichael/Dropbox/Research/law/law-net/'
data_dir = '/Users/iaincarmichael/Documents/courtlistener/data/'
In [35]:
import os
import sys
import time
from math import *
import copy
import cPickle as pickle
# data
import numpy as np
import pandas as pd
# viz
import matplotlib.pyplot as plt
# graph
import igraph as ig
# our code
sys.path.append(repo_directory + 'code/')
from pipeline.download_data import download_bulk_resource, download_master_edgelist
sys.path.append(repo_directory + 'explore/vertex_metrics_experiment/code/')
from make_case_text_files import *
from bag_of_words import *
from similarity_matrix import *
from make_snapshots import *
from make_graph import *
from data_dir_setup import *
# court
court = 'scotus'
network_name = 'scotus'
# directory set up
raw_dir = data_dir + 'raw/'
experiment_data_dir = data_dir + network_name + '/'
text_dir = experiment_data_dir + 'textfiles/'
# jupyter notebook settings
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [ ]:
setup_data_dir(data_dir)
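In [ ]:
# A minimal sketch of what setup_data_dir presumably does: create the
# directory skeleton used below (raw/ plus the per-network folder and its
# subfolders). The exact set of subfolders is an assumption for illustration.
for d in [raw_dir, experiment_data_dir, text_dir, experiment_data_dir + 'nlp/']:
    if not os.path.exists(d):
        os.makedirs(d)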
In [51]:
%time download_op_and_cl_files(data_dir, network_name)
In [ ]:
%time download_master_edgelist(data_dir)
In [ ]:
%time download_scdb(data_dir)
In [ ]:
# create the raw case metadata data frame in the raw/ folder
%time make_subnetwork_raw_case_metadata(data_dir, network_name)
In [ ]:
# create clean case metadata and edgelist from raw data
%time clean_metadata_and_edgelist(data_dir, network_name)
In [20]:
%time make_graph(experiment_data_dir, network_name)
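In [ ]:
# A minimal sketch of the graph construction step above, not the pipeline's
# actual implementation: build a directed igraph citation network from a
# cleaned edgelist and attach case metadata as vertex attributes. The file
# names ('edgelist.csv', 'case_metadata.csv') and column names ('citing',
# 'cited', 'id', 'year') are assumptions for illustration.
edges = pd.read_csv(experiment_data_dir + 'edgelist.csv')
metadata = pd.read_csv(experiment_data_dir + 'case_metadata.csv')

g = ig.Graph(directed=True)
g.add_vertices(metadata['id'].astype(str).tolist())
g.vs['year'] = metadata['year'].tolist()

# map case ids to vertex indices, then add citation edges (citing -> cited)
id_to_index = {v['name']: v.index for v in g.vs}
g.add_edges([(id_to_index[str(c)], id_to_index[str(d)])
             for c, d in zip(edges['citing'], edges['cited'])
             if str(c) in id_to_index and str(d) in id_to_index])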
In [32]:
# make the text files for the given court
%time make_network_textfiles(data_dir, network_name)
In [33]:
%time make_tf_idf(text_dir, experiment_data_dir + 'nlp/', min_df=0, max_df=1)
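In [ ]:
# A minimal sketch of the tf-idf step, assuming make_tf_idf is essentially a
# wrapper around scikit-learn's TfidfVectorizer applied to the per-case text
# files; the real function may differ.
from sklearn.feature_extraction.text import TfidfVectorizer

documents = []
for fname in sorted(os.listdir(text_dir)):
    with open(os.path.join(text_dir, fname), 'r') as f:
        documents.append(f.read())

# max_df is given as a proportion here (1.0 = keep all terms) in this sketch
vectorizer = TfidfVectorizer(min_df=0, max_df=1.0)
tfidf_matrix = vectorizer.fit_transform(documents)  # sparse, n_cases x n_terms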
In [68]:
# load the graph
G = ig.Graph.Read_GraphML(experiment_data_dir + 'scotus_network.graphml')
In [72]:
G.summary()
Out[72]:
In [63]:
vertex_metrics = ['indegree', 'outdegree', 'degree',
                  'd_pagerank', 'authorities', 'hubs']
# add recent citations
vertex_metrics += ['recentcite_' + str(t) for t in 5 * np.arange(1, 6+1)]
active_years = range(1900, 2015 + 1)
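In [ ]:
# A minimal sketch of how a 'recentcite_T' metric could be computed for one
# snapshot year: the number of citations a case received from cases decided in
# the T years up to the snapshot. Assumes each vertex carries a numeric 'year'
# attribute and edges point from citing to cited case; this is an
# illustration, not the pipeline's implementation.
def recent_citations(g, snapshot_year, T):
    counts = []
    for v in g.vs:
        # citing cases are the in-neighbors of v in the citation graph
        citing_years = [g.vs[u]['year'] for u in g.predecessors(v.index)]
        counts.append(sum(1 for y in citing_years
                          if snapshot_year - T < y <= snapshot_year))
    return counts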
In [65]:
%time make_snapshot_vertex_metrics(G, active_years, vertex_metrics, experiment_data_dir)
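In [ ]:
# A minimal sketch of the snapshot idea behind make_snapshot_vertex_metrics:
# for each active year, restrict the network to cases decided by that year and
# compute the vertex metrics on that subgraph. Assumes a numeric 'year' vertex
# attribute; the column names mirror the vertex_metrics list above, and
# storing one data frame per year is an assumption for illustration.
def snapshot_metrics(g, year):
    keep = [v.index for v in g.vs if v['year'] <= year]
    sub = g.subgraph(keep)
    df = pd.DataFrame()
    df['indegree'] = sub.indegree()
    df['outdegree'] = sub.outdegree()
    df['degree'] = sub.degree()
    df['d_pagerank'] = sub.pagerank()
    df['authorities'] = sub.authority_score()
    df['hubs'] = sub.hub_score()
    return df

# e.g. snapshots = {year: snapshot_metrics(G, year) for year in active_years}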