This notebook downloads and cleans the SCOTUS subnetwork data. It can be modified to create any jurisdiction subnetwork and also the federal appellate subnetwork.
You have to modify the two paths in the cell below for your own computer.
This code is a little janky and subject to change.
In [2]:
# modify these for your own computer
# NOTE: keep the trailing '/' on both paths -- later cells build sub-paths by
# plain string concatenation (e.g. repo_directory + 'code/', data_dir + 'raw/').
# root of the local copy of the code repository
repo_directory = '/Users/iaincarmichael/Dropbox/Research/law/law-net/'
# root folder that holds the raw/ folder and the per-network sub-folders
data_dir = '/Users/iaincarmichael/data/courtlistener/'
network_name is the subnetwork you want to work with. It can be either a single jurisdiction (scotus, ca1, etc.) or a collection of jurisdictions (such as the federal appellate courts). Currently the federal appellate courts are implemented as 'federal'.
network_name is used in the make_network_data.py file. You can modify the get_courts
function in that file to create other collections of courts.
In [5]:
# which network to download data for
# this value also names the subnetwork folder (data_dir + network_name + '/')
# and the '<network_name>_network.graphml' file loaded at the end of the notebook
network_name = 'scotus' # 'federal', 'ca1', etc
In [6]:
import sys
# graph package
import igraph as ig
# our code
sys.path.append(repo_directory + 'code/')
from setup_data_dir import setup_data_dir, make_subnetwork_directory
from pipeline.download_data import download_bulk_resource, download_master_edgelist, download_scdb
from helpful_functions import case_info
sys.path.append(repo_directory + 'vertex_metrics_experiment/code/')
from make_network_data import *
from make_graph import make_graph
from bag_of_words import make_tf_idf
# Sub-directories used by the cells below; each path keeps its trailing '/'
# raw downloads shared by all networks
raw_dir = ''.join((data_dir, 'raw/'))
# everything specific to the chosen network
subnet_dir = ''.join((data_dir, network_name, '/'))
# text files for the chosen network
text_dir = ''.join((subnet_dir, 'textfiles/'))
# jupyter notebook settings
# autoreload in mode 2 reloads all modules before each cell is executed, so
# edits to the project code are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2
# draw matplotlib figures inline in the notebook output
%matplotlib inline
In [ ]:
# presumably creates the base folder layout under data_dir -- confirm in setup_data_dir.py
setup_data_dir(data_dir)
In [ ]:
# presumably creates the folder for this network (data_dir + network_name + '/')
# and its sub-folders -- confirm in setup_data_dir.py
make_subnetwork_directory(data_dir, network_name)
In [ ]:
# NOTE(review): download_op_and_cl_files is not imported by name in this notebook;
# presumably it is provided by `from make_network_data import *` -- confirm.
# presumably downloads the CourtListener opinion and cluster files for network_name
download_op_and_cl_files(data_dir, network_name)
In [ ]:
# download the master edgelist (helper from pipeline.download_data);
# presumably the citation edgelist covering all courts -- confirm
download_master_edgelist(data_dir)
In [ ]:
# download the SCDB data (helper from pipeline.download_data)
# NOTE(review): SCDB is presumably the Supreme Court Database, so this step may
# only matter when network_name is 'scotus' -- confirm
download_scdb(data_dir)
In [ ]:
# create the raw case metadata data frame in the raw/ folder
# NOTE(review): not imported by name; presumably provided by
# `from make_network_data import *` -- confirm
make_subnetwork_raw_case_metadata(data_dir, network_name)
In [ ]:
# create clean case metadata and edgelist from raw data
# NOTE(review): not imported by name; presumably provided by
# `from make_network_data import *` -- confirm
clean_metadata_and_edgelist(data_dir, network_name)
In [12]:
# build the graph for this network from the files in subnet_dir
# NOTE(review): presumably writes subnet_dir + network_name + '_network.graphml',
# the file read by the Read_GraphML cell later in this notebook -- confirm in make_graph.py
make_graph(subnet_dir, network_name)
In [ ]:
# make the text files for the given network
# NOTE(review): not imported by name; presumably provided by
# `from make_network_data import *` -- confirm
make_network_textfiles(data_dir, network_name)
In [ ]:
# compute tf-idf from the text files in text_dir (helper from bag_of_words);
# the second argument, subnet_dir + 'nlp/', is presumably the output folder -- confirm
make_tf_idf(text_dir, subnet_dir + 'nlp/')
In [7]:
# Read the network's GraphML file from its sub-directory into an igraph Graph
G = ig.Graph.Read_GraphML(''.join((subnet_dir, network_name, '_network.graphml')))
In [8]:
# show igraph's short text summary of the loaded graph as the cell output
G.summary()
Out[8]: