In [1]:
import sys
sys.path.append('../../code/')
import os
import json
from datetime import datetime
import time
from math import *
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import igraph as ig
from load_data import load_citation_network_igraph, case_info
from helper_functions import rankdata_reverse
# Automatically reload imported modules so edits to the project code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Root data directory; passed to the citation-network loader.
data_dir = '../../data/'
# Name of the court under study.
court_name = 'scotus'
In [2]:
# Load the citation network twice: once keeping edge direction (g_d) and once
# as an undirected graph (g_u). The court is taken from the `court_name`
# configuration value instead of repeating the literal, so changing the
# configuration changes which network is loaded.
g_d = load_citation_network_igraph(data_dir, court_name, directed=True)
g_u = load_citation_network_igraph(data_dir, court_name, directed=False)
In [3]:
# One row per vertex of the directed graph, identified by the vertex
# 'name' and 'year' attributes.
case_metrics = pd.DataFrame(index=range(len(g_d.vs)))
case_metrics['id'] = g_d.vs['name']
case_metrics['year'] = g_d.vs['year']

# (column name, bound igraph method) pairs; the list order is both the
# computation order and the resulting column order.
directed_metrics = [
    ('authority_d', g_d.authority_score),
    ('indegree_d', g_d.indegree),
    ('outdegree_d', g_d.outdegree),
    ('hubs_d', g_d.hub_score),
    ('betweenness_d', g_d.betweenness),
    ('eigen_d', g_d.eigenvector_centrality),
    ('closeness_d', g_d.closeness),
    ('pagerank_d', g_d.pagerank),
]
undirected_metrics = [
    ('authority_u', g_u.authority_score),
    ('indegree_u', g_u.indegree),
    ('outdegree_u', g_u.outdegree),
    ('hubs_u', g_u.hub_score),
    ('betweenness_u', g_u.betweenness),
    ('eigen_u', g_u.eigenvector_centrality),
    ('closeness_u', g_u.closeness),
    ('pagerank_u', g_u.pagerank),
]

# Directed-graph metrics first, then the same metrics on the undirected graph.
for column, compute in directed_metrics + undirected_metrics:
    case_metrics[column] = compute()
In [ ]:
# Preview the first rows of the metrics table. The frame cannot be used as
# its own boolean mask (it is not a boolean frame), so show a small sample
# of rows instead.
case_metrics.head()
In [7]:
# case_metrics.to_csv(data_dir + 'analysis/case_metrics.csv', index=False)
# pd.read_csv(data_dir + 'analysis/case_metrics.csv')
In [26]:
# Rank table with the same index and column layout as case_metrics: the two
# leading identifier columns ('id', 'year') are carried over unchanged and
# every remaining metric column is replaced by rankdata_reverse of its values.
case_ranks = case_metrics[['id', 'year']].copy()
for metric in case_metrics.columns[2:]:
    metric_values = case_metrics[metric].tolist()
    case_ranks[metric] = rankdata_reverse(metric_values)
In [33]:
# Display the metrics table ordered from highest to lowest directed PageRank.
case_metrics.sort_values(by='pagerank_d', ascending=False)
Out[33]:
In [39]:
# Look up case 88661 with the project helper (the same id is selected into
# `slaughter` in a later cell).
case_info(88661)
In [27]:
# Keep the metrics row(s) for case 103012 in `erie`, then display its ranks.
erie_id = 103012
erie = case_metrics.loc[case_metrics['id'] == erie_id]
case_ranks.loc[case_ranks['id'] == erie_id]
Out[27]:
In [28]:
# Keep the metrics row(s) for case 96405 in `detroit`, then display its ranks.
detroit_id = 96405
detroit = case_metrics.loc[case_metrics['id'] == detroit_id]
case_ranks.loc[case_ranks['id'] == detroit_id]
Out[28]:
In [40]:
# Keep the metrics row(s) for case 88661 in `slaughter`, then display its ranks.
slaughter_id = 88661
slaughter = case_metrics.loc[case_metrics['id'] == slaughter_id]
case_ranks.loc[case_ranks['id'] == slaughter_id]
Out[40]:
In [5]:
# Directed closeness against year. Axis labels are added so the figure can be
# read on its own, like the other scatter plots in this notebook.
fig, ax = plt.subplots()
ax.scatter(case_metrics['year'], case_metrics['closeness_d'])
ax.set_xlim([1750, 2016])
# Fixed y-range; points above the upper limit are not shown.
ax.set_ylim([0, .0003])
ax.set_xlabel('year')
ax.set_ylabel('closeness')
Out[5]:
In [22]:
# Directed authority score against year, one black dot per case.
fig, ax = plt.subplots(figsize=[10, 10])
ax.scatter(case_metrics['year'],
           case_metrics['authority_d'],
           marker='.',
           color='black')
# Optional highlight of the `detroit` case, left disabled:
# ax.scatter(detroit['year'],
#            detroit['authority_d'],
#            marker='.',
#            color='red',
#            s=1000)
ax.set_xlim([1750, 2016])
ax.set_ylim([0, 1])
ax.set_xlabel('year')
ax.set_ylabel('authority score ')
Out[22]:
In [18]:
# Directed PageRank against year, one black dot per case.
fig, ax = plt.subplots(figsize=[10, 10])
ax.scatter(case_metrics['year'],
           case_metrics['pagerank_d'],
           marker='.',
           color='black')
ax.set_xlim([1750, 2016])
ax.set_ylim([0, .002])
ax.set_xlabel('year')
ax.set_ylabel('pagerank ')
Out[18]:
In [41]:
# Directed PageRank against year, with the `slaughter` case drawn on top as a
# large red marker.
fig, ax = plt.subplots(figsize=[10, 10])
ax.scatter(case_metrics['year'],
           case_metrics['pagerank_d'],
           marker='.',
           color='black')
ax.scatter(slaughter['year'],
           slaughter['pagerank_d'],
           marker='.',
           color='red',
           s=1000)
ax.set_xlim([1750, 2016])
ax.set_ylim([0, .002])
ax.set_xlabel('year')
ax.set_ylabel('pagerank ')
Out[41]:
In [50]:
# Pairwise Kendall tau between every column of case_ranks except the first
# ('id'); columns[1:] therefore still includes 'year'.
rank_cols = case_ranks.columns[1:]
# Allocate as float: a frame created from only index/columns has object
# dtype, and values written through .loc would stay object, which makes
# numeric operations on the result unreliable.
kendall_tau = pd.DataFrame(index=rank_cols, columns=rank_cols, dtype=float)
for ra in rank_cols:
    for rb in rank_cols:
        kendall_tau.loc[ra, rb] = stats.kendalltau(case_ranks[ra], case_ranks[rb]).correlation
kendall_tau
Out[50]:
In [53]:
# Pairwise Spearman correlation between every column of case_ranks except the
# first ('id'); columns[1:] therefore still includes 'year'.
rank_cols = case_ranks.columns[1:]
# Allocate as float: a frame created from only index/columns has object
# dtype, and values written through .loc would stay object, which makes
# numeric operations on the result unreliable.
spearman = pd.DataFrame(index=rank_cols, columns=rank_cols, dtype=float)
for ra in rank_cols:
    for rb in rank_cols:
        spearman.loc[ra, rb] = stats.spearmanr(case_ranks[ra], case_ranks[rb]).correlation
spearman
Out[53]:
In [ ]: