In [126]:
import sys

sys.path.append('../../code/')
import os
import json
from datetime import datetime
import time
from math import *



import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import igraph as ig


from seaborn.apionly import color_palette

from load_data import load_citation_network_igraph, case_info
from helper_functions import rankdata_reverse

from dim_reduction import *
from viz import *
from color_palettes import *



%load_ext autoreload
%autoreload 2
%matplotlib inline

data_dir = '../../data/'
court_name = 'scotus'


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

Load the SCOTUS citation network into igraph.


In [2]:
# Load the directed citation network for the court selected in the config
# cell (court_name) instead of repeating the literal 'scotus' here, so the
# notebook can be pointed at another court by changing a single value.
g_d = load_citation_network_igraph(data_dir, court_name, directed=True)
# Undirected variant, currently disabled:
# g_u = load_citation_network_igraph(data_dir, court_name, directed=False)


0 seconds for 250465 edges

In [3]:
# One row per vertex of the directed graph, indexed 0..n-1 in vertex order.
case_metrics = pd.DataFrame(index=range(len(g_d.vs)))

# Columns copied directly from vertex attributes: (column name, attribute).
for column_name, vertex_attribute in [('id', 'name'), ('year', 'year')]:
    case_metrics[column_name] = g_d.vs[vertex_attribute]

# Vertex measures computed on the directed graph. The list order fixes both
# the order in which igraph is called and the resulting column order.
directed_measures = [
    ('authority_d', g_d.authority_score),
    ('indegree_d', g_d.indegree),
    ('outdegree_d', g_d.outdegree),
    ('hubs_d', g_d.hub_score),
    ('betweenness_d', g_d.betweenness),
    ('eigen_d', g_d.eigenvector_centrality),
    # ('closeness_d', g_d.closeness),  # disabled in the original notebook
    ('pagerank_d', g_d.pagerank),
]
for column_name, measure in directed_measures:
    case_metrics[column_name] = measure()

# Undirected-graph variant, disabled (requires g_u to be loaded):
# case_metrics['authority_u'] = g_u.authority_score()
# case_metrics['indegree_u'] = g_u.indegree()
# case_metrics['outdegree_u'] = g_u.outdegree()
# case_metrics['hubs_u'] = g_u.hub_score()
# case_metrics['betweenness_u'] = g_u.betweenness()
# case_metrics['eigen_u'] = g_u.eigenvector_centrality()
# case_metrics['closeness_u'] = g_u.closeness()
# case_metrics['pagerank_u'] = g_u.pagerank()

In [4]:
# Display the assembled table (id, year and the directed-graph metrics) as
# the cell's rich output; pandas elides the middle rows of a long frame.
case_metrics


Out[4]:
id year authority_d indegree_d outdegree_d hubs_d betweenness_d eigen_d pagerank_d
0 100000 1922 1.464909e-05 1 0 1.126011e-16 0.000000 5.136952e-11 0.000011
1 100001 1922 9.262362e-06 1 0 1.126011e-16 0.000000 3.756351e-12 0.000014
2 100002 1922 4.501513e-03 7 3 2.359661e-04 14407.810468 1.442598e-07 0.000015
3 100003 1922 6.525035e-03 14 2 2.717687e-03 4220.797681 5.368482e-07 0.000027
4 100004 1922 4.490624e-04 4 1 1.349067e-04 0.000000 1.012862e-06 0.000019
5 100005 1922 3.008944e-03 9 12 6.367995e-03 62883.886953 4.601221e-06 0.000029
6 100006 1922 8.728483e-05 5 2 7.744271e-05 5540.311615 8.635227e-08 0.000036
7 100007 1922 3.056307e-03 21 7 1.319972e-03 71883.418263 6.853056e-06 0.000053
8 100008 1922 5.469286e-17 0 0 1.126011e-16 0.000000 0.000000e+00 0.000010
9 100009 1922 2.788087e-03 21 12 2.004272e-03 23407.837089 4.076218e-06 0.000049
10 100010 1922 6.526426e-02 56 27 1.043160e-02 245682.024440 2.408218e-06 0.000104
11 100011 1922 4.228985e-03 34 18 1.808507e-03 85487.343350 4.036935e-06 0.000101
12 100012 1922 3.658398e-05 1 7 1.413141e-03 11181.267689 2.321506e-09 0.000011
13 100013 1922 3.926284e-05 4 0 4.504044e-16 0.000000 5.292277e-08 0.000018
14 100014 1922 2.570674e-04 4 14 1.299256e-03 6327.616810 4.532767e-10 0.000015
15 100015 1922 2.220259e-03 9 7 4.029313e-03 47148.263401 1.614906e-06 0.000037
16 100016 1922 1.827062e-05 2 16 3.054592e-03 45259.406717 2.931386e-07 0.000014
17 100017 1922 2.739210e-16 0 5 1.430340e-03 0.000000 0.000000e+00 0.000010
18 100018 1922 1.649556e-02 38 14 1.310686e-02 243591.501347 1.232668e-05 0.000198
19 100019 1922 1.093857e-16 0 2 1.012494e-05 0.000000 0.000000e+00 0.000010
20 100020 1922 6.941326e-03 26 9 1.923581e-03 38115.572991 8.423311e-07 0.000067
21 100021 1922 4.337788e-04 3 2 2.558778e-04 11918.616564 2.348952e-06 0.000016
22 100022 1922 3.914549e-03 10 5 3.588614e-03 19570.022318 6.051530e-06 0.000055
23 100023 1922 6.719853e-02 24 9 2.827839e-02 31978.255141 6.997789e-06 0.000152
24 100024 1922 2.620287e-03 3 5 3.343476e-03 6785.783713 2.043757e-06 0.000018
25 100025 1922 2.379345e-02 23 18 6.287093e-03 203177.846000 4.437616e-06 0.000046
26 100026 1922 1.367307e-04 3 1 2.655172e-04 6956.645969 4.845204e-11 0.000016
27 100027 1922 2.791939e-05 1 1 4.749909e-05 0.000000 0.000000e+00 0.000011
28 100028 1922 1.146170e-03 10 0 1.128940e-15 0.000000 3.284850e-08 0.000031
29 100029 1922 7.811271e-08 1 0 1.126011e-16 0.000000 0.000000e+00 0.000016
... ... ... ... ... ... ... ... ... ...
33218 99970 1922 1.103243e-06 2 6 7.044167e-04 10637.457755 3.125347e-08 0.000024
33219 99971 1922 1.153513e-03 4 7 2.164587e-03 11908.897813 9.071377e-08 0.000018
33220 99972 1922 1.262143e-04 5 1 2.845454e-05 794.573381 1.010483e-09 0.000028
33221 99973 1922 2.183942e-02 5 5 5.385445e-03 4276.523164 3.710796e-11 0.000015
33222 99974 1922 9.774918e-03 29 12 2.407568e-03 104857.347229 2.803963e-06 0.000068
33223 99975 1922 9.820915e-03 14 14 8.472657e-03 51907.216109 6.836847e-06 0.000028
33224 99976 1922 3.131439e-04 9 2 8.584187e-06 6746.321729 1.334967e-06 0.000065
33225 99977 1922 3.110491e-02 42 37 3.818783e-02 439325.374800 1.977880e-05 0.000106
33226 99978 1922 3.191141e-03 19 4 2.007651e-04 130349.442454 5.331916e-06 0.000090
33227 99979 1922 2.282219e-04 6 1 3.317537e-05 2400.052782 1.298098e-06 0.000027
33228 99980 1922 4.824789e-04 5 0 5.644699e-16 0.000000 1.955391e-06 0.000022
33229 99981 1922 7.103159e-05 3 0 3.380092e-16 0.000000 1.275720e-06 0.000023
33230 99982 1922 5.389026e-03 27 8 5.892744e-03 133457.841855 5.146616e-07 0.000072
33231 99983 1922 7.596603e-03 31 34 5.301678e-03 421997.137547 2.700681e-06 0.000078
33232 99984 1922 1.354441e-04 1 1 7.076898e-07 56.418188 0.000000e+00 0.000011
33233 99985 1922 1.604060e-02 11 9 1.478726e-02 47670.384798 1.412323e-06 0.000039
33234 99986 1922 1.115366e-02 9 1 2.137389e-05 233.698845 1.742016e-07 0.000016
33235 99987 1922 3.052774e-02 31 5 7.106527e-03 25832.848784 6.367981e-06 0.000077
33236 99988 1922 2.272744e-02 33 26 2.971438e-02 174515.872333 7.028997e-06 0.000078
33237 99989 1922 8.660902e-04 4 2 3.148922e-05 19347.851339 1.483482e-08 0.000026
33238 99990 1922 7.783377e-04 4 10 3.768974e-03 62947.712431 1.246920e-06 0.000021
33239 99991 1922 1.265132e-04 3 8 5.841257e-04 8881.026045 2.802547e-07 0.000016
33240 99992 1922 1.923459e-05 4 1 1.112373e-04 19428.000000 2.335495e-10 0.000037
33241 99993 1922 4.720655e-03 16 14 3.065482e-03 110939.286537 1.278648e-06 0.000039
33242 99994 1922 2.117685e-02 10 18 1.138531e-02 106675.713899 9.366960e-07 0.000040
33243 99995 1922 1.844836e-03 3 6 1.006194e-03 1845.936160 0.000000e+00 0.000012
33244 99996 1922 1.167164e-03 3 7 1.534101e-04 10098.082932 0.000000e+00 0.000013
33245 99997 1922 1.311970e-05 1 11 8.702158e-04 6341.716153 1.148679e-10 0.000011
33246 99998 1922 3.117797e-03 10 5 2.294124e-04 43351.409855 1.595818e-06 0.000027
33247 99999 1922 1.052744e-04 3 10 2.820755e-04 5147.901818 1.146593e-07 0.000017

33248 rows × 9 columns


In [9]:
# Select the metric columns: everything after the two leading columns
# ('id' and 'year') of case_metrics.
metrics = list(case_metrics.columns[2:])

# Frame holding only the metric columns, in the same order.
X = case_metrics.loc[:, metrics]

In [18]:
# PCA of the vertex metrics via the project helper get_PCA.
# NOTE(review): U, D, V look like the factors of an SVD of the (scaled)
# metric matrix -- confirm against dim_reduction.get_PCA.
U, D, V = get_PCA(X, scale=True)

# Scores are U multiplied by the diagonal matrix built from D.
D_diagonal = np.diag(D)
scores = np.dot(U, D_diagonal)

In [113]:
# Give every case a colour determined by its year.

# Years as a plain Python list, and the earliest year as the zero point.
case_years = case_metrics['year'].tolist()
start_year = min(case_years)

# Offset of each case's year from the earliest year (0 for the earliest).
years0 = [year - start_year for year in case_years]

# One "PuBu" shade per possible offset, so an offset indexes its colour.
n_year_shades = max(years0) + 1
year_palette = color_palette("PuBu", n_year_shades)

case_year_colors = [year_palette[offset] for offset in years0]

In [114]:
# Plot the PCA scores with the project helper plot_scores, passing the
# per-case year colours as the palette.
# NOTE(review): start / n_comp presumably choose which components are
# drawn -- confirm against viz.plot_scores.
score_plot_options = dict(
    start=1,
    n_comp=5,
    palette=case_year_colors,
    title='PCA of vertex metrics',
)
plot_scores(scores, **score_plot_options)



In [128]:
# Loadings plot: draw V[k] against the metric index for every k.
# NOTE(review): assumes row k of V holds the loadings of component k --
# confirm against get_PCA (it may return V or its transpose).
plt.figure(figsize=[10, 10])
plt.title('loadings plot of vertex metrics')
d = len(metrics)

# Build the palette once instead of on every loop iteration.
loading_palette = color_palette("PuBu", d)

for k in range(d):
    plt.plot(range(d),
             V[k],
             marker='o',
             color=loading_palette[d - k - 1],  # reversed: k=0 gets the last shade
             label='loading %d' % k)

# Zero reference line.
plt.axhline(0, ls='--', color='red', alpha=.5)
plt.legend(loc='upper right', bbox_to_anchor=(1.5, 1))

# Put the metric names on the x axis so the figure can be read on its own
# instead of cross-referencing the printed list.
plt.xticks(range(d), metrics, rotation=45)
plt.xlabel('metric')
plt.ylabel('loading')

# Parenthesised form prints the same text under Python 2 and also parses
# under Python 3.
print(metrics)


['authority_d', 'indegree_d', 'outdegree_d', 'hubs_d', 'betweenness_d', 'eigen_d', 'pagerank_d']

In [116]:
# Scatter of PageRank against case year, one point per case, coloured with
# the per-case year colours.
y = case_metrics['pagerank_d']
years = case_metrics['year']
plt.scatter(years,
            y,
            color=case_year_colors)

# Title and axis labels so the figure stands alone when skimmed.
plt.title('PageRank by case year')
plt.xlabel('year')
plt.ylabel('pagerank_d')

# Clamp the axes to the observed data range. xlim stays the last expression
# so the cell's displayed value is unchanged.
plt.ylim([0, max(y)])
plt.xlim([min(years), max(years)])


Out[116]:
(1754, 2016)

In [127]:
# Draw a scatter matrix with the project helper plot_scatter_matrix. The
# whole case_metrics frame is passed, i.e. including the 'id' and 'year'
# columns alongside the metrics.
# NOTE(review): confirm whether 'id' is meant to be plotted.
plot_scatter_matrix(case_metrics)



In [ ]: