notebook.community

Edit and run



In [2]:

    
# Static content
import numpy as np
from sklearn.metrics import euclidean_distances
from sklearn import manifold
import pandas.io


def simple_vote(V):
    if V == 0:
        return 0
    if V >= 1 and V <= 3:
        return 1
    if V >3 and V <= 6:
        return -1
    if V > 6 and V <= 9:
        return 0
    return nan

datadir = "data"
datafilename = "%s/congress/hou112kh.ord" % datadir
dictionaryfilename = "%s/congress/h112desc.csv" % datadir



In [3]:

    
votedescriptions = pandas.io.parsers.read_csv(dictionaryfilename)
print("Columns: %s" % ", ".join(votedescriptions.columns))
print("Number of votes: %d" % len(votedescriptions))









    



Columns: date, session, number, bill, question, result, description, yeatotal, naytotal
Number of votes: 1602



In [4]:

    
data = []
vp = None
for line in open(datafilename, "r"):
    record = (line[:3], line[3:8], line[8:10].strip(), line[10:12].strip(), line[12:20].strip(), line[20:23], line[23:25], line[25:36].strip(), list(line[36:].strip()))
    if vp is None and record[2] == "99":
        vp = record
        continue
    data.append(record)



In [5]:

    
votes = {}
party = {}
for person in data:
    voterecord = np.array([simple_vote(int(vote)) for vote in person[-1]])
    name = person[7]
    if name in votes:
        name += person[3]
    if name in votes:
        name += "%d" % len([samename for samename in votes if samename == name])
    votes[name] = voterecord
    if person[5] == '100':
        party[name] = "DEM"
    elif person[5] == "200":
        party[name] = "REP"
    else:
        party[name] = "OTH"



In [6]:

    
# How many and where?
for name in ("DEM", "REP", "OTH"):
    print("%s: %d" % (name, len([p for p in party.iteritems() if p[1] == name])))









    



DEM: 200
REP: 245
OTH: 0



In [7]:

    
# Verify array lengths. (Senators excludes VP)
counts = []
for v in votes.values():
    counts.append(len(v))
num_senators = len(votes)
num_votes = np.max(counts)
print("Max votes: %d" % num_votes)
print("Min votes: %d" % np.min(counts))
print("Senators: %d" % num_senators)
if num_votes != len(votedescriptions):
    print("Missing discriptions")









    



Max votes: 1602
Min votes: 1602
Senators: 445



In [8]:

    
# Make Matrices
votematrix = np.ndarray((num_senators, num_votes))
senatorids = {}
idsenators = {} # inverse of senatorids for convenience
total_votes = np.ndarray((num_senators, 1))
idx = 0
for (name, vote) in votes.iteritems():
    senatorids[name] = idx
    idsenators[idx] = name
    votematrix[idx,:] = vote
    total_votes[idx] = sum(vote.dot(vote))
    idx += 1
correlation = votematrix.dot(votematrix.transpose())

print("Correlation sample subset")
print(correlation[0:5,0:5])

distances = euclidean_distances(correlation)
print("\nDistances")
print(distances[0:5, 0:5])









    



Correlation sample subset
[[ 1585.  -504.  1076.  1109.  -489.]
 [ -504.  1584.  -463.  -616.  1388.]
 [ 1076.  -463.  1565.  1042.  -461.]
 [ 1109.  -616.  1042.  1538.  -611.]
 [ -489.  1388.  -461.  -611.  1596.]]

Distances
[[     0.          35970.61891322   1656.4407626    2007.03163901
   36176.59615   ]
 [ 35970.61891322      0.          34641.98688009  37523.19881087
     498.27502446]
 [  1656.4407626   34641.98688009      0.           3082.82824692
   34849.17429438]
 [  2007.03163901  37523.19881087   3082.82824692      0.          37731.25758572]
 [ 36176.59615       498.27502446  34849.17429438  37731.25758572      0.        ]]



In [9]:

    
# Example distances
def get_distance(sen1, sen2):
    return distances[senatorids[sen1], senatorids[sen2]]

print("Pelosi v Cantor")
print(get_distance('PELOSI', 'CANTOR'))
print("Ryan v Cantor")
print(get_distance('RYAN1', 'CANTOR'))

furthest = max(distances.flatten())
mask = np.identity(distances.shape[0]) + distances
np.fill_diagonal(mask, nan)
mask = np.logical_not(isnan(mask))
closest = min(distances[mask].flatten())
tmp = ["%s & %s" % (idsenators[loc[0]], idsenators[loc[1]]) for loc in np.where(distances == furthest)]
print("Furthest (%f): %s" % (furthest, ",".join(tmp[:len(tmp)/2])))
tmp = ["%s & %s" % (idsenators[loc[0]], idsenators[loc[1]]) for loc in np.where(distances == closest)]
print("Closest (%f): %s" % (closest, ",".join(tmp[:len(tmp)/2])))









    



Pelosi v Cantor
37546.3862442
Ryan v Cantor
1617.26404771
Furthest (42098.022828): MCGOVERN & POMPEO
Closest (53.366656): DELBENE & PAYNE10



In [10]:

    
# Calculate MDS
seed = np.random.RandomState(seed=3)
mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=seed,
                   dissimilarity="precomputed", n_jobs=1)
pos = mds.fit_transform(distances)



In [11]:

    
# Plot MDS
figure(figsize=(24, 24))
plt.scatter(pos[:, 0], pos[:, 1], s=5, c='g')
idx = 0
while idx < pos.shape[0]:
    loc = pos[idx, :]
    name = idsenators[idx]
    p = party[name]
    if p == "DEM":
        color = "blue"
    elif p == "REP":
        color = "red"
    else:
        color = "green"
    plt.annotate(name, loc, color=color)
    idx += 1



In [12]:

    
print("Distances of CANTOR (%s) to:" % party['CANTOR'])
for senator in ('COHEN',):
    print("%s (%s): %f" % (senator, party[senator], get_distance('CANTOR', senator)))

print("\n")
print("Example locations:")
for senator in ("BOEHNER",):
    print("Senator %s (%s) @ %s" % (senator, party[senator], pos[senatorids[senator],:]))









    



Distances of CANTOR (REP) to:
COHEN (DEM): 38184.877085


Example locations:
Senator BOEHNER (REP) @ [ 1111.90898262 -6020.54971378]



In [13]:

    
# What's up with AMASH?
schatz = senatorids['AMASH']
print("Schatz voted %d times." % total_votes[schatz])
print("Average number of votes is %d." % mean(total_votes))
print("First vote (one indexed): %d" % (np.min(np.where(votes['AMASH'] != 0)) + 1))
print("\n")

# Least Voters (less than 400)
threshold = 800
print("People voting less than %d times. First and last are 1-indexed." % threshold)
for idx in xrange(0, len(total_votes)):
    if total_votes[idx] >= 400:
        continue
    name = idsenators[idx]
    indices = np.where(votes[name] != 0)
    print("%s (%s): Voted %d times. First on %d. Last on %d." % (name, party[name], total_votes[idx], np.min(indices) + 1, np.max(indices) + 1))









    



Schatz voted 1561 times.
Average number of votes is 1496.
First vote (one indexed): 1


People voting less than 800 times. First and last are 1-indexed.
BARBER (DEM): Voted 241 times. First on 1355. Last on 1602.
DELBENE (DEM): Voted 55 times. First on 1548. Last on 1602.
HARMAN (DEM): Voted 88 times. First on 1. Last on 101.
MASSIE (REP): Voted 55 times. First on 1548. Last on 1602.
PAYNE10 (DEM): Voted 53 times. First on 1549. Last on 1602.
BOEHNER (REP): Voted 9 times. First on 12. Last on 1602.
CURSON (DEM): Voted 55 times. First on 1548. Last on 1602.
HELLER (REP): Voted 290 times. First on 1. Last on 296.
LEE26 (REP): Voted 26 times. First on 1. Last on 26.
GIFFORDS (DEM): Voted 11 times. First on 1. Last on 955.



In [14]:

    
giffords = votedescriptions.ix[np.where(votes['GIFFORDS'] != 0)]
pandas.concat([giffords, pandas.DataFrame(np.array(votes['GIFFORDS'])[np.where(votes['GIFFORDS'] != 0)],columns=("vote",))], axis=1)









    Out[14]:






  
    
      
      date
      session
      number
      bill
      question
      result
      description
      yeatotal
      naytotal
      vote
    
  
  
    
      0  
       2011-01-05
       1st
         2
            NaN
                        Election of the Speaker
       Boehner
                                                     NaN
       NaN
           
       -1
    
    
      1  
       2011-01-05
       1st
         3
        H RES 5
                             On Motion to Table
        Passed
       Adopting rules for the One Hundred Twelfth Con...
       223
       188 
       -1
    
    
      2  
       2011-01-05
       1st
         4
        H RES 5
              On Ordering the Previous Question
        Passed
       Adopting rules for the One Hundred Twelfth Con...
       236
       188 
       -1
    
    
      3  
       2011-01-05
       1st
         5
        H RES 5
                            On Motion to Commit
        Failed
       Adopting rules for the One Hundred Twelfth Con...
       191
       238 
        1
    
    
      4  
       2011-01-05
       1st
         6
        H RES 5
                  On Agreeing to the Resolution
        Passed
       Adopting rules for the One Hundred Twelfth Con...
       238
       191 
       -1
    
    
      5  
       2011-01-06
       1st
         8
       H RES 22
       On Motion to Suspend the Rules and Agree
        Passed
       Reducing the amount authorized for salaries an...
       408
        13 
        1
    
    
      6  
       2011-01-07
       1st
         9
       H RES 26
              On Ordering the Previous Question
        Passed
       Providing for consideration of H.R. 2, to repe...
       236
       182 
       -1
    
    
      7  
       2011-01-07
       1st
        10
       H RES 26
                  On Agreeing to the Resolution
        Passed
       Providing for consideration of H.R. 2, to repe...
       236
       181 
       -1
    
    
      8  
       2011-01-07
       1st
        11
       H RES 27
                  On Agreeing to the Resolution
        Passed
       Relating to the status of certain actions take...
       257
       159 
       -1
    
    
      9  
              NaN
       NaN
       NaN
            NaN
                                            NaN
           NaN
                                                     NaN
       NaN
        NaN
        1
    
    
      10 
              NaN
       NaN
       NaN
            NaN
                                            NaN
           NaN
                                                     NaN
       NaN
        NaN
        1
    
    
      685
       2011-08-01
       1st
       690
          S 365
                                     On Passage
        Passed
       To make a technical amendment to the Education...
       269
       161 
      NaN
    
    
      954
       2012-01-25
       2nd
        11
       H R 3801
        On Motion to Suspend the Rules and Pass
        Passed
            Ultralight Aircraft Smuggling Prevention Act
       408
         0 
      NaN

	date	session	number	bill	question	result	description	yeatotal	naytotal	vote
0	2011-01-05	1st	2	NaN	Election of the Speaker	Boehner	NaN	NaN		-1
1	2011-01-05	1st	3	H RES 5	On Motion to Table	Passed	Adopting rules for the One Hundred Twelfth Con...	223	188	-1
2	2011-01-05	1st	4	H RES 5	On Ordering the Previous Question	Passed	Adopting rules for the One Hundred Twelfth Con...	236	188	-1
3	2011-01-05	1st	5	H RES 5	On Motion to Commit	Failed	Adopting rules for the One Hundred Twelfth Con...	191	238	1
4	2011-01-05	1st	6	H RES 5	On Agreeing to the Resolution	Passed	Adopting rules for the One Hundred Twelfth Con...	238	191	-1
5	2011-01-06	1st	8	H RES 22	On Motion to Suspend the Rules and Agree	Passed	Reducing the amount authorized for salaries an...	408	13	1
6	2011-01-07	1st	9	H RES 26	On Ordering the Previous Question	Passed	Providing for consideration of H.R. 2, to repe...	236	182	-1
7	2011-01-07	1st	10	H RES 26	On Agreeing to the Resolution	Passed	Providing for consideration of H.R. 2, to repe...	236	181	-1
8	2011-01-07	1st	11	H RES 27	On Agreeing to the Resolution	Passed	Relating to the status of certain actions take...	257	159	-1
9	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1
10	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1
685	2011-08-01	1st	690	S 365	On Passage	Passed	To make a technical amendment to the Education...	269	161	NaN
954	2012-01-25	2nd	11	H R 3801	On Motion to Suspend the Rules and Pass	Passed	Ultralight Aircraft Smuggling Prevention Act	408	0	NaN