In [2]:
# Static content
import numpy as np
from sklearn.metrics import euclidean_distances
from sklearn import manifold
import pandas.io


def simple_vote(V):
    if V == 0:
        return 0
    if V >= 1 and V <= 3:
        return 1
    if V >3 and V <= 6:
        return -1
    if V > 6 and V <= 9:
        return 0
    return nan

datadir = "data"
datafilename = "%s/congress/hou112kh.ord" % datadir
dictionaryfilename = "%s/congress/h112desc.csv" % datadir

In [3]:
votedescriptions = pandas.io.parsers.read_csv(dictionaryfilename)
print("Columns: %s" % ", ".join(votedescriptions.columns))
print("Number of votes: %d" % len(votedescriptions))


Columns: date, session, number, bill, question, result, description, yeatotal, naytotal
Number of votes: 1602

In [4]:
data = []
vp = None
for line in open(datafilename, "r"):
    record = (line[:3], line[3:8], line[8:10].strip(), line[10:12].strip(), line[12:20].strip(), line[20:23], line[23:25], line[25:36].strip(), list(line[36:].strip()))
    if vp is None and record[2] == "99":
        vp = record
        continue
    data.append(record)

In [5]:
votes = {}
party = {}
for person in data:
    voterecord = np.array([simple_vote(int(vote)) for vote in person[-1]])
    name = person[7]
    if name in votes:
        name += person[3]
    if name in votes:
        name += "%d" % len([samename for samename in votes if samename == name])
    votes[name] = voterecord
    if person[5] == '100':
        party[name] = "DEM"
    elif person[5] == "200":
        party[name] = "REP"
    else:
        party[name] = "OTH"

In [6]:
# How many and where?
for name in ("DEM", "REP", "OTH"):
    print("%s: %d" % (name, len([p for p in party.iteritems() if p[1] == name])))


DEM: 200
REP: 245
OTH: 0

In [7]:
# Verify array lengths. (Senators excludes VP)
counts = []
for v in votes.values():
    counts.append(len(v))
num_senators = len(votes)
num_votes = np.max(counts)
print("Max votes: %d" % num_votes)
print("Min votes: %d" % np.min(counts))
print("Senators: %d" % num_senators)
if num_votes != len(votedescriptions):
    print("Missing discriptions")


Max votes: 1602
Min votes: 1602
Senators: 445

In [8]:
# Make Matrices
votematrix = np.ndarray((num_senators, num_votes))
senatorids = {}
idsenators = {} # inverse of senatorids for convenience
total_votes = np.ndarray((num_senators, 1))
idx = 0
for (name, vote) in votes.iteritems():
    senatorids[name] = idx
    idsenators[idx] = name
    votematrix[idx,:] = vote
    total_votes[idx] = sum(vote.dot(vote))
    idx += 1
correlation = votematrix.dot(votematrix.transpose())

print("Correlation sample subset")
print(correlation[0:5,0:5])

distances = euclidean_distances(correlation)
print("\nDistances")
print(distances[0:5, 0:5])


Correlation sample subset
[[ 1585.  -504.  1076.  1109.  -489.]
 [ -504.  1584.  -463.  -616.  1388.]
 [ 1076.  -463.  1565.  1042.  -461.]
 [ 1109.  -616.  1042.  1538.  -611.]
 [ -489.  1388.  -461.  -611.  1596.]]

Distances
[[     0.          35970.61891322   1656.4407626    2007.03163901
   36176.59615   ]
 [ 35970.61891322      0.          34641.98688009  37523.19881087
     498.27502446]
 [  1656.4407626   34641.98688009      0.           3082.82824692
   34849.17429438]
 [  2007.03163901  37523.19881087   3082.82824692      0.          37731.25758572]
 [ 36176.59615       498.27502446  34849.17429438  37731.25758572      0.        ]]

In [9]:
# Example distances
def get_distance(sen1, sen2):
    return distances[senatorids[sen1], senatorids[sen2]]

print("Pelosi v Cantor")
print(get_distance('PELOSI', 'CANTOR'))
print("Ryan v Cantor")
print(get_distance('RYAN1', 'CANTOR'))

furthest = max(distances.flatten())
mask = np.identity(distances.shape[0]) + distances
np.fill_diagonal(mask, nan)
mask = np.logical_not(isnan(mask))
closest = min(distances[mask].flatten())
tmp = ["%s & %s" % (idsenators[loc[0]], idsenators[loc[1]]) for loc in np.where(distances == furthest)]
print("Furthest (%f): %s" % (furthest, ",".join(tmp[:len(tmp)/2])))
tmp = ["%s & %s" % (idsenators[loc[0]], idsenators[loc[1]]) for loc in np.where(distances == closest)]
print("Closest (%f): %s" % (closest, ",".join(tmp[:len(tmp)/2])))


Pelosi v Cantor
37546.3862442
Ryan v Cantor
1617.26404771
Furthest (42098.022828): MCGOVERN & POMPEO
Closest (53.366656): DELBENE & PAYNE10

In [10]:
# Calculate MDS
seed = np.random.RandomState(seed=3)
mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=seed,
                   dissimilarity="precomputed", n_jobs=1)
pos = mds.fit_transform(distances)

In [11]:
# Plot MDS
figure(figsize=(24, 24))
plt.scatter(pos[:, 0], pos[:, 1], s=5, c='g')
idx = 0
while idx < pos.shape[0]:
    loc = pos[idx, :]
    name = idsenators[idx]
    p = party[name]
    if p == "DEM":
        color = "blue"
    elif p == "REP":
        color = "red"
    else:
        color = "green"
    plt.annotate(name, loc, color=color)
    idx += 1



In [12]:
print("Distances of CANTOR (%s) to:" % party['CANTOR'])
for senator in ('COHEN',):
    print("%s (%s): %f" % (senator, party[senator], get_distance('CANTOR', senator)))

print("\n")
print("Example locations:")
for senator in ("BOEHNER",):
    print("Senator %s (%s) @ %s" % (senator, party[senator], pos[senatorids[senator],:]))


Distances of CANTOR (REP) to:
COHEN (DEM): 38184.877085


Example locations:
Senator BOEHNER (REP) @ [ 1111.90898262 -6020.54971378]

In [13]:
# What's up with AMASH?
schatz = senatorids['AMASH']
print("Schatz voted %d times." % total_votes[schatz])
print("Average number of votes is %d." % mean(total_votes))
print("First vote (one indexed): %d" % (np.min(np.where(votes['AMASH'] != 0)) + 1))
print("\n")

# Least Voters (less than 400)
threshold = 800
print("People voting less than %d times. First and last are 1-indexed." % threshold)
for idx in xrange(0, len(total_votes)):
    if total_votes[idx] >= 400:
        continue
    name = idsenators[idx]
    indices = np.where(votes[name] != 0)
    print("%s (%s): Voted %d times. First on %d. Last on %d." % (name, party[name], total_votes[idx], np.min(indices) + 1, np.max(indices) + 1))


Schatz voted 1561 times.
Average number of votes is 1496.
First vote (one indexed): 1


People voting less than 800 times. First and last are 1-indexed.
BARBER (DEM): Voted 241 times. First on 1355. Last on 1602.
DELBENE (DEM): Voted 55 times. First on 1548. Last on 1602.
HARMAN (DEM): Voted 88 times. First on 1. Last on 101.
MASSIE (REP): Voted 55 times. First on 1548. Last on 1602.
PAYNE10 (DEM): Voted 53 times. First on 1549. Last on 1602.
BOEHNER (REP): Voted 9 times. First on 12. Last on 1602.
CURSON (DEM): Voted 55 times. First on 1548. Last on 1602.
HELLER (REP): Voted 290 times. First on 1. Last on 296.
LEE26 (REP): Voted 26 times. First on 1. Last on 26.
GIFFORDS (DEM): Voted 11 times. First on 1. Last on 955.

In [14]:
giffords = votedescriptions.ix[np.where(votes['GIFFORDS'] != 0)]
pandas.concat([giffords, pandas.DataFrame(np.array(votes['GIFFORDS'])[np.where(votes['GIFFORDS'] != 0)],columns=("vote",))], axis=1)


Out[14]:
date session number bill question result description yeatotal naytotal vote
0 2011-01-05 1st 2 NaN Election of the Speaker Boehner NaN NaN -1
1 2011-01-05 1st 3 H RES 5 On Motion to Table Passed Adopting rules for the One Hundred Twelfth Con... 223 188 -1
2 2011-01-05 1st 4 H RES 5 On Ordering the Previous Question Passed Adopting rules for the One Hundred Twelfth Con... 236 188 -1
3 2011-01-05 1st 5 H RES 5 On Motion to Commit Failed Adopting rules for the One Hundred Twelfth Con... 191 238 1
4 2011-01-05 1st 6 H RES 5 On Agreeing to the Resolution Passed Adopting rules for the One Hundred Twelfth Con... 238 191 -1
5 2011-01-06 1st 8 H RES 22 On Motion to Suspend the Rules and Agree Passed Reducing the amount authorized for salaries an... 408 13 1
6 2011-01-07 1st 9 H RES 26 On Ordering the Previous Question Passed Providing for consideration of H.R. 2, to repe... 236 182 -1
7 2011-01-07 1st 10 H RES 26 On Agreeing to the Resolution Passed Providing for consideration of H.R. 2, to repe... 236 181 -1
8 2011-01-07 1st 11 H RES 27 On Agreeing to the Resolution Passed Relating to the status of certain actions take... 257 159 -1
9 NaN NaN NaN NaN NaN NaN NaN NaN NaN 1
10 NaN NaN NaN NaN NaN NaN NaN NaN NaN 1
685 2011-08-01 1st 690 S 365 On Passage Passed To make a technical amendment to the Education... 269 161 NaN
954 2012-01-25 2nd 11 H R 3801 On Motion to Suspend the Rules and Pass Passed Ultralight Aircraft Smuggling Prevention Act 408 0 NaN