In [1]:
from collections import defaultdict
from pymongo import MongoClient
import matplotlib

%matplotlib inline


# Connect to MongoDB using MongoClient's default connection settings and
# select the collection that every later cell queries through `coll`.
client = MongoClient()
db = client["senators"]
# Roll-call documents: later cells read the "congress", "session",
# "vote_num", "votes" and "raw" fields from this collection.
coll = db["raw_xml"]

In [2]:
from sklearn.feature_selection import SelectKBest
from lxml import etree
from sklearn.feature_extraction import DictVectorizer
import numpy as np
from sklearn.feature_selection import chi2

def partisan_bills(votes, k=25):
    """
    Print the roll-call votes that best separate Republicans from Democrats.

    votes: dict mapping a member name to a dict of
           {(congress, session, vote_num): vote_score}. Only names containing
           "(R-" or "(D-" are used; all other members are ignored.
    k:     number of roll calls to print. Defaults to 25 (the value that used
           to be hard-coded) and is capped at the number of distinct roll
           calls so that small inputs do not fail.

    Every roll call is scored against the party label with SelectKBest's
    default scoring function, and the k roll calls with the smallest p-values
    are printed in ascending p-value order. The raw XML of each selected roll
    call is fetched from the notebook-level `coll` collection to print its
    question text and document text.

    NOTE(review): `chi2` is imported in this cell but never passed to
    SelectKBest -- confirm the default scoring function is the intended one.
    """
    senator_names = [name for name in votes if "(R-" in name or "(D-" in name]
    senators = [votes[name] for name in senator_names]
    # 1 = Republican, 0 = Democrat. A list (rather than map()) so the labels
    # are a real sequence on Python 3 as well as Python 2.
    truth = [int("(R-" in name) for name in senator_names]

    bill_vectorizer = DictVectorizer()
    features = bill_vectorizer.fit_transform(senators)

    # Never ask for more roll calls than there are feature columns.
    k = min(k, features.shape[1])
    kbest = SelectKBest(k=k).fit(features, truth)
    top_k = np.argsort(kbest.pvalues_)[:k]
    # The feature names are the (congress, session, vote_num) keys themselves.
    bills = bill_vectorizer.get_feature_names()

    for cong, sess, vote_num in (bills[i] for i in top_k):
        raw_xml = coll.find({"congress": cong, "session": sess, "vote_num": vote_num}, {"raw": 1})[0]['raw']
        root = etree.XML(raw_xml.encode('utf-8'))
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("{} Congress, Session {}, Vote Number {}".format(cong, sess, vote_num))
        print(root.xpath("//vote_question_text")[0].text)
        print(root.xpath("//vote_document_text")[0].text)
        print("")

In [35]:
# Member-centric vote table for Congresses 111 through 114:
#   obama_votes[member name][(congress, session, vote_num)] = vote_score
obama_votes = defaultdict(dict)

for cong in range(111, 115):
    # The {"raw": 0} projection leaves the raw XML out of the returned documents.
    for roll_call in coll.find({"congress": cong}, {"raw": 0}):
        for member in roll_call["votes"]:
            bill_key = (cong, roll_call["session"], roll_call["vote_num"])
            obama_votes[member["name"]][bill_key] = member["vote_score"]

In [38]:
# Print the roll calls from obama_votes (Congresses 111-114) whose votes best
# separate the two parties.
partisan_bills(obama_votes)


114 Congress, Session 1, Vote Number 00119
On the Amendment S.Amdt. 951 to S.Con.Res. 11 (No short title on file)
To establish and fund a new Federal-State partnership to expand access to high-quality preschool programs for children from low- and moderate-income families, offset with revenue from closing loopholes.

114 Congress, Session 1, Vote Number 00117
On the Amendment S.Amdt. 842 to S.Con.Res. 11 (No short title on file)
To establish a deficit-neutral reserve fund relating to consumer fiancial protection.

114 Congress, Session 1, Vote Number 00111
On the Amendment S.Amdt. 1072 to S.Con.Res. 11 (No short title on file)
To provide additional resources to reject the Senate Republicans' proposed $435 billion in cuts to Medicare.

114 Congress, Session 1, Vote Number 00110
On the Amendment S.Amdt. 966 to S.Con.Res. 11 (No short title on file)
To establish a deficit-neutral reserve fund relating to offsetting the costs of operations against the Islamic State.

114 Congress, Session 1, Vote Number 00094
On the Amendment S.Amdt. 523 to S.Con.Res. 11 (No short title on file)
To prevent United States companies from getting tax benefits for moving jobs overseas, to end offshore tax loopholes including inversions, and to provide incentives for United States companies to relocate overseas jobs to the United States.

114 Congress, Session 1, Vote Number 00105
On the Amendment S.Amdt. 515 to S.Con.Res. 11 (No short title on file)
To establish a spending-neutral reserve fund relating to requiring the Federal Government to allow states to opt out of Common Core without penalty.

114 Congress, Session 1, Vote Number 00101
On the Amendment S.Amdt. 828 to S.Con.Res. 11 (No short title on file)
To provide additional resources to save student financial aid and keep college affordable for more than 8,000,000 low- and middle-income students by restoring the $89,000,000,000 in cuts to Federal Pell Grants in the Republican budget.

114 Congress, Session 1, Vote Number 00104
On the Amendment S.Amdt. 817 to S.Con.Res. 11 (No short title on file)
To establish a deficit-neutral reserve fund to provide tax benefits to patriot employers that invest in American jobs and provide fair pay and benefits to workers and to eliminate tax benefits for corporations that ship jobs overseas.

111 Congress, Session 1, Vote Number 00385
On the Cloture Motion S.Amdt. 3276 to S.Amdt. 2786 to H.R. 3590 (Service Members Home Ownership Tax Act of 2009)
To improve the bill.

111 Congress, Session 1, Vote Number 00330
On the Motion (Motion to Waive Rule XXVIII Re: H.R. 2996.)
A bill making appropriations for the Department of the Interior, environment, and related agencies for the fiscal year ending September 30, 2010, and for other purposes.

111 Congress, Session 2, Vote Number 00012
On the Amendment S.Amdt. 3305 to S.Amdt. 3299 to H.J.Res. 45 (No short title on file)
To reimpose statutory pay-as-you-go.

111 Congress, Session 2, Vote Number 00013
On the Amendment S.Amdt. 3299 to H.J.Res. 45 (No short title on file)
In the nature of a substitute.

114 Congress, Session 1, Vote Number 00007
On the Amendment S.Amdt. 33 to S.Amdt. 2 to S. 1 (Keystone XL Pipeline Act)
To conform citizen suits under the Endangered Species Act of 1973.

114 Congress, Session 1, Vote Number 00137
On the Amendment S.Amdt. 1114 to H.R. 2 (Medicare Access and CHIP Reauthorization Act of 2015)
To repeal the individual mandate.

114 Congress, Session 1, Vote Number 00091
On the Amendment S.Amdt. 801 to S.Con.Res. 11 (No short title on file)
To build on the Bipartisan Budget Act of 2013 by restoring a below-sequester level cut of $9,000,000,000 to nondefense discretionary spending in 2017, replacing sequestration in 2016 and 2017 and increasing funding above sequester levels by a total of $148,000,000,000 for the 2 years, increasing defense and nondefense discretionary spending above sequester levels by equal amounts, eliminating the overseas contingency operations gimmick contained in the committee-reported resolution, and offsetting the net increase in defense and nondefense discretionary spending by closing tax loopholes.

114 Congress, Session 1, Vote Number 00086
On the Amendment S.Amdt. 652 to S.Con.Res. 11 (No short title on file)
To make college more affordable for middle-class families by allowing borrowers with outstanding Federal and private student loans to refinance at the equivalent interest rates that were offered to Federal student loan borrowers during the 2013-2014 school year and to fully offset the cost of such a program by requiring millionaires to pay at least a 30 percent effective Federal tax rate.

114 Congress, Session 1, Vote Number 00082
On the Amendment S.Amdt. 362 to S.Con.Res. 11 (No short title on file)
To establish a deficit-neutral reserve fund relating to amending the Equal Pay Act of 1963 to allow for punitive damages, limit the any factor "other than sex" exception, and prohibit retaliation against employees who share salary information.

114 Congress, Session 1, Vote Number 00127
On the Amendment S.Amdt. 919 to S.Con.Res. 11 (No short title on file)
To establish a deficit-neutral reserve fund relating to eliminating deductions for corporate compensation in excess of $1,000,000.

111 Congress, Session 1, Vote Number 00394
On the Amendment S.Amdt. 2786 to H.R. 3590 (Patient Protection and Affordable Care Act)
In the nature of a substitute.

111 Congress, Session 1, Vote Number 00386
On the Motion to Table S.Amdt. 3278 to H.R. 3590 (Service Members Home Ownership Tax Act of 2009)
To change the enactment date.

111 Congress, Session 1, Vote Number 00387
On the Amendment S.Amdt. 3276 to S.Amdt. 2786 to H.R. 3590 (Service Members Home Ownership Tax Act of 2009)
To improve the bill.

111 Congress, Session 1, Vote Number 00388
On the Cloture Motion S.Amdt. 2786 to H.R. 3590 (Service Members Home Ownership Tax Act of 2009)
In the nature of a substitute.

111 Congress, Session 1, Vote Number 00392
On the Point of Order S.Amdt. 2786 to H.R. 3590 (Patient Protection and Affordable Care Act)
In the nature of a substitute.

111 Congress, Session 2, Vote Number 00014
On the Joint Resolution H.J.Res. 45
A joint resolution increasing the statutory limit on the public debt.

111 Congress, Session 1, Vote Number 00389
On the Point of Order S.Amdt. 2786 to H.R. 3590 (Patient Protection and Affordable Care Act)
In the nature of a substitute.


In [39]:
# Member-centric vote table for Congresses 107 through 110:
#   bush_votes[member name][(congress, session, vote_num)] = vote_score
bush_votes = defaultdict(dict)

for cong in range(107, 111):
    # The {"raw": 0} projection leaves the raw XML out of the returned documents.
    for roll_call in coll.find({"congress": cong}, {"raw": 0}):
        for member in roll_call["votes"]:
            bill_key = (cong, roll_call["session"], roll_call["vote_num"])
            bush_votes[member["name"]][bill_key] = member["vote_score"]

In [40]:
# Print the roll calls from bush_votes (Congresses 107-110) whose votes best
# separate the two parties.
partisan_bills(bush_votes)


109 Congress, Session 1, Vote Number 00078
On the Amendment S.Amdt. 211 to S.Con.Res. 18 (No short title on file)
To restore funding for tribal programs and provide necessary additional funding based on recommendations from Indian country

109 Congress, Session 1, Vote Number 00047
On the Amendment S.Amdt. 144 to S.Con.Res. 18 (No short title on file)
To ensure that 75-year solvency has been restored to Social Security before Congress considers new deficit-financed legislation that would increase mandatory spending or cut taxes.

109 Congress, Session 1, Vote Number 00070
On the Amendment S.Amdt. 239 to S.Con.Res. 18 (No short title on file)
Relative to funding to the Office of Community Oriented Policing Services.

109 Congress, Session 2, Vote Number 00035
On the Amendment S.Amdt. 2932 to S. 2349 (527 Reform Act of 2006)
To provide additional transparency in the legislative process.

109 Congress, Session 1, Vote Number 00297
On the Motion (Motion To Waive CBA Re: Lautenberg Amdt. No. 2381)
To require certification prior to beneficiary enrollment in a prescription drug plan or an MA-PD plan that has a gap in the coverage of prescription drugs under part D of title XVIII of the Social Security Act.

109 Congress, Session 1, Vote Number 00296
On the Motion (Motion to Waive CBA Re: Byrd Amdt. No. 2414)
To provide for the suspension of the debate limitation on reconciliation legislation that causes a deficit or increases the deficit.

109 Congress, Session 2, Vote Number 00053
On the Amendment S.Amdt. 3131 to H.J.Res. 47 (No short title on file)
To require a study of debt held by foreigners.

109 Congress, Session 1, Vote Number 00071
On the Amendment S.Amdt. 240 to S.Con.Res. 18 (No short title on file)
Relative to transportation funding.

107 Congress, Session 1, Vote Number 00103
On the Amendment S.Amdt. 378 to S.Amdt. 358 to S. 1 (Better Education for Students and Teachers Act)
To provide for class reduction programs.

108 Congress, Session 1, Vote Number 00085
On the Amendment S.Amdt. 315 to S.Con.Res. 23 (No short title on file)
To ensure that the budget includes funds to extend temporary unemployment compensation benefits, provides benefits to the million long-term unemployed Americans, and provides benefits to part-time and low-wage workers.

108 Congress, Session 1, Vote Number 00086
On the Amendment S.Amdt. 415 to S.Con.Res. 23 (No short title on file)
To increase funding for after-school programs to the levels promised by the No Child Left Behind Act to serve 1.6 million more children in FY 2004 and to increase funding for Head Start to serve 80 percent of eligible 3 and 4 year olds and increase the number of infants and toddlers served and for deficit reduction.

108 Congress, Session 1, Vote Number 00087
On the Amendment S.Amdt. 361 to S.Con.Res. 23 (No short title on file)
To fulfill the U.S. commitment to provide health care to American Indians and Alaska Natives.

109 Congress, Session 2, Vote Number 00169
On the Motion to Table S.Amdt. 4230 to S. 2766 (John Warner National Defense Authorization Act for Fiscal Year 2007)
To improve Federal contracting and procurement by eliminating fraud and abuse and improving competition in contracting and procurement and by enhancing administration of Federal contracting personnel.

107 Congress, Session 1, Vote Number 00337
On the Motion (Motion to Waive CBA re: Sec. 909 of the Baucus Amdt. No. 2125)
To provide a substitute amendment.

107 Congress, Session 2, Vote Number 00057
On the Amendment S.Amdt. 3033 to S.Amdt. 2917 to S. 517 (National Laboratories Partnership Improvement Act of 2001)
To provide for the fair treatment of Presidential judicial nominees.

107 Congress, Session 1, Vote Number 00338
On the Motion (Motion to Waive CBA re: Baucus Amdt. No. 2125)
To provide a substitute amendment.

109 Congress, Session 1, Vote Number 00259
On the Motion (Motion to Suspend Rule XVI, Paragraph 4 Re: Dorgan Amdt. No. 2078)
To establish a special committee of the Senate on war and reconstruction contracting.

109 Congress, Session 1, Vote Number 00229
On the Motion (Motion To Suspend Paragraph 4, Rule XVI RE: Clinton Amdt. NO. 1660)
To establish a congressional commission to examine the Federal, State, and local response to the devastation wrought by Hurricane Katrina in the Gulf Region of the United States especially in the States of Louisiana, Mississippi, Alabama, and other areas impacted in the aftermath and make immediate corrective measures to improve such responses in the future.

109 Congress, Session 1, Vote Number 00306
On the Amendment S.Amdt. 2438 to S. 1042 (National Defense Authorization Act for Fiscal Year 2006)
Relating to the American Forces Network.

109 Congress, Session 1, Vote Number 00077
On the Amendment S.Amdt. 257 to S.Con.Res. 18 (No short title on file)
To establish a point of order.

110 Congress, Session 1, Vote Number 00103
On the Amendment S.Amdt. 508 to S.Con.Res. 21 (No short title on file)
To establish a reserve fund for protecting coverage choices, additional benefits, and lower cost-sharing for Medicare beneficiaries.

108 Congress, Session 1, Vote Number 00091
On the Amendment S.Amdt. 419 to S.Con.Res. 23 (No short title on file)
To increase the budget authority for Federal "FIRE Act" grants and to express the sense of the Senate that from the total funding provided for Federal "FIRE Act" grants, not less than $1,000,000,000 per year will be used for grants to local governments to hire additional firefighters and not less that $750,000,000 per year for the purchase of firefighting equipment and training, and to provide for a reduction in the deficit.

108 Congress, Session 1, Vote Number 00104
On the Amendment S.Amdt. 281 to S.Con.Res. 23 (No short title on file)
To increase the budget allocation for programs to combat the global HIV/AIDS epidemic and to reduce the deficit.

107 Congress, Session 2, Vote Number 00152
On the Motion to Table S.Amdt. 3836 to S. 2600 (Terrorism Risk Insurance Act of 2002)
To provide for procedures for civil actions, and for other purposes.

107 Congress, Session 1, Vote Number 00161
On the Motion (Motion to Waive CBA  re: Grassley Amdt. No. 786)
A bill to provide for reconciliation pursuant to section 104 of the concurrent resolution on the budget for fiscal year 2002.


In [23]:
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.feature_extraction import DictVectorizer
from sklearn.manifold import MDS
import matplotlib.pyplot as plt
import numpy as np
import random

def graph_bills(bills, model="agglomerative", k=3, random_state=None):
    """
    Cluster roll calls by how members voted and draw them as a 2-D scatter plot.

    bills:        sequence of feature dicts accepted by DictVectorizer; the
                  notebook passes one {member name: vote_score} dict per
                  roll call.
    model:        "agglomerative" or "kmeans".
    k:            number of clusters.
    random_state: optional seed forwarded to KMeans and MDS so that the
                  clustering and the layout can be reproduced. The default
                  (None) keeps the previous unseeded behaviour.

    Returns the cluster label of each entry in `bills`.
    Raises ValueError when `model` is not one of the supported names.

    Side effect: sets matplotlib's global 'figure.figsize' rcParam and draws
    on the current pyplot figure.
    """
    # Validate first so a typo fails immediately with a clear message instead
    # of surfacing later as an unbound `labels`.
    if model not in ("agglomerative", "kmeans"):
        raise ValueError(
            "unknown model {!r}; expected 'agglomerative' or 'kmeans'".format(model)
        )

    dict_vectorizer = DictVectorizer()
    features = dict_vectorizer.fit_transform(bills)
    dense_features = features.toarray()

    if model == "agglomerative":
        labels = AgglomerativeClustering(n_clusters=k).fit_predict(dense_features)
    else:
        labels = KMeans(n_clusters=k, random_state=random_state).fit_predict(features)

    matplotlib.rcParams['figure.figsize'] = (15.0, 12.0)

    # Project the vote vectors to two dimensions for plotting.
    coords = MDS(random_state=random_state).fit_transform(dense_features)
    plt.scatter(coords[:, 0], coords[:, 1], c=labels, s=100)
    plt.axis("off")

    # Return only after plotting: the early return used to make the plotting
    # code above unreachable.
    return labels

In [4]:
# Bill-centric vote table for Congresses 111 through 114:
#   obama_bills[(congress, session, vote_num)][member name] = vote_score
obama_bills = defaultdict(dict)
bill_names = {}

for cong in range(111, 115):
    # The {"raw": 0} projection leaves the raw XML out of the returned documents.
    for roll_call in coll.find({"congress": cong}, {"raw": 0}):
        for member in roll_call["votes"]:
            bill_key = (cong, roll_call["session"], roll_call["vote_num"])
            obama_bills[bill_key][member["name"]] = member["vote_score"]

In [25]:
# Only the per-bill {member name: vote_score} dicts are needed here; the
# (congress, session, vote_num) keys are dropped.
graph_bills(list(obama_bills.values()), model="agglomerative", k=4)



In [31]:
# Freeze the bill order once so that row i of `features` (and of anything
# derived from it) can be mapped back to bill_ids[i].
bill_ids = list(obama_bills.keys())
votes = [obama_bills[bill_id] for bill_id in bill_ids]

# One row per bill, one column per member name, values are vote scores.
senator_vectorizer = DictVectorizer()
features = senator_vectorizer.fit_transform(votes)

In [27]:
# MDS is initialised randomly, so an unseeded run yields a different layout
# (and different annotation positions downstream) every time. Fixing the seed
# makes the embedding reproducible under Restart & Run All.
mds = MDS(random_state=0)
# 2-D coordinates, one row per bill, in the same order as the rows of `features`.
coords = mds.fit_transform(features.toarray())

In [75]:
# Assign each row of `features` to one of 3 clusters. toarray() converts the
# vectorizer output to a dense array before it is handed to the clusterer.
labels = AgglomerativeClustering(n_clusters=3).fit_predict(features.toarray())

In [78]:
# Draw every bill at its MDS coordinates, coloured by cluster label. The
# annotations added later in this cell are drawn on the same figure.
plot = plt.scatter(coords[:,0], coords[:,1], c=labels, s=100)
plt.axis("off")

# Feature (column) names of the vectorizer; the vote dicts it was fitted on
# are keyed by member name.
senators = senator_vectorizer.get_feature_names()

def percent(fun, lst):
    """
    Return the fraction of items in `lst` for which `fun(item)` holds.

    Despite the name, the result is a fraction between 0.0 and 1.0, not a
    value out of 100.

    fun: predicate applied to every item; its result is converted with int().
    lst: any iterable of items. It is materialised into a list first, so
         iterators (for example the result of filter() on Python 3) are
         accepted as well as lists.

    Returns 0.0 for an empty input instead of raising ZeroDivisionError.
    """
    items = list(lst)
    if not items:
        return 0.0
    matching = sum(int(fun(x)) for x in items)
    return matching / float(len(items))

def invert_cluster_labels(labels):
    """
    Group sample positions by the cluster they were assigned to.

    labels: array-like of shape [n_samples]; labels[i] is the cluster of
            sample i.

    Returns a defaultdict(list) mapping each label to the positions of the
    samples carrying that label, in their original order.
    """
    members_by_label = defaultdict(list)
    for position, cluster_label in enumerate(labels):
        members = members_by_label[cluster_label]
        members.append(position)
    return members_by_label

# Label a random handful of bills in every cluster with their yea-nay count,
# the fraction of each party with a positive vote score, and their Congress.
BILLS_PER_CLUSTER = 15

for cluster in invert_cluster_labels(labels).values():
    candidates = [(coords[i], bill_ids[i]) for i in cluster]
    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the sample size for small clusters.
    random_bills = random.sample(candidates, min(BILLS_PER_CLUSTER, len(candidates)))
    for xy, (cong, sess, vote_num) in random_bills:
        raw_xml = coll.find({"congress": cong, "session": sess, "vote_num": vote_num},{"raw": 1})[0]['raw']
        root = etree.XML(raw_xml.encode('utf-8'))

        bill = obama_bills[(cong, sess, vote_num)]

        yeas = root.xpath("count/yeas")[0].text
        nays = root.xpath("count/nays")[0].text
        # Not used in the annotation below; kept because it is a
        # notebook-level name.
        vote_result = root.xpath("vote_result_text")[0].text

        # (name, vote_score) pairs per party. Plain comprehensions and
        # index-based lambdas replace the tuple-parameter lambdas, which are
        # a syntax error on Python 3.
        dems = [pair for pair in bill.items() if "(D-" in pair[0]]
        gop = [pair for pair in bill.items() if "(R-" in pair[0]]

        percent_dem_yeas = percent(lambda pair: pair[1] > 0, dems)
        percent_gop_yeas = percent(lambda pair: pair[1] > 0, gop)

        annotation = "{0}-{1}, D: {2:.2f}, R: {3:.2f} Congress {4}".format(yeas, nays, percent_dem_yeas, percent_gop_yeas, cong)

        plt.annotate(annotation, xy = xy, xytext = (1, 1),
            textcoords = 'offset points', ha = 'center', va = 'bottom',
            bbox = dict(boxstyle = 'round,pad=0.3', fc="yellow",alpha = .8)
        )
plt.show()



In [120]:
def get_passed(xml):
    """
    Decide whether a roll call met its majority requirement.

    xml: parsed roll-call document exposing .xpath(). It must contain a
         "majority_requirement" element whose text has the form
         "needed/total" (for example "1/2" or "3/5"), plus "count/yeas" and
         "count/nays" elements holding integers.

    Returns True when the yeas' share of the yea + nay votes satisfies the
    requirement, otherwise False. A roll call with no yea or nay votes
    returns False instead of raising ZeroDivisionError.

    A simple majority ("1/2") needs strictly more than half of the votes, so
    a tie fails. Any other fraction is treated as a supermajority that is met
    when the share reaches the fraction exactly, so 60 yeas to 40 nays
    satisfies "3/5"; the previous strict comparison reported that as failed.

    NOTE(review): the share is measured against yeas + nays only. Confirm
    whether any requirement should instead be measured against the full
    membership.
    """
    needed, total = xml.xpath("majority_requirement")[0].text.split("/")
    needed = int(needed)
    total = float(total)
    yeas = int(xml.xpath("count/yeas")[0].text)
    nays = int(xml.xpath("count/nays")[0].text)

    votes_cast = yeas + nays
    if votes_cast == 0:
        return False

    # Cross-multiply rather than divide: yeas / votes_cast vs needed / total.
    yea_side = yeas * total
    required_side = needed * votes_cast

    if needed * 2 == total:
        # Simple majority: strictly more than half.
        return yea_side > required_side
    # Supermajority: reaching the fraction exactly is enough.
    return yea_side >= required_side

def graph_votes_by_party(cong_range):
    """
    Scatter-plot every roll call of the given Congresses by party support.

    cong_range: iterable of Congress numbers; each one is looked up in the
                notebook-level `coll` collection.

    Each roll call becomes one point:
      x = fraction of members whose name contains "(D-" with vote_score > 0
      y = fraction of members whose name contains "(R-" with vote_score > 0
      colour = 1 if get_passed() reports the roll call as passed, else 0

    Draws on the current pyplot figure and returns nothing.
    """
    x_coords = []
    y_coords = []
    passed = []

    def voted_yea(rec):
        # A positive vote_score is counted as a yea.
        return rec["vote_score"] > 0

    for cong in cong_range:
        for roll_call in coll.find({"congress": cong}):
            root = etree.XML(roll_call['raw'].encode('utf-8'))

            # Lists rather than filter() results, so percent() can take
            # their len() on Python 3 as well as Python 2.
            dems = [rec for rec in roll_call["votes"] if "(D-" in rec["name"]]
            gop = [rec for rec in roll_call["votes"] if "(R-" in rec["name"]]

            x_coords.append(percent(voted_yea, dems))
            y_coords.append(percent(voted_yea, gop))
            passed.append(int(get_passed(root)))

    plt.scatter(x_coords, y_coords, c=passed, s=100)
    plt.xlabel("Dem")
    plt.ylabel("GOP")

In [121]:
# Party-support scatter for Congresses 111 through 114.
graph_votes_by_party(range(111,115))



In [122]:
# Party-support scatter for Congresses 107 through 110.
graph_votes_by_party(range(107,111))



In [123]:
# Party-support scatter for Congresses 103 through 106.
graph_votes_by_party(range(103,107))