In [45]:
from collections import defaultdict
from pymongo import MongoClient
import matplotlib
%matplotlib inline
# Open a MongoDB connection. MongoClient() is called with no arguments, so
# pymongo's default connection settings apply.
client = MongoClient()
# Database that holds the senate data.
db = client["senators"]
# Collection queried by the later cells for roll-call documents
# (they read the "congress", "session", "vote_num" and "votes" fields).
coll = db["raw_xml"]
In [46]:
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.feature_extraction import DictVectorizer
from sklearn.manifold import MDS
import matplotlib.pyplot as plt
import numpy as np
import random
def graph_senators(names, senators, model="agglomerative", random_state=None):
    """Cluster senators by their votes and draw them as an annotated scatter plot.

    The vote dictionaries are turned into a feature matrix with
    ``DictVectorizer``, clustered with the selected model, and embedded in
    two dimensions with ``MDS``. Each point is annotated with the matching
    entry of ``names`` inside a box that is blue when the cluster label is
    truthy and red otherwise.

    Parameters
    ----------
    names : iterable
        Annotation text for each senator, in the same order as ``senators``.
    senators : iterable of dict
        One feature dictionary per senator, as accepted by
        ``DictVectorizer.fit_transform``.
    model : {"agglomerative", "kmeans"}, optional
        ``"agglomerative"`` uses ``AgglomerativeClustering`` with its default
        settings on the dense matrix; ``"kmeans"`` uses ``KMeans`` with two
        clusters on the vectorizer output.
    random_state : int or None, optional
        Forwarded to ``KMeans`` and ``MDS`` so that a run can be reproduced.
        ``None`` (the default) keeps the unseeded behaviour.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported names.

    Notes
    -----
    Sets ``matplotlib.rcParams['figure.figsize']`` globally and draws on the
    current pyplot figure; nothing is returned.
    """
    # Validate first: previously an unknown model left `labels` unbound and
    # surfaced as a NameError only after the vectorizing work was done.
    if model not in ("agglomerative", "kmeans"):
        raise ValueError(
            "unknown model %r; expected 'agglomerative' or 'kmeans'" % (model,)
        )

    features = DictVectorizer().fit_transform(senators)
    # Dense copy is needed by both AgglomerativeClustering and MDS; build it once.
    dense_features = features.toarray()

    if model == "agglomerative":
        labels = AgglomerativeClustering().fit_predict(dense_features)
    else:
        labels = KMeans(2, random_state=random_state).fit_predict(features)

    matplotlib.rcParams['figure.figsize'] = (15.0, 12.0)
    coords = MDS(random_state=random_state).fit_transform(dense_features)
    plt.scatter(coords[:, 0], coords[:, 1], c=labels, s=100)
    plt.axis("off")

    for name, xy, label in zip(names, coords, labels):
        color = "blue" if label else "red"
        plt.annotate(
            name, xy=xy, xytext=(1, 1),
            textcoords='offset points', ha='right', va='bottom',
            bbox=dict(boxstyle='round,pad=0.3', fc=color, alpha=.4),
        )
In [47]:
# Build votes[senator name][(congress, session, vote number)] -> vote score
# from the roll calls of congresses 111 through 114.
votes = defaultdict(dict)
for congress in range(111, 115):
    # The second argument is the projection: the "raw" field is not needed here.
    matching_roll_calls = coll.find({"congress": congress}, {"raw": 0})
    for record in matching_roll_calls:
        for ballot in record["votes"]:
            score = ballot["vote_score"]
            per_senator = votes[ballot["name"]]
            per_senator[(congress, record["session"], record["vote_num"])] = score
In [48]:
# Obama: plot the senators collected in `votes` with the default clustering model.
# Names and vote dictionaries come from the same dict, so their order matches.
senator_names = votes.keys()
senators = list(votes.values())
graph_senators(senator_names, senators)
In [ ]:
# Re-plot the current senator_names / senators using k-means clustering.
graph_senators(senator_names, senators, model="kmeans")
In [ ]:
# Build bush_votes[senator name][(congress, session, vote number)] -> vote score
# from the roll calls of congresses 107 through 110.
bush_votes = defaultdict(dict)
for congress in range(107, 111):
    # The second argument is the projection: the "raw" field is not needed here.
    matching_roll_calls = coll.find({"congress": congress}, {"raw": 0})
    for record in matching_roll_calls:
        for ballot in record["votes"]:
            score = ballot["vote_score"]
            per_senator = bush_votes[ballot["name"]]
            per_senator[(congress, record["session"], record["vote_num"])] = score
In [ ]:
# Bush: plot the senators collected in `bush_votes` with the default clustering model.
# Names and vote dictionaries come from the same dict, so their order matches.
senator_names = bush_votes.keys()
senators = list(bush_votes.values())
graph_senators(senator_names, senators)
In [ ]:
# Re-plot the current senator_names / senators using k-means clustering.
graph_senators(senator_names, senators, model="kmeans")