In [1]:
import pandas as pd
import numpy as np

In [2]:
legislatorsData = pd.read_csv("../data/legislators.csv")
legislatorsData.head()
legislatorsData.columns
legislators = pd.DataFrame(legislatorsData)
legislators.head()


Out[2]:
title firstname middlename lastname name_suffix nickname party state district in_office ... govtrack_id crp_id twitter_id congresspedia_url youtube_url facebook_id official_rss senate_class birthdate oc_email
0 Rep Neil NaN Abercrombie NaN NaN D HI 1 0 ... 400001 N00007665 neilabercrombie http://www.opencongress.org/wiki/Neil_Abercrombie http://youtube.com/hawaiirep1 NaN NaN NaN 1938-06-26 NaN
1 Rep Gary L. Ackerman NaN NaN D NY 5 0 ... 400003 N00001143 repgaryackerman http://www.opencongress.org/wiki/Gary_Ackerman http://youtube.com/RepAckerman RepAcherman NaN NaN 1942-11-19 NaN
2 Rep Robert B. Aderholt NaN NaN R AL 4 1 ... 400004 N00003028 Robert_Aderholt http://www.opencongress.org/wiki/Robert_Aderholt http://youtube.com/RobertAderholt 19787529402 NaN NaN 1965-07-22 Rep.Aderholt@opencongress.org
3 Sen Daniel Kahikina Akaka NaN NaN D HI Junior Seat 0 ... 300001 N00007653 NaN http://www.opencongress.org/wiki/Daniel_Akaka http://youtube.com/senatorakaka danielakaka NaN I 1924-09-11 NaN
4 Sen Wayne A. Allard NaN NaN R CO Senior Seat 0 ... 300003 N00009082 NaN http://www.opencongress.org/wiki/Wayne_Allard NaN NaN NaN II 1943-12-02 NaN

5 rows × 29 columns


In [3]:
legislators.columns.tolist()


Out[3]:
['title',
 'firstname',
 'middlename',
 'lastname',
 'name_suffix',
 'nickname',
 'party',
 'state',
 'district',
 'in_office',
 'gender',
 'phone',
 'fax',
 'website',
 'webform',
 'congress_office',
 'bioguide_id',
 'votesmart_id',
 'fec_id',
 'govtrack_id',
 'crp_id',
 'twitter_id',
 'congresspedia_url',
 'youtube_url',
 'facebook_id',
 'official_rss',
 'senate_class',
 'birthdate',
 'oc_email']

In [4]:
print legislators.bioguide_id.head()
l_bioGuides = legislators.bioguide_id.tolist()
l_bioGuides[:3]
print "Bio Guides available for the congress :", len(l_bioGuides)


0    A000014
1    A000022
2    A000055
3    A000069
4    A000109
Name: bioguide_id, dtype: object
Bio Guides available for the congress : 897

In [5]:
from urllib2 import Request, urlopen
import json
from pandas.io.json import json_normalize

request=Request('http://capitolwords.org/api/1/phrases.json?entity_type=month&entity_value=201007&sort=count+desc&apikey=0bf8e7eb6ce146f48217bfee767c998d')

response = urlopen(request)
contents = response.read()
data = json.loads(contents)
print "Example object and query to the sunlight API:"
print data[:5]


Example object and query to the sunlight API:
[{u'tfidf': 3.85965571248e-05, u'count': 5373, u'ngram': u'people'}, {u'tfidf': 1.30267768302e-05, u'count': 3637, u'ngram': u'one'}, {u'tfidf': 2.52066478599e-05, u'count': 3509, u'ngram': u'jobs'}, {u'tfidf': 1.17409333103e-05, u'count': 3278, u'ngram': u'american'}, {u'tfidf': 1.14866299957e-05, u'count': 3207, u'ngram': u'years'}]

In [6]:
first_table_ever = json_normalize(data)
print "After converting the content of the request in to a DataFrame\n Popular words of the month \n 100 only prionting a few:"
print "The shape is: ",first_table_ever.shape
first_table_ever.head(8)


After converting the content of the request in to a DataFrame
 Popular words of the month 
 100 only prionting a few:
The shape is:  (100, 3)
Out[6]:
count ngram tfidf
0 5373 people 0.000039
1 3637 one 0.000013
2 3509 jobs 0.000025
3 3278 american 0.000012
4 3207 years 0.000011
5 3051 going 0.000022
6 2874 work 0.000010
7 2874 support 0.000010

In [7]:
print "Yes they are unique"
len(first_table_ever.ngram.unique())


Yes they are unique
Out[7]:
100

In [8]:
print "One legislator fav words"

request=Request('http://capitolwords.org/api/1/phrases.json?entity_type=legislator&entity_value=L000551&apikey=0bf8e7eb6ce146f48217bfee767c998d')

response = urlopen(request)
contents = response.read()
data = json.loads(contents)
print data[:5]
len(data)


One legislator fav words
[{u'tfidf': 0.00227655265553, u'count': 847, u'ngram': u'oakland'}, {u'tfidf': 0.00099708278004, u'count': 218, u'ngram': u'alameda'}, {u'tfidf': 0.000860881733709, u'count': 948, u'ngram': u'aids'}, {u'tfidf': 0.000711050866521, u'count': 487, u'ngram': u'hiv'}, {u'tfidf': 0.000699507858333, u'count': 431, u'ngram': u'haiti'}]
Out[8]:
100

In [9]:
legislator_example = json_normalize(data)
legislator_example.head()
list_of_words = legislator_example.ngram.tolist()
favorite_words ="|".join(list_of_words)
print "list of favorite words of a legislator:"
favorite_words


list of favorite words of a legislator:
Out[9]:
u"oakland|alameda|aids|hiv|haiti|berkeley|african|caribbean|dellums|congresswoman|haitian|pandemic|bay|caucus|black|darfur|africa|iraq|genocide|disparities|california's|occupation|francisco|asian|sudan|gentlewoman|california|housing|color|naacp|cuba|racism|poverty|slavery|peace|san|bush|african-american|reverend|rights|baptist|really|unemployed|health|racial|height|civil|minorities|global|equality|women|iran|justice|movement|troops|uninsured|tonight|pacific|pastor|ron|congressman|social|discrimination|violence|waters|progressive|education|humanitarian|human|affordable|international|salute|church|afghanistan|jackson|girls|port|community|king|prescription|war|res|low-income|communities|weapons|unemployment|organizing|women's|security|poor|youth|drugs|nations|h.|frankly|nuclear|parks|green|resolution|congressional"

In [10]:
legislators_option1 = legislators
def requestWords( id ):
    id = str(id)
    url = "http://capitolwords.org/api/1/phrases.json?entity_type=legislator&entity_value="+id+"&apikey=0bf8e7eb6ce146f48217bfee767c998d"
    request=Request(url)
    response = urlopen(request)
    contents = response.read()
    len(contents)
    if len(contents) > 2:
        data = json.loads(contents)
        words = json_normalize(data)
        list_of_words = words.ngram.tolist()
        string_of_words ="|".join(list_of_words)
        return string_of_words
    else:
        return np.nan

requestWords(id ="A000369")


Out[10]:
nan

In [11]:
legislators_option1.dtypes
legislators_option1.bioguide_id.astype(str)
legislators_option1.dtypes
legislators_option1['favorite_words'] = legislators_option1.apply(lambda row: requestWords(row['bioguide_id']),axis=1)

In [12]:
legislators_option1.favorite_words.head(20)


Out[12]:
0     hawaiian|hawaii|hawaiians|hawaii's|kalaupapa|e...
1     queens|rabbi|jewish|bayside|flushing|nassau|br...
2     aderholt|requesting|irons|huntsville|alabama|r...
3     hawaii's|hawaii|hawaiians|hawaiian|dsh|va|fas|...
4     colorado|flats|missile|rocky|colorado's|denver...
5     camden|gloucester|cyprus|rutgers|opic|jersey|p...
6     mercury|maine|prescription|pharmaceutical|drug...
7     freddie|morgenthau|fannie|you've|pilgrims|mayf...
8     tennesseans|carbon-free|tennessee|electricity|...
9     rodney|baton|rouge|lsu|louisiana|la|ruston|req...
10    murphy|drill|pittsburgh|altmire|sbir|oil|anwr|...
11    upstate|herkimer|utica|tay-sachs|suny|cybersec...
12    nj|2009|objection|army|minutes|recognized|6|se...
13    fairborn|ohio's|xenia|gire|wright-patterson|wa...
14    endeavour|skyler|meanings|shuttle|eagle|scout|...
15    foia|8015|aumf|government-set|davis-bacon|1034...
16    hampshire|guantanamo|timberland|gitmo|detainin...
17                                                  NaN
18    alean|brock|gantt|anderton|calvin's|sickle|cal...
19                                                  NaN
Name: favorite_words, dtype: object

In [23]:
# Beneficiaries for events:
def requestBeneficiaries( id ):
    id = str(id)
    url = "/politicalpartytime.org/api/v1/event/?format=json&beneficiaries__crp_id="+id+"&apikey=0bf8e7eb6ce146f48217bfee767c998d"
    request=Request(url)
    response = urlopen(request)
    print response

    
    if len(contents) > 2:
        data = json.loads(contents)
        beneficiary = json_normalize(data)
        #words = json_normalize(data)
        #list_of_words = words.ngram.tolist()
        #string_of_words ="|".join(list_of_words)
        #return string_of_words
    else:
        return np.nan

requestBeneficiaries(id ="N00003675")


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-23-98fcbc61e9e1> in <module>()
     18         return np.nan
     19 
---> 20 requestBeneficiaries(id ="N00003675")

<ipython-input-23-98fcbc61e9e1> in requestBeneficiaries(id)
      4     url = "/politicalpartytime.org/api/v1/event/?format=json&beneficiaries__crp_id="+id+"&apikey=0bf8e7eb6ce146f48217bfee767c998d"
      5     request=Request(url)
----> 6     response = urlopen(request)
      7     print response
      8 

/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    152     else:
    153         opener = _opener
--> 154     return opener.open(url, data, timeout)
    155 
    156 def install_opener(opener):

/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout)
    421 
    422         req.timeout = timeout
--> 423         protocol = req.get_type()
    424 
    425         # pre-process request

/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.pyc in get_type(self)
    283             self.type, self.__r_type = splittype(self.__original)
    284             if self.type is None:
--> 285                 raise ValueError, "unknown url type: %s" % self.__original
    286         return self.type
    287 

ValueError: unknown url type: /politicalpartytime.org/api/v1/event/?format=json&beneficiaries__crp_id=N00003675&apikey=0bf8e7eb6ce146f48217bfee767c998d

In [ ]: