notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np



In [2]:

    
legislatorsData = pd.read_csv("../data/legislators.csv")
legislatorsData.head()
legislatorsData.columns
legislators = pd.DataFrame(legislatorsData)
legislators.head()









    Out[2]:






  
    
      
      title
      firstname
      middlename
      lastname
      name_suffix
      nickname
      party
      state
      district
      in_office
      ...
      govtrack_id
      crp_id
      twitter_id
      congresspedia_url
      youtube_url
      facebook_id
      official_rss
      senate_class
      birthdate
      oc_email
    
  
  
    
      0
      Rep
      Neil
      NaN
      Abercrombie
      NaN
      NaN
      D
      HI
      1
      0
      ...
      400001
      N00007665
      neilabercrombie
      http://www.opencongress.org/wiki/Neil_Abercrombie
      http://youtube.com/hawaiirep1
      NaN
      NaN
      NaN
      1938-06-26
      NaN
    
    
      1
      Rep
      Gary
      L.
      Ackerman
      NaN
      NaN
      D
      NY
      5
      0
      ...
      400003
      N00001143
      repgaryackerman
      http://www.opencongress.org/wiki/Gary_Ackerman
      http://youtube.com/RepAckerman
      RepAcherman
      NaN
      NaN
      1942-11-19
      NaN
    
    
      2
      Rep
      Robert
      B.
      Aderholt
      NaN
      NaN
      R
      AL
      4
      1
      ...
      400004
      N00003028
      Robert_Aderholt
      http://www.opencongress.org/wiki/Robert_Aderholt
      http://youtube.com/RobertAderholt
      19787529402
      NaN
      NaN
      1965-07-22
      Rep.Aderholt@opencongress.org
    
    
      3
      Sen
      Daniel
      Kahikina
      Akaka
      NaN
      NaN
      D
      HI
      Junior Seat
      0
      ...
      300001
      N00007653
      NaN
      http://www.opencongress.org/wiki/Daniel_Akaka
      http://youtube.com/senatorakaka
      danielakaka
      NaN
      I
      1924-09-11
      NaN
    
    
      4
      Sen
      Wayne
      A.
      Allard
      NaN
      NaN
      R
      CO
      Senior Seat
      0
      ...
      300003
      N00009082
      NaN
      http://www.opencongress.org/wiki/Wayne_Allard
      NaN
      NaN
      NaN
      II
      1943-12-02
      NaN
    
  

5 rows × 29 columns



In [3]:

    
legislators.columns.tolist()









    Out[3]:





['title',
 'firstname',
 'middlename',
 'lastname',
 'name_suffix',
 'nickname',
 'party',
 'state',
 'district',
 'in_office',
 'gender',
 'phone',
 'fax',
 'website',
 'webform',
 'congress_office',
 'bioguide_id',
 'votesmart_id',
 'fec_id',
 'govtrack_id',
 'crp_id',
 'twitter_id',
 'congresspedia_url',
 'youtube_url',
 'facebook_id',
 'official_rss',
 'senate_class',
 'birthdate',
 'oc_email']



In [4]:

    
print legislators.bioguide_id.head()
l_bioGuides = legislators.bioguide_id.tolist()
l_bioGuides[:3]
print "Bio Guides available for the congress :", len(l_bioGuides)









    



0    A000014
1    A000022
2    A000055
3    A000069
4    A000109
Name: bioguide_id, dtype: object
Bio Guides available for the congress : 897



In [5]:

    
from urllib2 import Request, urlopen
import json
from pandas.io.json import json_normalize

request=Request('http://capitolwords.org/api/1/phrases.json?entity_type=month&entity_value=201007&sort=count+desc&apikey=0bf8e7eb6ce146f48217bfee767c998d')

response = urlopen(request)
contents = response.read()
data = json.loads(contents)
print "Example object and query to the sunlight API:"
print data[:5]









    



Example object and query to the sunlight API:
[{u'tfidf': 3.85965571248e-05, u'count': 5373, u'ngram': u'people'}, {u'tfidf': 1.30267768302e-05, u'count': 3637, u'ngram': u'one'}, {u'tfidf': 2.52066478599e-05, u'count': 3509, u'ngram': u'jobs'}, {u'tfidf': 1.17409333103e-05, u'count': 3278, u'ngram': u'american'}, {u'tfidf': 1.14866299957e-05, u'count': 3207, u'ngram': u'years'}]



In [6]:

    
first_table_ever = json_normalize(data)
print "After converting the content of the request in to a DataFrame\n Popular words of the month \n 100 only prionting a few:"
print "The shape is: ",first_table_ever.shape
first_table_ever.head(8)









    



After converting the content of the request in to a DataFrame
 Popular words of the month 
 100 only prionting a few:
The shape is:  (100, 3)






    Out[6]:






  
    
      
      count
      ngram
      tfidf
    
  
  
    
      0
      5373
      people
      0.000039
    
    
      1
      3637
      one
      0.000013
    
    
      2
      3509
      jobs
      0.000025
    
    
      3
      3278
      american
      0.000012
    
    
      4
      3207
      years
      0.000011
    
    
      5
      3051
      going
      0.000022
    
    
      6
      2874
      work
      0.000010
    
    
      7
      2874
      support
      0.000010



In [7]:

    
print "Yes they are unique"
len(first_table_ever.ngram.unique())









    



Yes they are unique






    Out[7]:





100



In [8]:

    
print "One legislator fav words"

request=Request('http://capitolwords.org/api/1/phrases.json?entity_type=legislator&entity_value=L000551&apikey=0bf8e7eb6ce146f48217bfee767c998d')

response = urlopen(request)
contents = response.read()
data = json.loads(contents)
print data[:5]
len(data)









    



One legislator fav words
[{u'tfidf': 0.00227655265553, u'count': 847, u'ngram': u'oakland'}, {u'tfidf': 0.00099708278004, u'count': 218, u'ngram': u'alameda'}, {u'tfidf': 0.000860881733709, u'count': 948, u'ngram': u'aids'}, {u'tfidf': 0.000711050866521, u'count': 487, u'ngram': u'hiv'}, {u'tfidf': 0.000699507858333, u'count': 431, u'ngram': u'haiti'}]






    Out[8]:





100



In [9]:

    
legislator_example = json_normalize(data)
legislator_example.head()
list_of_words = legislator_example.ngram.tolist()
favorite_words ="|".join(list_of_words)
print "list of favorite words of a legislator:"
favorite_words









    



list of favorite words of a legislator:






    Out[9]:





u"oakland|alameda|aids|hiv|haiti|berkeley|african|caribbean|dellums|congresswoman|haitian|pandemic|bay|caucus|black|darfur|africa|iraq|genocide|disparities|california's|occupation|francisco|asian|sudan|gentlewoman|california|housing|color|naacp|cuba|racism|poverty|slavery|peace|san|bush|african-american|reverend|rights|baptist|really|unemployed|health|racial|height|civil|minorities|global|equality|women|iran|justice|movement|troops|uninsured|tonight|pacific|pastor|ron|congressman|social|discrimination|violence|waters|progressive|education|humanitarian|human|affordable|international|salute|church|afghanistan|jackson|girls|port|community|king|prescription|war|res|low-income|communities|weapons|unemployment|organizing|women's|security|poor|youth|drugs|nations|h.|frankly|nuclear|parks|green|resolution|congressional"



In [10]:

    
legislators_option1 = legislators
def requestWords( id ):
    id = str(id)
    url = "http://capitolwords.org/api/1/phrases.json?entity_type=legislator&entity_value="+id+"&apikey=0bf8e7eb6ce146f48217bfee767c998d"
    request=Request(url)
    response = urlopen(request)
    contents = response.read()
    len(contents)
    if len(contents) > 2:
        data = json.loads(contents)
        words = json_normalize(data)
        list_of_words = words.ngram.tolist()
        string_of_words ="|".join(list_of_words)
        return string_of_words
    else:
        return np.nan

requestWords(id ="A000369")









    Out[10]:





nan



In [11]:

    
legislators_option1.dtypes
legislators_option1.bioguide_id.astype(str)
legislators_option1.dtypes
legislators_option1['favorite_words'] = legislators_option1.apply(lambda row: requestWords(row['bioguide_id']),axis=1)



In [12]:

    
legislators_option1.favorite_words.head(20)









    Out[12]:





0     hawaiian|hawaii|hawaiians|hawaii's|kalaupapa|e...
1     queens|rabbi|jewish|bayside|flushing|nassau|br...
2     aderholt|requesting|irons|huntsville|alabama|r...
3     hawaii's|hawaii|hawaiians|hawaiian|dsh|va|fas|...
4     colorado|flats|missile|rocky|colorado's|denver...
5     camden|gloucester|cyprus|rutgers|opic|jersey|p...
6     mercury|maine|prescription|pharmaceutical|drug...
7     freddie|morgenthau|fannie|you've|pilgrims|mayf...
8     tennesseans|carbon-free|tennessee|electricity|...
9     rodney|baton|rouge|lsu|louisiana|la|ruston|req...
10    murphy|drill|pittsburgh|altmire|sbir|oil|anwr|...
11    upstate|herkimer|utica|tay-sachs|suny|cybersec...
12    nj|2009|objection|army|minutes|recognized|6|se...
13    fairborn|ohio's|xenia|gire|wright-patterson|wa...
14    endeavour|skyler|meanings|shuttle|eagle|scout|...
15    foia|8015|aumf|government-set|davis-bacon|1034...
16    hampshire|guantanamo|timberland|gitmo|detainin...
17                                                  NaN
18    alean|brock|gantt|anderton|calvin's|sickle|cal...
19                                                  NaN
Name: favorite_words, dtype: object



In [23]:

    
# Beneficiaries for events:
def requestBeneficiaries( id ):
    id = str(id)
    url = "/politicalpartytime.org/api/v1/event/?format=json&beneficiaries__crp_id="+id+"&apikey=0bf8e7eb6ce146f48217bfee767c998d"
    request=Request(url)
    response = urlopen(request)
    print response

    
    if len(contents) > 2:
        data = json.loads(contents)
        beneficiary = json_normalize(data)
        #words = json_normalize(data)
        #list_of_words = words.ngram.tolist()
        #string_of_words ="|".join(list_of_words)
        #return string_of_words
    else:
        return np.nan

requestBeneficiaries(id ="N00003675")









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-23-98fcbc61e9e1> in <module>()
     18         return np.nan
     19 
---> 20 requestBeneficiaries(id ="N00003675")

<ipython-input-23-98fcbc61e9e1> in requestBeneficiaries(id)
      4     url = "/politicalpartytime.org/api/v1/event/?format=json&beneficiaries__crp_id="+id+"&apikey=0bf8e7eb6ce146f48217bfee767c998d"
      5     request=Request(url)
----> 6     response = urlopen(request)
      7     print response
      8 

/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    152     else:
    153         opener = _opener
--> 154     return opener.open(url, data, timeout)
    155 
    156 def install_opener(opener):

/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout)
    421 
    422         req.timeout = timeout
--> 423         protocol = req.get_type()
    424 
    425         # pre-process request

/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.pyc in get_type(self)
    283             self.type, self.__r_type = splittype(self.__original)
    284             if self.type is None:
--> 285                 raise ValueError, "unknown url type: %s" % self.__original
    286         return self.type
    287 

ValueError: unknown url type: /politicalpartytime.org/api/v1/event/?format=json&beneficiaries__crp_id=N00003675&apikey=0bf8e7eb6ce146f48217bfee767c998d



In [ ]:

	title	firstname	middlename	lastname	name_suffix	nickname	party	state	district	in_office	...	govtrack_id	crp_id	twitter_id	congresspedia_url	youtube_url	facebook_id	official_rss	senate_class	birthdate	oc_email
0	Rep	Neil	NaN	Abercrombie	NaN	NaN	D	HI	1	0	...	400001	N00007665	neilabercrombie	http://www.opencongress.org/wiki/Neil_Abercrombie	http://youtube.com/hawaiirep1	NaN	NaN	NaN	1938-06-26	NaN
1	Rep	Gary	L.	Ackerman	NaN	NaN	D	NY	5	0	...	400003	N00001143	repgaryackerman	http://www.opencongress.org/wiki/Gary_Ackerman	http://youtube.com/RepAckerman	RepAcherman	NaN	NaN	1942-11-19	NaN
2	Rep	Robert	B.	Aderholt	NaN	NaN	R	AL	4	1	...	400004	N00003028	Robert_Aderholt	http://www.opencongress.org/wiki/Robert_Aderholt	http://youtube.com/RobertAderholt	19787529402	NaN	NaN	1965-07-22	Rep.Aderholt@opencongress.org
3	Sen	Daniel	Kahikina	Akaka	NaN	NaN	D	HI	Junior Seat	0	...	300001	N00007653	NaN	http://www.opencongress.org/wiki/Daniel_Akaka	http://youtube.com/senatorakaka	danielakaka	NaN	I	1924-09-11	NaN
4	Sen	Wayne	A.	Allard	NaN	NaN	R	CO	Senior Seat	0	...	300003	N00009082	NaN	http://www.opencongress.org/wiki/Wayne_Allard	NaN	NaN	NaN	II	1943-12-02	NaN

	count	ngram	tfidf
0	5373	people	0.000039
1	3637	one	0.000013
2	3509	jobs	0.000025
3	3278	american	0.000012
4	3207	years	0.000011
5	3051	going	0.000022
6	2874	work	0.000010
7	2874	support	0.000010