Getting 'obamacare' statement text from the Sunlight Capitol Words API


In [1]:
import codecs
import csv
import unicodedata

from sunlight import *

In [23]:
# Accumulator for every record returned by the paginated fetch below.
all_R = list()

In [24]:
# Walk the paginated capitolwords.text results for the phrase 'obamacare' in
# congress '113', appending each page of records to all_R.
#
# At most 100 pages are requested, but the loop now stops at the first empty
# page instead of issuing the remaining requests for nothing.
# NOTE(review): assumes the API returns an empty result for pages past the end
# of the result set -- confirm against the Capitol Words API docs.
#
# Gotcha: this cell extends all_R in place, so re-running it without first
# re-running the cell that resets all_R appends the same records again.
for page_number in range(100):
    response = capitolwords.text(phrase='obamacare', congress='113', page=str(page_number))
    if not response:
        # Nothing left to fetch.
        break
    all_R.extend(response)

In [25]:
# Sanity check: total number of records accumulated in all_R so far.
len(all_R)


Out[25]:
2363

In [28]:
# Normalise every field of every record to an ASCII-only unicode string
# (Python 2 semantics: `str` is bytes, `unicode` is text).
#
# Previously any value containing a non-ASCII character made str() raise
# UnicodeEncodeError, and the whole field was then replaced by a placeholder
# string, discarding the real content. Now only the offending characters are
# affected: accented letters are folded to their base letter via NFKD
# decomposition, and characters with no ASCII equivalent are dropped.
#
# The result is deliberately kept ASCII-only because the CSV export further
# down writes byte strings through an encoding writer, which cannot handle
# non-ASCII bytes under Python 2.
for record in all_R:
    # items() returns a list copy in Python 2, so assigning to existing keys
    # while looping is safe.
    for field, value in record.items():
        if isinstance(value, unicode):
            text = value
        else:
            # Byte strings, numbers, None, ...: same conversion as before.
            text = str(value).decode('utf-8')
        decomposed = unicodedata.normalize('NFKD', text)
        record[field] = decomposed.encode('ascii', 'ignore').decode('ascii')

In [29]:
# Fields copied from each record into the export rows. The order matters only
# in that it is the order in which keys are inserted into each row dict.
export_fields = (
    'chamber',
    'congress',
    'date',
    'number',
    'order',
    'speaker_first',
    'speaker_last',
    'speaker_party',
    'speaker_state',
    'speaking',
)

# One dict per record, holding only the export fields, each value encoded to
# a UTF-8 byte string. A record missing any of these fields raises KeyError.
statements = [
    {field: record[field].encode('utf8') for field in export_fields}
    for record in all_R
]

In [30]:
# CSV column names, taken from the first export row (all rows share the same
# keys). Raises IndexError when `statements` is empty.
keys = list(statements[0])

In [31]:
# Write data.csv: a header row followed by one row per entry in `statements`.
#
# The values in `statements` are already UTF-8 byte strings, and Python 2's
# csv module writes byte strings, so the file is opened in plain binary mode.
# Going through codecs.open(..., 'utf8') would try to encode those bytes a
# second time and raise UnicodeDecodeError on the first non-ASCII byte.
with open('data.csv', 'wb') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(statements)

In [ ]: