In [1]:
import requests
import pandas as pd

# Let wide frames show up to 30 columns before pandas truncates the display.
pd.options.display.max_columns = 30

In [2]:
# Download the poll listing and decode its JSON body; the cells below iterate
# over `polls` as a sequence of poll dicts.
# The timeout keeps the cell from hanging forever on an unresponsive server,
# and raise_for_status() turns an HTTP error into an immediate exception
# instead of letting an error payload be treated as poll data downstream.
polls_response = requests.get(
    "http://elections.huffingtonpost.com/pollster/api/polls.json",
    timeout=30,
)
polls_response.raise_for_status()
polls = polls_response.json()

In [3]:
# Naive top-level view: one row per poll. Nested fields such as `questions`
# remain unexpanded lists of dicts inside their cells.
df_dumb = pd.DataFrame(polls)
df_dumb.head(n=2)


Out[3]:
affiliation end_date id last_updated method partisan pollster questions source sponsors start_date survey_houses
0 None 2016-04-18 24320 2016-04-20T17:13:30.513Z Automated Phone Nonpartisan Gravis Marketing [{'name': '2016 Delaware Republican Presidenti... http://gravismarketing.com/polling-and-market-... [] 2016-04-17 [{'name': 'Gravis Marketing', 'party': 'N/A'}]
1 None 2016-04-19 24319 2016-04-20T17:02:37.561Z Live Phone Nonpartisan Gallup [{'name': 'Obama Job Approval', 'topic': 'obam... http://www.gallup.com/poll/113980/Gallup-Daily... [] 2016-04-17 [{'name': 'Gallup Poll', 'party': 'N/A'}]

In [4]:
# Drill one level down: expand the questions attached to the first poll.
first_poll = df_dumb.iloc[0]
df_dumber = pd.DataFrame(first_poll["questions"])
df_dumber.head()


Out[4]:
chart name state subpopulations topic
0 None 2016 Delaware Republican Presidential Primary None [{'name': 'Likely Voters - Republican', 'margi... None
1 None 2016 Delaware Democratic Presidential Primary None [{'name': 'Likely Voters - Democrat', 'margin_... None

In [5]:
# Next level: expand the subpopulations of the first question.
first_question = df_dumber.iloc[0]
df_dumbest = pd.DataFrame(first_question["subpopulations"])
df_dumbest.head()


Out[5]:
margin_of_error name observations responses
0 3.0 Likely Voters - Republican 1038 [{'choice': 'Cruz', 'first_name': 'Ted', 'last...

In [6]:
# Innermost level: expand the responses of the first subpopulation.
first_subpopulation = df_dumbest.iloc[0]
df_dumber_than_dumbest = pd.DataFrame(first_subpopulation["responses"])
df_dumber_than_dumbest.head()


Out[6]:
choice first_name incumbent last_name party value
0 Cruz Ted False Cruz Rep 15
1 Kasich John False Kasich Rep 18
2 Trump Donald False Trump Rep 55
3 Undecided None None None None 12

In [7]:
def _prefixed(record, prefix, nested_key):
    """Copy `record` with `prefix` prepended to every key, leaving out `nested_key`.

    `nested_key` names the child list that is expanded separately one level
    further down, so it is not carried into the flattened row.
    """
    return {
        prefix + str(name): value
        for name, value in record.items()
        if name != nested_key
    }


def _flat_rows(poll_list):
    """Yield one flat dict per response.

    Each row starts from the response's own (unprefixed) fields and is then
    extended with the prefixed fields of its subpopulation, question and poll,
    in that order.
    """
    for poll in poll_list:
        poll_fields = _prefixed(poll, "poll_", "questions")
        for question in poll["questions"]:
            question_fields = _prefixed(question, "question_", "subpopulations")
            for subpopulation in question["subpopulations"]:
                subpopulation_fields = _prefixed(
                    subpopulation, "subpopulation_", "responses"
                )
                for response in subpopulation["responses"]:
                    row = dict(response)
                    for extra in (subpopulation_fields, question_fields, poll_fields):
                        row.update(extra)
                    yield row


# Flatten poll -> question -> subpopulation -> response into one row per response.
rows = list(_flat_rows(polls))

df = pd.DataFrame(rows)
df.head(2)


Out[7]:
choice first_name incumbent last_name party poll_affiliation poll_end_date poll_id poll_last_updated poll_method poll_partisan poll_pollster poll_source poll_sponsors poll_start_date poll_survey_houses question_chart question_name question_state question_topic subpopulation_margin_of_error subpopulation_name subpopulation_observations value
0 Cruz Ted False Cruz Rep None 2016-04-18 24320 2016-04-20T17:13:30.513Z Automated Phone Nonpartisan Gravis Marketing http://gravismarketing.com/polling-and-market-... [] 2016-04-17 [{'name': 'Gravis Marketing', 'party': 'N/A'}] None 2016 Delaware Republican Presidential Primary None None 3.0 Likely Voters - Republican 1038 15
1 Kasich John False Kasich Rep None 2016-04-18 24320 2016-04-20T17:13:30.513Z Automated Phone Nonpartisan Gravis Marketing http://gravismarketing.com/polling-and-market-... [] 2016-04-17 [{'name': 'Gravis Marketing', 'party': 'N/A'}] None 2016 Delaware Republican Presidential Primary None None 3.0 Likely Voters - Republican 1038 18