Assignment 4

Using data from this FiveThirtyEight post, write code to calculate the correlation of the responses from the poll. Respond to the story in your PR. Is this a good example of data journalism? Why or why not?


In [10]:
import pandas as pd
%matplotlib inline
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

In [13]:
df = pd.read_csv("Iran_data_3.csv")

In [14]:
df


Out[14]:
Subject Sentiment Total Dem Rep Ind Men Women White Black ... No Degree Under Age 35 35-54 55+ 65+ Under $50k $50k+ Lib Cons Party
0 Obama Approve 44% 78% 10% 37% 41% 47% 35% 85% ... 43% 56% 35% 45% 43% 48% 42% 69% 24% 15%
1 Obama Disapprove 50% 17% 86% 53% 53% 47% 60% 7% ... 52% 36% 60% 50% 52% 46% 52% 24% 72% 82%
2 Obama (Don't know) 6% 5% 4% 10% 5% 6% 5% 8% ... 6% 8% 4% 5% 5% 5% 5% 6% 4% 2%
3 Negotiations Very confident 17% 29% 4% 15% 17% 17% 11% 37% ... 14% 18% 13% 19% 18% 18% 16% 24% 10% 6%
4 Negotiations Somewhat confident 31% 49% 13% 29% 28% 35% 29% 46% ... 29% 39% 29% 29% 26% 34% 31% 48% 19% 11%
5 Negotiations Not very confident 19% 11% 26% 22% 18% 19% 21% 6% ... 22% 23% 20% 15% 14% 16% 20% 16% 22% 16%
6 Negotiations Not at all confident 31% 8% 57% 32% 35% 27% 37% 5% ... 32% 17% 38% 34% 40% 29% 32% 11% 47% 67%
7 Negotiations (Don't know) 2% 4% 1% 2% 2% 2% 2% 5% ... 3% 3% 0 3% 2% 3% 1% 1% 2% 0
8 Deal Favor 47% 60% 34% 44% 46% 47% 45% 54% ... 45% 51% 46% 45% 41% 47% 48% 61% 35% 35%
9 Deal Oppose 43% 26% 60% 49% 48% 39% 46% 31% ... 45% 37% 47% 44% 49% 45% 41% 28% 56% 61%
10 Deal (Don't know) 10% 14% 6% 7% 6% 14% 9% 15% ... 10% 12% 7% 11% 10% 9% 11% 11% 9% 4%

11 rows × 21 columns


In [15]:
df.head()


Out[15]:
Subject Sentiment Total Dem Rep Ind Men Women White Black ... No Degree Under Age 35 35-54 55+ 65+ Under $50k $50k+ Lib Cons Party
0 Obama Approve 44% 78% 10% 37% 41% 47% 35% 85% ... 43% 56% 35% 45% 43% 48% 42% 69% 24% 15%
1 Obama Disapprove 50% 17% 86% 53% 53% 47% 60% 7% ... 52% 36% 60% 50% 52% 46% 52% 24% 72% 82%
2 Obama (Don't know) 6% 5% 4% 10% 5% 6% 5% 8% ... 6% 8% 4% 5% 5% 5% 5% 6% 4% 2%
3 Negotiations Very confident 17% 29% 4% 15% 17% 17% 11% 37% ... 14% 18% 13% 19% 18% 18% 16% 24% 10% 6%
4 Negotiations Somewhat confident 31% 49% 13% 29% 28% 35% 29% 46% ... 29% 39% 29% 29% 26% 34% 31% 48% 19% 11%

5 rows × 21 columns


In [21]:
obama_admin_supporters_df = df[df['Sentiment'] == 'Approve']

In [22]:
obama_admin_supporters_df


Out[22]:
Subject Sentiment Total Dem Rep Ind Men Women White Black ... No Degree Under Age 35 35-54 55+ 65+ Under $50k $50k+ Lib Cons Party
0 Obama Approve 44% 78% 10% 37% 41% 47% 35% 85% ... 43% 56% 35% 45% 43% 48% 42% 69% 24% 15%

1 rows × 21 columns


In [25]:
del obama_admin_supporters_df  ['Sentiment']

In [26]:
iran_deal_supporters_df = df[df['Sentiment'] == 'Favor']

In [27]:
iran_deal_supporters_df


Out[27]:
Subject Sentiment Total Dem Rep Ind Men Women White Black ... No Degree Under Age 35 35-54 55+ 65+ Under $50k $50k+ Lib Cons Party
8 Deal Favor 47% 60% 34% 44% 46% 47% 45% 54% ... 45% 51% 46% 45% 41% 47% 48% 61% 35% 35%

1 rows × 21 columns


In [28]:
del iran_deal_supporters_df['Sentiment']

In [29]:
iran_deal_supporters_df


Out[29]:
Subject Total Dem Rep Ind Men Women White Black College Degree No Degree Under Age 35 35-54 55+ 65+ Under $50k $50k+ Lib Cons Party
8 Deal 47% 60% 34% 44% 46% 47% 45% 54% 50% 45% 51% 46% 45% 41% 47% 48% 61% 35% 35%

In [30]:
deal_and_obama_approval_df = obama_admin_supporters_df.append(iran_deal_supporters_df)

In [31]:
deal_and_obama_approval_df


Out[31]:
Subject Total Dem Rep Ind Men Women White Black College Degree No Degree Under Age 35 35-54 55+ 65+ Under $50k $50k+ Lib Cons Party
0 Obama 44% 78% 10% 37% 41% 47% 35% 85% 47% 43% 56% 35% 45% 43% 48% 42% 69% 24% 15%
8 Deal 47% 60% 34% 44% 46% 47% 45% 54% 50% 45% 51% 46% 45% 41% 47% 48% 61% 35% 35%

In [32]:
del deal_and_obama_approval_df['Subject']

In [33]:
del deal_and_obama_approval_df['Total']

In [34]:
deal_and_obama_approval_transpose_df = deal_and_obama_approval_df.transpose()

In [42]:
deal_and_obama_approval_transpose_df.columns = ["Obama Admin Approval", "Iran Deal Support"]
deal_and_obama_approval_transpose_df


Out[42]:
Obama Admin Approval Iran Deal Support
Dem 78% 60%
Rep 10% 34%
Ind 37% 44%
Men 41% 46%
Women 47% 47%
White 35% 45%
Black 85% 54%
College Degree 47% 50%
No Degree 43% 45%
Under Age 35 56% 51%
35-54 35% 46%
55+ 45% 45%
65+ 43% 41%
Under $50k 48% 47%
$50k+ 42% 48%
Lib 69% 61%
Cons 24% 35%
Party 15% 35%

In [43]:
plt.style.use('fivethirtyeight')

In [44]:
deal_and_obama_approval_transpose_df.plot(kind='scatter', x= 'Obama Admin Approval', y='Iran Deal Support')


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-44-3d09f3d484db> in <module>()
----> 1 deal_and_obama_approval_transpose_df.plot(kind='scatter', x= 'Obama Admin Approval', y='Iran Deal Support')

/Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages/pandas/tools/plotting.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   3738                           fontsize=fontsize, colormap=colormap, table=table,
   3739                           yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 3740                           sort_columns=sort_columns, **kwds)
   3741     __call__.__doc__ = plot_frame.__doc__
   3742 

/Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages/pandas/tools/plotting.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   2612                  yerr=yerr, xerr=xerr,
   2613                  secondary_y=secondary_y, sort_columns=sort_columns,
-> 2614                  **kwds)
   2615 
   2616 

/Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages/pandas/tools/plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds)
   2439         plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
   2440 
-> 2441     plot_obj.generate()
   2442     plot_obj.draw()
   2443     return plot_obj.result

/Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages/pandas/tools/plotting.py in generate(self)
   1024     def generate(self):
   1025         self._args_adjust()
-> 1026         self._compute_plot_data()
   1027         self._setup_subplots()
   1028         self._make_plot()

/Users/skkandrach/.virtualenvs/lede/lib/python3.5/site-packages/pandas/tools/plotting.py in _compute_plot_data(self)
   1133         if is_empty:
   1134             raise TypeError('Empty {0!r}: no numeric data to '
-> 1135                             'plot'.format(numeric_data.__class__.__name__))
   1136 
   1137         self.data = numeric_data

TypeError: Empty 'DataFrame': no numeric data to plot

In [45]:
deal_and_obama_approval_transpose_df.corr()


Out[45]:

In [46]:
lm = smf.ols(formula='Favor_Deal~Approve_Obama',deal_and_obama_approval_transpose_df).fit()
lm.params


  File "<ipython-input-46-0a01704b953f>", line 1
    lm = smf.ols(formula='Favor_Deal~Approve_Obama',deal_and_obama_approval_transpose_df).fit()
                                                   ^
SyntaxError: positional argument follows keyword argument

In [47]:
intercept, slope = lm.params
ax = deal_and_obama_approval_transpose_df.plot(kind='scatter', x= 'Obama Admin Approval', y='Iran Deal Support')
plt.plot(deal_and_obama_approval_transpose_df["Approve_Obama"],slope*deal_and_obama_approval_transpose_df["bama Admin Approval"]+intercept,"-",color="blue")
ax.set_title("Obama Admin Approval Prediction Towards Iran Deal Support")
ax.set_ylabel('Iran Deal Support')
ax.set_xlabel("Approve of Obama")


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-47-098332244c2a> in <module>()
----> 1 intercept, slope = lm.params
      2 ax = deal_and_obama_approval_transpose_df.plot(kind='scatter', x= 'Obama Admin Approval', y='Iran Deal Support')
      3 plt.plot(deal_and_obama_approval_transpose_df["Approve_Obama"],slope*deal_and_obama_approval_transpose_df["bama Admin Approval"]+intercept,"-",color="blue")
      4 ax.set_title("Obama Admin Approval Prediction Towards Iran Deal Support")
      5 ax.set_ylabel('Iran Deal Support')

NameError: name 'lm' is not defined

In [ ]:


In [ ]:


In [ ]: