Assignment 4

  • Using data from this FiveThirtyEight post, write code to calculate the correlation of the responses from the poll.
  • Respond to the story in your PR. Is this a good example of data journalism? Why or why not?

In [1]:
import pandas as pd
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
%matplotlib inline

In [41]:
# Load the poll results; the CSV is expected to sit next to this notebook.
df = pd.read_csv(filepath_or_buffer="approval-iran.csv")

In [42]:
# Display the loaded frame (columns Group, Favor_Iran, Approve_Obama) to eyeball it before modelling.
df


Out[42]:
Group Favor_Iran Approve_Obama
0 Republicans 0.34 0.10
1 Democrats 0.60 0.78
2 White 0.45 0.35
3 Black 0.54 0.85
4 Ind 0.44 0.37
5 Men 0.46 0.41
6 Women 0.47 0.47
7 Degree 0.50 0.47
8 Nodegree 0.45 0.43
9 age35- 0.51 0.56
10 age35-44 0.46 0.35
11 age55+ 0.45 0.45
12 age65+ 0.41 0.43
13 50k- 0.47 0.48
14 50k+ 0.48 0.42
15 Lib 0.61 0.69
16 Cons 0.35 0.24
17 Teaparty 0.35 0.15

In [11]:
# Unused experiment (kept for reference): df2 = df.set_index('Group')

In [49]:
# Ordinary least squares: regress support for the Iran deal on Obama approval.
lm = smf.ols(
    "Favor_Iran~Approve_Obama",
    data=df,
).fit()

# Fitted coefficients (Intercept and the Approve_Obama slope).
lm.params


Out[49]:
Intercept        0.305280
Approve_Obama    0.355619
dtype: float64

In [58]:
# Read the fitted coefficients by label rather than by position in lm.params.
intercept = lm.params["Intercept"]
slope = lm.params["Approve_Obama"]

In [65]:
# Apply the ggplot look to every figure drawn after this cell.
plt.style.use("ggplot")

In [87]:
fig, ax = plt.subplots(figsize=(8, 5))

# One point per poll subgroup: Obama approval (x) against support for the Iran deal (y).
df.plot(
    kind="scatter",
    x="Approve_Obama",
    y="Favor_Iran",
    ax=ax,
    s=50,
)

# Overlay the fitted OLS line on the same axes.
approval = df["Approve_Obama"]
ax.plot(approval, intercept + slope * approval, "-", color="red", linewidth=2)

ax.set_title("Feelings On Obama Predict Feelings On Iran Deal")
ax.set_ylabel("Favor Iran deal")
ax.set_xlabel("Approve of Obama")


Out[87]:
<matplotlib.text.Text at 0x119ca4b00>

In [88]:
# Pearson correlation between the two response columns.
# The numeric columns are selected explicitly because `Group` holds strings:
# pandas >= 2.0 no longer drops non-numeric columns in DataFrame.corr() and
# raises instead. Older versions give the same 2x2 matrix either way.
df[["Favor_Iran", "Approve_Obama"]].corr()


Out[88]:
Favor_Iran Approve_Obama
Favor_Iran 1.000000 0.913868
Approve_Obama 0.913868 1.000000

In [ ]: