Using data from this FiveThirtyEight post, write code to calculate the correlation of the responses from the poll. Respond to the story in your PR. Is this a good example of data journalism? Why or why not?


In [1]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

In [2]:
# Load the poll table from the CSV that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer="Obama_Iran.csv")

In [3]:
# Preview the first five rows of the loaded table.
df.head(n=5)


Out[3]:
Group Obama_approval Favor_Irandeal
0 Dem 78 60
1 Rep 10 34
2 Ind 37 44
3 Men 41 46
4 Women 47 47

In [4]:
# Ordinary least squares fit. In the formula the left-hand side
# (Obama_approval) is the response and Favor_Irandeal is the predictor.
lm = smf.ols(
    formula='Obama_approval~Favor_Irandeal',
    data=df,
).fit()

# Display the fitted coefficients (intercept and slope).
lm.params


Out[4]:
Intercept        -58.447611
Favor_Irandeal     2.256738
dtype: float64

In [5]:
# Pull the two fitted coefficients out of the params Series by label.
intercept = lm.params["Intercept"]
slope = lm.params["Favor_Irandeal"]

In [10]:
# Scatter of the poll groups with the fitted OLS line drawn on top.
fig, ax = plt.subplots(figsize=(7, 5))
df.plot(kind='scatter', x='Obama_approval', y='Favor_Irandeal', ax=ax, s=50)

# `slope` and `intercept` come from the fit Obama_approval ~ Favor_Irandeal,
# i.e. they map Favor_Irandeal -> predicted Obama_approval. The fitted
# equation must therefore be evaluated on Favor_Irandeal, and the predicted
# approval belongs on the x axis (which shows Obama_approval). Feeding
# Obama_approval into the equation instead would place the line in the wrong
# coordinates, far away from the plotted points.
favor = df["Favor_Irandeal"].sort_values()
predicted_approval = slope * favor + intercept
ax.plot(predicted_approval, favor, "-", color="red", linewidth=2)

ax.set_title("Feelings On Obama Predict Feelings On Iran Deal")
ax.set_ylabel("Favor Iran deal")
ax.set_xlabel("Approve of Obama")

# Light grid on both axes, drawn behind the points and the fitted line.
ax.grid(color='darkgrey', linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)



In [8]:
# Pearson correlation between the two poll percentages.
# The numeric columns are selected explicitly because `df` also holds the
# string column "Group": on pandas >= 2.0 a bare df.corr() no longer drops
# non-numeric columns silently and raises instead.
df[["Obama_approval", "Favor_Irandeal"]].corr()


Out[8]:
Obama_approval Favor_Irandeal
Obama_approval 1.000000 0.870377
Favor_Irandeal 0.870377 1.000000

In [ ]: