Using data from this FiveThirtyEight post, write code to calculate the correlation of the responses from the poll. Respond to the story in your PR. Is this a good example of data journalism? Why or why not?
In [1]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
In [2]:
# Load the poll results; the path is relative to the kernel's working directory.
df = pd.read_csv(filepath_or_buffer="Obama_Iran.csv")
In [3]:
# Preview the first five rows to check that the file parsed as expected.
df.head(5)
Out[3]:
In [4]:
# Fit an ordinary least squares line with support for the Iran deal as the
# response and Obama approval as the predictor (formula syntax is
# "response ~ predictor"). This orientation matches the scatter plot drawn
# further down, which puts Obama_approval on x and Favor_Irandeal on y; the
# reverse formula would yield coefficients for the inverse regression and
# the plotted line would not be the fit for those axes.
lm = smf.ols(formula='Favor_Irandeal ~ Obama_approval', data=df).fit()
# Cell output: the fitted coefficients (intercept first, then the slope).
lm.params
Out[4]:
In [5]:
# Pull the two fitted coefficients out by position: intercept first, slope second.
intercept, slope = lm.params.tolist()
In [10]:
# Scatter of the two poll responses with the fitted line drawn over it.
fig, ax = plt.subplots(figsize=(7, 5))
df.plot.scatter(x='Obama_approval', y='Favor_Irandeal', s=50, ax=ax)

# Regression line built from the coefficients unpacked in the previous cell.
ax.plot(
    df["Obama_approval"],
    intercept + slope * df["Obama_approval"],
    "-",
    color="red",
    linewidth=2,
)

# Title and axis labels so the figure can be read on its own.
ax.set_title("Feelings On Obama Predict Feelings On Iran Deal")
ax.set_xlabel("Approve of Obama")
ax.set_ylabel("Favor Iran deal")

# Thin grey grid on both axes, kept behind the points and the line.
ax.grid(color='darkgrey', linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)
In [8]:
# Pairwise Pearson correlation between the columns of the poll data.
# NOTE(review): if the CSV contains non-numeric columns, newer pandas
# versions may refuse to correlate them - confirm against the loaded frame.
df.corr(method="pearson")
Out[8]:
In [ ]: