Using data from this FiveThirtyEight post, write code to calculate the correlation of the responses from the poll. Respond to the story in your PR. Is this a good example of data journalism? Why or why not?



In [1]:

    
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf



In [2]:

    
# Read the poll results from the CSV file into a DataFrame named `df`.
df = pd.read_csv(filepath_or_buffer="Obama_Iran.csv")



In [3]:

    
# Preview the first five rows of the loaded data.
df.head(n=5)









    Out[3]:






  
    
      
      Group
      Obama_approval
      Favor_Irandeal
    
  
  
    
      0
      Dem
      78
      60
    
    
      1
      Rep
      10
      34
    
    
      2
      Ind
      37
      44
    
    
      3
      Men
      41
      46
    
    
      4
      Women
      47
      47



In [4]:

    
# Ordinary least squares fit. In this formula Obama_approval is the dependent
# variable (left of "~") and Favor_Irandeal is the single regressor.
ols_spec = smf.ols(formula='Obama_approval~Favor_Irandeal', data=df)
lm = ols_spec.fit()

# Display the fitted coefficients (intercept and slope) as the cell output.
lm.params









    Out[4]:





Intercept        -58.447611
Favor_Irandeal     2.256738
dtype: float64



In [5]:

    
# Pull the two fitted coefficients out of `lm.params` by position: the first
# entry is the intercept and the second is the coefficient on Favor_Irandeal.
fitted_coefficients = lm.params.tolist()
intercept, slope = fitted_coefficients



In [10]:

    
# Scatter each group's Obama approval (x-axis) against its support for the
# Iran deal (y-axis) and overlay a least-squares trend line.
#
# A trend line on these axes has to come from regressing the y-axis variable
# (Favor_Irandeal) on the x-axis variable (Obama_approval). The `lm` model
# fitted earlier has the opposite orientation (Obama_approval~Favor_Irandeal),
# so applying its slope and intercept to Obama_approval draws a line that does
# not fit the plotted points. Fit the model in the plotted orientation here.
trend = smf.ols(formula='Favor_Irandeal~Obama_approval', data=df).fit()
trend_intercept = trend.params['Intercept']
trend_slope = trend.params['Obama_approval']

fig, ax = plt.subplots(figsize=(7, 5))
df.plot(kind='scatter', x='Obama_approval', y='Favor_Irandeal', ax=ax, s=50)

# Predicted Favor_Irandeal at each observed Obama_approval value.
ax.plot(
    df["Obama_approval"],
    trend_slope * df["Obama_approval"] + trend_intercept,
    "-",
    color="red",
    linewidth=2,
)

ax.set_title("Feelings On Obama Predict Feelings On Iran Deal")
ax.set_ylabel("Favor Iran deal")
ax.set_xlabel("Approve of Obama")
ax.xaxis.grid(color='darkgrey', linestyle='-', linewidth=0.5)
ax.yaxis.grid(color='darkgrey', linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)  # draw the grid lines behind the points and the trend line



In [8]:

    
# Pearson correlation between the two numeric poll columns.
# The columns are selected explicitly because `df` also holds the string
# column "Group": pandas >= 2.0 no longer drops non-numeric columns in
# DataFrame.corr() and raises on them instead. Selecting the columns gives the
# same 2x2 matrix on old and new pandas versions.
df[["Obama_approval", "Favor_Irandeal"]].corr()









    Out[8]:






  
    
      
      Obama_approval
      Favor_Irandeal
    
  
  
    
      Obama_approval
      1.000000
      0.870377
    
    
      Favor_Irandeal
      0.870377
      1.000000



In [ ]:

	Obama_approval	Favor_Irandeal
Obama_approval	1.000000	0.870377
Favor_Irandeal	0.870377	1.000000