Assignment 4

Using data from this FiveThirtyEight post, write code to calculate the correlation of the responses from the poll.
Respond to the story in your PR. Is this a good example of data journalism? Why or why not?



In [1]:

    
import pandas as pd
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
%matplotlib inline



In [41]:

    
# Load the poll results (one row per respondent group) from the local CSV.
POLL_CSV = "approval-iran.csv"
df = pd.read_csv(POLL_CSV)



In [42]:

    
# Show the whole table; it is small enough to inspect in full.
df









    Out[42]:






  
    
      
      Group
      Favor_Iran
      Approve_Obama
    
  
  
    
      0
      Republicans
      0.34
      0.10
    
    
      1
      Democrats
      0.60
      0.78
    
    
      2
      White
      0.45
      0.35
    
    
      3
      Black
      0.54
      0.85
    
    
      4
      Ind
      0.44
      0.37
    
    
      5
      Men
      0.46
      0.41
    
    
      6
      Women
      0.47
      0.47
    
    
      7
      Degree
      0.50
      0.47
    
    
      8
      Nodegree
      0.45
      0.43
    
    
      9
      age35-
      0.51
      0.56
    
    
      10
      age35-44
      0.46
      0.35
    
    
      11
      age55+
      0.45
      0.45
    
    
      12
      age65+
      0.41
      0.43
    
    
      13
      50k-
      0.47
      0.48
    
    
      14
      50k+
      0.48
      0.42
    
    
      15
      Lib
      0.61
      0.69
    
    
      16
      Cons
      0.35
      0.24
    
    
      17
      Teaparty
      0.35
      0.15



In [11]:

    
#df2 = df.set_index('Group')



In [49]:

    
# Ordinary least squares: regress support for the Iran deal on Obama approval.
ols_spec = smf.ols(formula='Favor_Iran~Approve_Obama', data=df)
lm = ols_spec.fit()

# Fitted coefficients (intercept and slope) as a labelled Series.
lm.params









    Out[49]:





Intercept        0.305280
Approve_Obama    0.355619
dtype: float64



In [58]:

    
# Pull the fitted coefficients out by label rather than by position.
intercept = lm.params["Intercept"]
slope = lm.params["Approve_Obama"]



In [65]:

    
# Apply matplotlib's 'ggplot' style sheet to the figures drawn below.
plt.style.use('ggplot')



In [87]:

    
# Scatter each group's Obama approval against its support for the Iran deal,
# then overlay the fitted regression line on the same axes.
fig, ax = plt.subplots(figsize=(8, 5))

approval = df["Approve_Obama"]
fitted_favor = slope * approval + intercept

df.plot(kind='scatter', x='Approve_Obama', y='Favor_Iran', ax=ax, s=50)
ax.plot(approval, fitted_favor, "-", color="red", linewidth=2)

ax.set_title("Feelings On Obama Predict Feelings On Iran Deal")
ax.set_ylabel('Favor Iran deal')
ax.set_xlabel("Approve of Obama")









    Out[87]:





<matplotlib.text.Text at 0x119ca4b00>



In [88]:

    
# Correlation matrix of the two numeric poll columns (pandas' default method,
# Pearson). The text column "Group" is excluded explicitly: pandas >= 2.0 no
# longer drops non-numeric columns silently in DataFrame.corr() and raises a
# ValueError on strings, which would break a fresh Restart & Run All.
df[["Favor_Iran", "Approve_Obama"]].corr()









    Out[88]:






  
    
      
      Favor_Iran
      Approve_Obama
    
  
  
    
      Favor_Iran
      1.000000
      0.913868
    
    
      Approve_Obama
      0.913868
      1.000000



In [ ]:

	Group	Favor_Iran	Approve_Obama
0	Republicans	0.34	0.10
1	Democrats	0.60	0.78
2	White	0.45	0.35
3	Black	0.54	0.85
4	Ind	0.44	0.37
5	Men	0.46	0.41
6	Women	0.47	0.47
7	Degree	0.50	0.47
8	Nodegree	0.45	0.43
9	age35-	0.51	0.56
10	age35-44	0.46	0.35
11	age55+	0.45	0.45
12	age65+	0.41	0.43
13	50k-	0.47	0.48
14	50k+	0.48	0.42
15	Lib	0.61	0.69
16	Cons	0.35	0.24
17	Teaparty	0.35	0.15