notebook.community

Edit and run



In [1]:

    
advt = pd.read_csv("Advertising.csv")









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-250dca37d2ab> in <module>()
----> 1 advt = pd.read_csv("Advertising.csv")

NameError: name 'pd' is not defined



In [2]:

    
import pandas as pd
import numpy as np



In [3]:

    
advt = pd.read_csv("Advertising.csv")



In [4]:

    
advt.head()



In [5]:

    
advt.tail()



In [ ]:



In [6]:

    
advt.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
Unnamed: 0    200 non-null int64
TV            200 non-null float64
Radio         200 non-null float64
Newspaper     200 non-null float64
Sales         200 non-null float64
dtypes: float64(4), int64(1)
memory usage: 7.9 KB



In [7]:

    
advt = advt[["TV", "Radio", "Newspaper", "Sales"]]



In [8]:

    
advt.head()



In [9]:

    
advt.describe()



In [10]:

    
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib









    



Using matplotlib backend: MacOSX



In [12]:

    
sns.distplot(advt.Sales)









    



//anaconda/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j






    Out[12]:





<matplotlib.axes._subplots.AxesSubplot at 0x11898d358>



In [13]:

    
sns.distplot(advt.Sales)









    



//anaconda/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j






    Out[13]:





<matplotlib.axes._subplots.AxesSubplot at 0x11b6a3b38>



In [14]:

    
sns.jointplot(advt.Newspaper, advt.Sales)









    Out[14]:





<seaborn.axisgrid.JointGrid at 0x11b6a0a20>



In [15]:

    
advt.TV.corr(advt.Sales)









    Out[15]:





0.78222442486160615



In [16]:

    
advt.Newspaper.corr(advt.Sales)









    Out[16]:





0.22829902637616528



In [17]:

    
advt.corr()



In [18]:

    
sns.heatmap(advt.corr())









    Out[18]:





<matplotlib.axes._subplots.AxesSubplot at 0x11d2ae4e0>



In [19]:

    
#building a linear regression model

#Sales is the response variable and TV is the predictor
import statsmodels.formula.api as smf



In [21]:

    
lm = smf.ols( 'Sales ~ TV', advt ).fit()



In [22]:

    
lm.params









    Out[22]:





Intercept    7.032594
TV           0.047537
dtype: float64



In [23]:

    
lm.conf_int()



In [24]:

    
lm.rsquared









    Out[24]:





0.61187505085007099



In [ ]:

	Unnamed: 0	TV	Radio	Newspaper	Sales
0	1	230.1	37.8	69.2	22.1
1	2	44.5	39.3	45.1	10.4
2	3	17.2	45.9	69.3	9.3
3	4	151.5	41.3	58.5	18.5
4	5	180.8	10.8	58.4	12.9

	Unnamed: 0	TV	Radio	Newspaper	Sales
195	196	38.2	3.7	13.8	7.6
196	197	94.2	4.9	8.1	9.7
197	198	177.0	9.3	6.4	12.8
198	199	283.6	42.0	66.2	25.5
199	200	232.1	8.6	8.7	13.4

	TV	Radio	Newspaper	Sales
0	230.1	37.8	69.2	22.1
1	44.5	39.3	45.1	10.4
2	17.2	45.9	69.3	9.3
3	151.5	41.3	58.5	18.5
4	180.8	10.8	58.4	12.9

	TV	Radio	Newspaper	Sales
count	200.000000	200.000000	200.000000	200.000000
mean	147.042500	23.264000	30.554000	14.022500
std	85.854236	14.846809	21.778621	5.217457
min	0.700000	0.000000	0.300000	1.600000
25%	74.375000	9.975000	12.750000	10.375000
50%	149.750000	22.900000	25.750000	12.900000
75%	218.825000	36.525000	45.100000	17.400000
max	296.400000	49.600000	114.000000	27.000000

	0	1
Intercept	6.129719	7.935468
TV	0.042231	0.052843

	TV	Radio	Newspaper	Sales
TV	1.000000	0.054809	0.056648	0.782224
Radio	0.054809	1.000000	0.354104	0.576223
Newspaper	0.056648	0.354104	1.000000	0.228299
Sales	0.782224	0.576223	0.228299	1.000000