In [1]:
# pandas must be imported before its first use; this is the first cell that
# runs on a fresh kernel, so the import lives here to avoid a NameError.
import pandas as pd

# Read the advertising CSV (path relative to the working directory) into a DataFrame.
advt = pd.read_csv("Advertising.csv")
In [2]:
import pandas as pd
import numpy as np
In [3]:
# Read the advertising CSV (path relative to the working directory) into a DataFrame.
advt = pd.read_csv(filepath_or_buffer="Advertising.csv")
In [4]:
# Preview the first five rows of the freshly loaded frame.
advt.head(n=5)
Out[4]:
In [5]:
# Preview the last five rows of the freshly loaded frame.
advt.tail(n=5)
Out[5]:
In [ ]:
In [6]:
# Print pandas' summary of the frame (index, columns, non-null counts, dtypes, memory).
advt.info()
In [7]:
# Restrict the frame to the TV, Radio, Newspaper and Sales columns (in that order).
advt = advt.loc[:, ["TV", "Radio", "Newspaper", "Sales"]]
In [8]:
# Preview the first five rows after the column selection.
advt.head(n=5)
Out[8]:
In [9]:
# Show pandas' descriptive statistics for the columns of advt.
advt.describe()
Out[9]:
In [10]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib
In [12]:
# Plot the distribution of the Sales column.
# NOTE(review): seaborn deprecated distplot in 0.11 in favour of histplot/displot;
# kept here so the cell behaves the same on the seaborn version in use.
sns.distplot(advt["Sales"])
Out[12]:
In [13]:
# Plot the distribution of the Sales column (same call as the preceding cell).
sns.distplot(a=advt["Sales"])
Out[13]:
In [14]:
# Joint plot of Newspaper against Sales.
# x/y are passed by keyword with `data=` because seaborn >= 0.12 no longer
# accepts them positionally (the first positional argument is `data`);
# the keyword form also works on older seaborn releases.
sns.jointplot(x="Newspaper", y="Sales", data=advt)
Out[14]:
In [15]:
# Correlation between the TV and Sales columns (pandas' default method).
advt["TV"].corr(advt["Sales"])
Out[15]:
In [16]:
# Correlation between the Newspaper and Sales columns (pandas' default method).
advt["Newspaper"].corr(advt["Sales"])
Out[16]:
In [17]:
# Pairwise Pearson correlation matrix of the columns in advt.
advt.corr(method="pearson")
Out[17]:
In [18]:
# Draw the pairwise correlation matrix of advt as a heatmap.
sns.heatmap(data=advt.corr())
Out[18]:
In [19]:
# Build a linear regression model.
# Sales is the response variable and TV is the predictor.
import statsmodels.formula.api as smf
In [21]:
# Fit an ordinary least squares model of Sales on TV using the formula interface.
lm = smf.ols(formula="Sales ~ TV", data=advt).fit()
In [22]:
# Display the estimated parameters of the fitted model `lm`.
lm.params
Out[22]:
In [23]:
# Confidence intervals for the fitted parameters at the default 5% significance level.
lm.conf_int(alpha=0.05)
Out[23]:
In [24]:
# Display the R-squared value of the fitted model `lm`.
lm.rsquared
Out[24]:
In [ ]: