In [4]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
Figure 2.1
In [5]:
# Read the advertising data set from its CSV file and preview the first rows.
advertising_csv = 'data/Advertising.csv'
data = pd.read_csv(advertising_csv)
data.head()
Out[5]:
In [6]:
# Figure 2.1: Sales plotted against each advertising predictor, one panel per
# predictor, using seaborn's regplot (scatter points in red plus a fitted line).
#
# Each entry is (predictor column, upper x-axis limit); every panel's x-axis
# starts at -5.
panels = [('TV', 310), ('Radio', 55), ('Newspaper', 110)]

# One explicit Axes per predictor replaces three copy-pasted plt.subplot()
# stanzas that depended on pyplot's implicit "current axes" state.
fig, axes = plt.subplots(1, len(panels), figsize=(18, 6))
for ax, (predictor, x_max) in zip(axes, panels):
    sns.regplot(x=predictor, y='Sales', data=data,
                scatter_kws={'color': 'red'}, ax=ax)
    ax.set_xlim(-5, x_max)

# Render the figure without echoing the return value of the last call.
plt.show()
Out[6]:
In [7]:
# Read the college data set from its CSV file and preview the first rows.
college_csv = 'data/College.csv'
college = pd.read_csv(college_csv)
college.head()
Out[7]:
In [8]:
# Relabel the 'Unnamed: 0' column as 'Name'
# (presumably it holds the college names -- confirm against the CSV).
# rename() returns a new frame; rebinding the result avoids inplace=True,
# which mutates the object an earlier cell already displayed.
college = college.rename(columns={'Unnamed: 0': 'Name'})
college.head(2)
Out[8]:
In [9]:
# Summary statistics for the college frame, shown as the cell's output.
college.describe()
Out[9]:
In [10]:
# Scatter-plot matrix over the columns at positions 2 through 10 of the frame.
pairplot_columns = college.iloc[:, 2:11]
sns.pairplot(pairplot_columns)
Out[10]:
In [11]:
# Box plot of Outstate, grouped by the Private column.
sns.boxplot(data=college, x='Private', y='Outstate')
Out[11]:
In [12]:
# Add an 'Elite' label: 'Yes' where Top10perc is above 50, 'No' where it is
# 50 or below. Rows matching neither comparison are left unassigned.
top10perc = college['Top10perc']
college.loc[top10perc > 50, 'Elite'] = 'Yes'
college.loc[top10perc <= 50, 'Elite'] = 'No'

# Show how many colleges fall in each group, then compare Outstate across them.
print(college['Elite'].value_counts())
sns.boxplot(data=college, x='Elite', y='Outstate')
Out[12]:
In [13]:
# Count histograms of Apps and Outstate, each drawn with a coarse (20) and a
# fine (100) bin count. Entries are (column, number of bins) and fill the 2x2
# grid row by row, matching the original 221..224 subplot order.
hist_specs = [('Apps', 20), ('Apps', 100), ('Outstate', 20), ('Outstate', 100)]

# sns.distplot is deprecated (seaborn >= 0.11); sns.histplot is the documented
# replacement for distplot(..., kde=False) and draws a plain count histogram.
fig, axes = plt.subplots(2, 2, figsize=(10, 10))
for ax, (column, n_bins) in zip(axes.flat, hist_specs):
    sns.histplot(college[column], bins=n_bins, ax=ax)

# Render the figure without echoing the return value of the last call.
plt.show()
Out[13]: