In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline
bikes = pd.read_csv('../data/2016-Q1-Trips-History-Data.csv')
bikes.head()
bikes['start'] = pd.to_datetime(bikes['Start date'])
%time bikes['end'] = pd.to_datetime(bikes['End date'])
In [3]:
# Preview the first five rows of the trips frame.
bikes.head(n=5)
Out[3]:
In [15]:
# Fractional hour of day of each trip start (e.g. 8.5 for 08:30); the minute
# fraction is rounded to two decimals.
bikes['hour_of_day'] = bikes['start'].dt.hour + (bikes['start'].dt.minute / 60).round(2)

# One row per distinct start time; each column holds the count of non-null
# values in that group, so `start` is the number of trips started at that time.
hours = bikes.groupby('hour_of_day').count()
# Expose the group key as a regular column so it can be used as a feature.
hours['hour'] = hours.index

# Line plot of trips per start time.
hours['start'].plot()
# Same data as a scatter with seaborn's fitted regression line.
sns.lmplot(x='hour', y='start', data=hours, aspect=1.5, scatter_kws={'alpha': 0.2})
Out[15]:
In [18]:
# Restrict to the 5:00-8:00 window. `hours` is indexed by the fractional hour
# (a float index), so the slice must be label-based: `.loc[5:8]` selects by hour
# value (both ends inclusive). A bare `hours[5:8]` is ambiguous on a float index
# (label-based in older pandas, positional in newer releases).
hours.loc[5:8, 'start'].plot()
# Scatter of the same window with seaborn's fitted regression line.
sns.lmplot(x='hour', y='start', data=hours.loc[5:8], aspect=1.5, scatter_kws={'alpha':0.5})
Out[18]:
$y = \beta_0 + \beta_1x_1 + \beta_2x_2 + ... + \beta_nx_n$
The $\beta$ values are called the model coefficients:
In the diagram above:
In [19]:
# fit a linear regression model
from sklearn.linear_model import LinearRegression

linreg = LinearRegression()
# The only feature is the fractional hour of day. (This list previously held a
# stale, unrelated column name and was never used.)
feature_cols = ['hour']
# X must be 2-D (a DataFrame), y is the per-time trip count.
X = hours[feature_cols]
y = hours['start']
linreg.fit(X, y)
Out[19]:
In [23]:
# Store the fitted model's predictions next to the observed counts.
hours['pred'] = linreg.predict(X)

# Overlay the fitted line (red) on the observed counts.
plt.scatter(hours['hour'], hours['start'])
plt.plot(hours['hour'], hours['pred'], color='red')
plt.xlabel('hours')
plt.ylabel('count')
Out[23]:
In [41]:
# fit a linear regression model on the 5.5-9 hour window only
from sklearn.linear_model import LinearRegression

linreg = LinearRegression()
# Label-based slice on the fractional-hour index. Take an explicit copy so the
# window is independent of `hours` and can safely receive its own column.
partial_hours = hours.loc[5.5:9].copy()
X = partial_hours[['hour']]
y = partial_hours['start']
linreg.fit(X, y)

# Compute the window's predictions once and store them on both frames.
# Previously they were written only to `hours`, so `partial_hours.pred` below
# was either missing or still held whatever `pred` values existed when the
# slice was taken.
partial_pred = linreg.predict(X)
partial_hours['pred'] = partial_pred
hours.loc[5.5:9, 'pred'] = partial_pred

# put the plots together: all observed counts, plus the window's fitted line
plt.scatter(hours['hour'], hours['start'])
plt.plot(partial_hours['hour'], partial_hours['pred'], color='red')
plt.xlabel('hours')
plt.ylabel('count')
Out[41]:
Step 1: Import the class you plan to use
Step 2: "Instantiate" the "estimator"
Step 3: Fit the model with data (aka "model training")
Step 4: Predict the response for a new observation
Interpreting the intercept ($\beta_0$):
Interpreting the "hour" coefficient ($\beta_1$):