In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
# cross_val_score lives in sklearn.model_selection since scikit-learn 0.18;
# sklearn.cross_validation was removed in 0.20, so only fall back to it on old installs
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor, export_graphviz
In [2]:
# download the bikeshare CSV; the "datetime" column becomes the index and is
# parsed into timestamps while reading
url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/bikeshare.csv'
bikes = pd.read_csv(
    url,
    parse_dates=True,
    index_col='datetime',
)
In [3]:
# the column name "count" collides with the DataFrame.count method, so rename
# it to "total"; the result is rebound instead of using inplace=True, which
# keeps the step chainable and safe to re-run
bikes = bikes.rename(columns={'count': 'total'})
In [4]:
# derive an "hour" feature from the hour component of the datetime index
bikes.loc[:, 'hour'] = bikes.index.hour
In [5]:
# preview the first five rows
bikes.head(n=5)
Out[5]:
In [6]:
# preview the last five rows
bikes.tail(n=5)
Out[6]:
In [7]:
# average of "total" within each "workingday" group
bikes.groupby('workingday')['total'].mean()
Out[7]:
In [8]:
# average of "total" within each "hour" group
bikes.groupby('hour')['total'].mean()
Out[8]:
In [9]:
# line plot of the mean rentals for each value of "hour"; the title keeps the
# figure self-explanatory when the notebook is skimmed
bikes.groupby('hour')['total'].mean().plot(title='Mean rentals by hour')
Out[9]:
Plot the hourly rental trend separately for workingday == 0 and workingday == 1
In [10]:
# hourly rental trend restricted to rows where "workingday" is 0; the title
# distinguishes this figure from the workingday=1 plot
bikes.loc[bikes['workingday'] == 0].groupby('hour')['total'].mean().plot(
    title='Mean rentals by hour (workingday=0)'
)
Out[10]:
In [11]:
# hourly rental trend restricted to rows where "workingday" is 1; the title
# distinguishes this figure from the workingday=0 plot
bikes.loc[bikes['workingday'] == 1].groupby('hour')['total'].mean().plot(
    title='Mean rentals by hour (workingday=1)'
)
Out[11]:
In [12]:
# overlay both trends in one figure: unstack() moves the "workingday" level
# into columns, so each workingday value is drawn as its own line
bikes.groupby(['hour', 'workingday'])['total'].mean().unstack().plot(
    title='Mean rentals by hour, split by workingday'
)
Out[12]:
Write about your findings
In [ ]:
In [ ]:
In [ ]: