In [1]:
import pandas as pd
In [2]:
# Display configuration: cap the number of columns pandas prints and enlarge
# the default figure size.
# NOTE(review): `rcParams` is not imported anywhere in the visible cells --
# presumably it is provided by a %pylab / --pylab session; confirm before
# running this on a fresh kernel.
pd.options.display.max_columns = 15
rcParams.update({'figure.figsize': (17, 7)})
In [3]:
# Read ./2012.csv (semicolon-separated, latin1-encoded) into `bike_data`,
# using the day-first 'Date' column as a parsed datetime index.
bike_data = pd.read_csv(
    "./2012.csv",
    sep=';',
    encoding='latin1',
    index_col='Date',
    parse_dates=True,
    dayfirst=True,
)
In [4]:
# Discard every column that contains at least one missing value.
bike_data = bike_data.dropna(axis=1, how='any')
In [5]:
# Show the first five rows of the cleaned frame.
bike_data.head(5)
Out[5]:
In [6]:
# Line plot of every remaining column against the date index.
bike_data.plot(kind='line')
Out[6]:
In [7]:
# Display pandas' summary statistics for the frame.
bike_data.describe()
Out[7]:
In [8]:
# Plot only the 'Berri 1' and 'Maisonneuve 2' columns.
bike_data.loc[:, ['Berri 1', 'Maisonneuve 2']].plot()
Out[8]:
In [9]:
def get_weather_data(year):
    """Download and combine twelve monthly weather CSVs for ``year``.

    For each month 1..12 the bulk-data URL (Prov=QC, StationID=5415) is
    fetched with ``pd.read_csv``, skipping the first 16 rows and using the
    'Date/Time' column as a parsed datetime index. Each monthly frame is then
    cleaned as follows:

    * columns containing any missing value are dropped;
    * the '\\xb0' (degree sign) character is removed from column names;
    * the 'Year', 'Day', 'Month', 'Time' and 'Data Quality' columns are dropped;
    * rows containing any missing value are dropped.

    Parameters
    ----------
    year : int or str
        Year substituted into the request URL.

    Returns
    -------
    pandas.DataFrame
        The twelve cleaned monthly frames concatenated in month order.

    Raises
    ------
    KeyError
        If one of the five dropped columns is absent from a monthly frame
        (raised by ``DataFrame.drop``).
    """
    # NOTE(review): this endpoint and the 16-row preamble are hard-coded;
    # confirm the host still serves this CSV layout before relying on it.
    url_template = "http://climate.weatheroffice.gc.ca/climateData/bulkdata_e.html?Prov=QC&StationID=5415&Year={year}&Month={month}&Day=14&timeframe=1&format=csv"
    data_by_month = []
    for month in range(1, 13):
        url = url_template.format(year=year, month=month)
        monthly = pd.read_csv(url, skiprows=16, index_col='Date/Time', parse_dates=True)
        monthly = monthly.dropna(axis=1)
        # Build a concrete list: on Python 3 `map` returns a lazy iterator.
        monthly.columns = [name.replace('\xb0', '') for name in monthly.columns]
        monthly = monthly.drop(['Year', 'Day', 'Month', 'Time', 'Data Quality'], axis=1)
        data_by_month.append(monthly.dropna())
    return pd.concat(data_by_month)
In [10]:
# Download all twelve months of 2012 and list the resulting column names.
weather_data = get_weather_data(2012)
# The parenthesised call form is valid on both Python 2 and Python 3; the
# bare `print list(...)` statement is a SyntaxError on Python 3.
print(list(weather_data.columns))
In [11]:
# Preview a few weather columns. 'Wind Spd (km/h)' was previously listed
# twice, which rendered that column twice in the output.
weather_data[['Temp (C)', 'Weather', 'Wind Spd (km/h)', 'Rel Hum (%)']].head()
Out[11]:
In [12]:
# Daily mean of the 'Temp (C)' readings, aligned onto bike_data's date index.
# Uses the resampler's `.mean()` method because the `how=` keyword is no
# longer accepted by current pandas releases.
bike_data['mean temp'] = weather_data['Temp (C)'].resample('D').mean()
In [13]:
# Check that the new 'mean temp' column shows up in the first five rows.
bike_data.head(5)
Out[13]:
In [14]:
# Draw 'Berri 1' and 'mean temp' as two stacked subplots.
bike_data.loc[:, ['Berri 1', 'mean temp']].plot(subplots=True)
Out[14]:
In [15]:
# Per-day fraction of weather observations whose 'Weather' text contains
# 'Rain': flag each observation as 0/1, then average the flags by day.
# `.resample('D').mean()` replaces the `how='mean'` keyword, which current
# pandas releases no longer accept.
bike_data['Rain'] = (
    weather_data['Weather']
    .str.contains('Rain')
    .astype(int)
    .resample('D')
    .mean()
)
In [16]:
# Draw 'Berri 1' and 'Rain' as two stacked subplots.
bike_data.loc[:, ['Berri 1', 'Rain']].plot(subplots=True)
Out[16]:
In [17]:
# Record each row's day-of-week number taken from the datetime index,
# then preview the first five rows.
bike_data['weekday'] = bike_data.index.dayofweek
bike_data.head(5)
Out[17]:
In [18]:
# Replace the numeric weekday codes with day names (pandas numbers Monday
# as 0). The names are derived from the index rather than from the existing
# 'weekday' column so that re-running this cell is harmless; mapping the
# already-converted names through `days[...]` would raise a TypeError.
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
bike_data['weekday'] = [days[code] for code in bike_data.index.weekday]
In [19]:
# Confirm that 'weekday' now holds day names in the first five rows.
bike_data.head(5)
Out[19]:
In [20]:
# Total every column per weekday. The built-in groupby `.sum()` gives the same
# aggregation without the bare `numpy` name, which is never imported in the
# visible cells.
counts_by_day = bike_data.groupby('weekday').sum()
In [21]:
# groupby sorts its string keys alphabetically (Friday, Monday, Saturday, ...),
# so overwriting the index with `days` would attach the wrong day name to
# each row. Reorder the rows by label into Monday..Sunday order instead.
counts_by_day = counts_by_day.reindex(days)
In [22]:
# Line plot of the per-weekday totals.
counts_by_day.plot(kind='line')
Out[22]: