In [4]:
from pandas import Series, DataFrame
In [5]:
import pandas as pd
In [6]:
import numpy as np
In [7]:
# Load the tab-separated weather file into a DataFrame.
weather = pd.read_csv('daily_weather.tsv', sep='\t')
In [8]:
# Load the tab-separated stations file into a DataFrame.
stations = pd.read_csv('stations.tsv', sep='\t')
In [9]:
# Load the tab-separated usage file into a DataFrame.
usage = pd.read_csv('usage_2012.tsv', sep='\t')
In [10]:
# Relabel table for season names: key = existing label, value = replacement.
# NOTE(review): presumably corrects shifted season labels in the source data -- confirm.
newseasons = dict(
    Summer='Spring',
    Spring='Winter',
    Fall='Summer',
    Winter='Fall',
)
In [11]:
# Relabel `season_desc` through `newseasons`.
# `newseasons` is a permutation of the same four labels, so remapping the
# column in place would rotate the labels again every time this cell is
# re-run. The untouched labels are therefore stashed once in
# `season_desc_raw`, and the mapping is always applied to that copy, which
# makes the cell safe to execute repeatedly.
# Labels that are not keys of `newseasons` become NaN (Series.map behaviour).
if 'season_desc_raw' not in weather.columns:
    weather['season_desc_raw'] = weather['season_desc']
weather['season_desc'] = weather['season_desc_raw'].map(newseasons)
In [12]:
# Mean of `temp` for each `season_desc` label.
# Uses the built-in 'mean' aggregation instead of np.average: np.average is
# executed as a plain Python callable per group and yields NaN for any group
# containing a missing value, whereas 'mean' takes pandas' optimized groupby
# path and skips missing values.
pd.pivot_table(weather, values='temp', index='season_desc', aggfunc='mean')
Out[12]:
In [13]:
# Parse the `date` column as datetimes and store the month number of each
# row in a new `Month` column.
weather['Month'] = pd.DatetimeIndex(weather['date']).month
In [14]:
# Sum of `total_riders` for each `Month`.
# The aggregation is given by name ('sum') rather than as np.sum: recent
# pandas releases emit a FutureWarning when a NumPy reduction is passed as
# `aggfunc`, and the string form keeps the current (identical) result.
pd.pivot_table(weather, values='total_riders', index='Month', aggfunc='sum')
Out[14]:
In [15]:
# Correlation matrix (pandas default method) of `temp` and `total_riders`.
weather[['temp', 'total_riders']].corr()
Out[15]:
In [16]:
# Correlation matrix of `temp` and `total_riders`, computed separately for
# every value of `Month`.
weather.groupby('Month')[['temp', 'total_riders']].corr()
Out[16]:
In [19]:
# Correlation matrix of `temp` against the casual and registered rider
# counts; the two rider columns are relabelled for display.
(
    weather[['temp', 'no_casual_riders', 'no_reg_riders']]
    .rename(columns={'no_casual_riders': 'Non-Regulars', 'no_reg_riders': 'Regulars'})
    .corr()
)
Out[19]:
In [17]:
# Correlation matrix of the `is_work_day` flag against the casual and
# registered rider counts; all three columns are relabelled for display.
(
    weather[['is_work_day', 'no_casual_riders', 'no_reg_riders']]
    .rename(columns={
        'is_work_day': 'Is_Workday',
        'no_casual_riders': 'Non-Regulars',
        'no_reg_riders': 'Regulars',
    })
    .corr()
)
Out[17]:
In [18]:
# Correlation matrix of the `is_holiday` flag against the casual and
# registered rider counts; all three columns are relabelled for display.
(
    weather[['is_holiday', 'no_casual_riders', 'no_reg_riders']]
    .rename(columns={
        'is_holiday': 'Is_Holiday',
        'no_casual_riders': 'Non-Regulars',
        'no_reg_riders': 'Regulars',
    })
    .corr()
)
Out[18]: