In [15]:
import numpy as np
import pandas as pd
import datetime
from pandas import Series, DataFrame
# Load the three tab-separated input tables from the working directory.
# read_csv with an explicit tab separator is the spelled-out form of
# read_table's default behaviour.
stations = pd.read_csv('stations.tsv', sep='\t')
usage = pd.read_csv('usage_2012.tsv', sep='\t')
weather = pd.read_csv('daily_weather.tsv', sep='\t')
def change_seasons(frame=None):
    """Overwrite ``season_desc`` with the name matching each ``season_code``.

    Codes 1, 2, 3 and 4 are labelled 'Winter', 'Spring', 'Summer' and 'Fall'.
    Rows whose code is not one of these keep their existing description.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to relabel in place. Defaults to the module-level ``weather``
        frame, so a bare ``change_seasons()`` call behaves as before.
    """
    if frame is None:
        frame = weather
    season_names = (
        (1, 'Winter'),
        (2, 'Spring'),
        (3, 'Summer'),
        (4, 'Fall'),
    )
    # Each mask is built from season_code only, so the assignments are
    # independent of one another.
    for code, name in season_names:
        frame.loc[frame["season_code"] == code, "season_desc"] = name
def convert_dates(frame=None):
    """Replace the ``date`` column's values with ``datetime.date`` objects.

    Every value is converted to ``str`` and parsed with the ``%Y-%m-%d``
    format, so running the function again on already converted dates is
    harmless.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to convert in place. Defaults to the module-level ``weather``
        frame, so a bare ``convert_dates()`` call behaves as before.

    Raises
    ------
    ValueError
        If a value does not match the ``%Y-%m-%d`` format.
    """
    if frame is None:
        frame = weather

    def _parse(raw):
        return datetime.datetime.strptime(str(raw), "%Y-%m-%d").date()

    # One column-wise map replaces the per-row ``.ix`` assignment: ``.ix`` is
    # no longer available in current pandas, and scalar writes inside a
    # Python loop are needlessly slow.
    frame['date'] = frame['date'].map(_parse)
def add_months(frame=None):
    """Add a ``month`` column holding the month number of each ``date``.

    Every value in ``date`` must expose a ``.month`` attribute (for example
    the ``datetime.date`` objects written by ``convert_dates``).

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to extend in place. Defaults to the module-level ``weather``
        frame, so a bare ``add_months()`` call behaves as before.
    """
    if frame is None:
        frame = weather
    # Column-wise map instead of per-row ``.ix`` writes, which current pandas
    # no longer supports; the resulting column holds integer month numbers.
    frame['month'] = frame['date'].map(lambda day: day.month)
# Apply the clean-up steps to the weather table.
# Order matters: add_months() reads `.month` from the values that
# convert_dates() stores in the 'date' column, so it must run after it.
change_seasons()
convert_dates()
add_months()
In [23]:
# Correlation between subjective_temp and total_riders, computed separately
# for every value of the 'month' column.
months = weather[['month', 'subjective_temp', 'total_riders']].groupby('month')
corrdf = months.corr()
# NA clean-up: the grouping column is not guaranteed to appear in the
# correlation matrix, so remove it only when it is present instead of
# risking a KeyError.
if 'month' in corrdf.columns:
    del corrdf['month']
# Drop rows whose correlations are undefined (NaN).
corrdf = corrdf.dropna()
# print(...) with a single argument works on both Python 2 and Python 3.
print(corrdf)
As the correlations show, ridership rises with temperature in the winter months and falls as temperature increases in the summer months. Riders do not seem to appreciate especially cold or especially hot temperatures. The exception is May, when temperature appears neither to encourage nor to discourage riders.
In [29]:
# Correlation between subjective_temp and total_riders, computed separately
# for every season description.
seasons = weather[['season_desc', 'subjective_temp', 'total_riders']].groupby('season_desc')
corrdf = seasons.corr()
# NA clean-up: drop rows whose correlations are undefined (NaN).
corrdf = corrdf.dropna()
# print(...) with a single argument works on both Python 2 and Python 3.
print(corrdf)
In [ ]: