In [1]:
from pandas import Series, DataFrame
In [2]:
import pandas as pd
In [3]:
import numpy as np
In [4]:
import matplotlib.pyplot as plt
In [32]:
import matplotlib
In [5]:
# Select matplotlib's inline backend so figures are embedded in the cell output.
%matplotlib inline
In [6]:
# Daily weather observations, read from a tab-separated file.
weather = pd.read_csv('data/daily_weather.tsv', sep='\t')
In [7]:
# Station table, read from a tab-separated file.
stations = pd.read_csv('data/stations.tsv', sep='\t')
In [8]:
# Usage records, read from a tab-separated file.
usage = pd.read_csv('data/usage_2012.tsv', sep='\t')
In [9]:
# Convert the `date` column to datetimes so the `.dt` accessor can be used later.
weather = weather.assign(date=pd.to_datetime(weather['date']))
In [10]:
# Label every row with a season name derived from its current `season_code`.
# NOTE: this reads `season_code`, which the following cell overwrites, so this
# cell is only correct when run on freshly loaded data (before that cell).
season_name_by_code = {1: 'winter', 2: 'spring', 3: 'summer', 4: 'fall'}
for code, name in season_name_by_code.items():
    weather.loc[weather['season_code'] == code, 'season_desc'] = name
In [11]:
# Renumber `season_code` from the season name so that spring=1, summer=2,
# fall=3 and winter=4. The masks are built from `season_desc`, which this
# cell does not modify, so the assignments cannot interfere with each other.
season_code_by_name = {'winter': 4, 'spring': 1, 'summer': 2, 'fall': 3}
for name, code in season_code_by_name.items():
    weather.loc[weather['season_desc'] == name, 'season_code'] = code
In [12]:
# Line chart of `temp` against `date`.
weather.plot(kind='line', x='date', y='temp')
plt.show()
In [13]:
# Mean `temp` and `humidity` for each calendar month (1-12) of the `date` column.
month_of_year = weather['date'].dt.month
temp_humid = weather.groupby(month_of_year)[['temp', 'humidity']].mean()
In [14]:
# Grouped bar chart of the monthly means: first column in red, second in blue.
temp_humid.plot.bar(width=0.75, color=['#EE4444', '#4444EE'])
plt.show()
In [15]:
# Split the weather rows into one frame per season label.
season_label = weather['season_desc']
spring_daily_vol = weather[season_label == 'spring']
summer_daily_vol = weather[season_label == 'summer']
fall_daily_vol = weather[season_label == 'fall']
winter_daily_vol = weather[season_label == 'winter']
In [16]:
# Scatter of `temp` against `total_riders`, one colour per season, all drawn
# on the same axes. Each layer is given a `label` so the legend shows which
# colour belongs to which season (the original figure had no legend).
season_layers = [
    ('spring', spring_daily_vol, 'yellow'),
    ('summer', summer_daily_vol, 'lightgreen'),
    ('fall', fall_daily_vol, '#ee5555'),
    ('winter', winter_daily_vol, 'lightblue'),
]
season_ax = None  # the first layer creates the axes; later layers draw onto them
for season_name, season_frame, season_color in season_layers:
    season_ax = season_frame.plot(kind='scatter', x='temp', y='total_riders',
                                  c=season_color, s=50, alpha=.4,
                                  label=season_name, ax=season_ax)
# All four original handles referred to the same axes; keep the names bound.
spr_ax = sum_ax = fal_ax = win_ax = season_ax
season_ax.set_title('Temp vs Daily Rental Volume')
plt.show()
In [17]:
# Scatter of `windspeed` against `total_riders`, one colour per season, all
# drawn on the same axes. Each layer is given a `label` so the legend shows
# which colour belongs to which season (the original figure had no legend).
season_layers = [
    ('spring', spring_daily_vol, 'yellow'),
    ('summer', summer_daily_vol, 'lightgreen'),
    ('fall', fall_daily_vol, '#ee5555'),
    ('winter', winter_daily_vol, 'lightblue'),
]
season_ax = None  # the first layer creates the axes; later layers draw onto them
for season_name, season_frame, season_color in season_layers:
    season_ax = season_frame.plot(kind='scatter', x='windspeed', y='total_riders',
                                  c=season_color, s=50, alpha=.4,
                                  label=season_name, ax=season_ax)
# All four original handles referred to the same axes; keep the names bound.
spr_ax = sum_ax = fal_ax = win_ax = season_ax
season_ax.set_title('Windspeed vs Daily Rental Volume')
plt.show()
In [18]:
# Single-column frame holding only `station_start` from the usage records.
usage_stations = usage.loc[:, ['station_start']]
In [19]:
# Preview the first five rows.
usage_stations.head(n=5)
Out[19]:
In [20]:
# Latitude/longitude per station, indexed by the values of the `station` column.
stations_geo = DataFrame(
    {'lat': stations['lat'].values, 'long': stations['long'].values},
    index=stations['station'].values,
)
In [21]:
# Preview the first five rows.
stations_geo.head(n=5)
Out[21]:
In [22]:
# Number of usage rows per distinct `station_start` value, as a one-column frame.
start_counts = usage_stations['station_start'].value_counts()
station_count = start_counts.to_frame()
In [23]:
# Preview the first five rows.
station_count.head(n=5)
Out[23]:
In [24]:
# Attach the per-station counts as a `rides` column. The assignment aligns on
# the index, so stations with no matching entry in `station_count` get NaN.
stations_geo['rides'] = station_count.iloc[:, 0]
In [25]:
# Preview the first five rows, now including the `rides` column.
stations_geo.head(n=5)
Out[25]:
In [26]:
# Keep only the stations whose row has no missing value in any column.
cleared = stations_geo.dropna(axis=0, how='any')
In [27]:
# Preview the first five rows.
cleared.head(n=5)
Out[27]:
In [77]:
# Plot each station at its (long, lat) position with a marker sized by `rides`.
# NOTE(review): 366 is presumably the number of days in 2012 (a leap year), so
# the marker size would track average rides per day — confirm.
days_in_period = 366
marker_scale = 5
marker_sizes = cleared['rides'] / days_in_period * marker_scale
cleared.plot.scatter(x='long', y='lat', s=marker_sizes, alpha=0.6, figsize=(10, 10))
plt.show()