In [6]:
from pandas import DataFrame, Series
In [8]:
import pandas as pd
In [9]:
import numpy as np
In [16]:
import matplotlib.pyplot as plt
In [17]:
%matplotlib inline
In [18]:
# Load the daily weather table; the file is tab-separated.
weather = pd.read_csv('data/daily_weather.tsv', sep='\t')
In [19]:
# Relabelling table for the season column: label found in the file -> label to use.
# NOTE(review): presumably this corrects season labels that are shifted by one
# in the source data -- confirm against the dataset description.
season_mapping = {
    'Spring': 'Winter',
    'Winter': 'Fall',
    'Fall': 'Summer',
    'Summer': 'Spring',
}
In [20]:
# Translate every season label through season_mapping and store the result
# back in the same column. Labels missing from the mapping become NaN.
# Caution: the mapping sends season names to other season names, so running
# this cell a second time relabels the already-relabelled column.
relabelled_seasons = weather['season_desc'].map(season_mapping)
weather['season_desc'] = relabelled_seasons
In [32]:
# Blue line of the `temp` column plotted against the frame's index.
fig, ax = plt.subplots()
ax.plot(weather['temp'], 'b-')

# Cap the x-axis at 365 and pin the y-axis to the 0-1 range.
# NOTE(review): the fixed 0-1 range suggests `temp` is normalised -- confirm.
ax.set_xlim(right=365)
ax.set_ylim(bottom=0, top=1)

ax.set_xlabel('Days')
ax.set_ylabel('Temperature')
plt.show()
In [47]:
# Add a `Month` column holding the calendar month number of each row's `date`.
# The dates are read from `weather` itself: no other weather frame is defined
# in this notebook, so the cell must not depend on a leftover kernel variable.
weather['Month'] = pd.to_datetime(weather['date']).dt.month
In [48]:
# Bar chart of the mean temperature and mean humidity for each month.
# Several columns are selected from a groupby with a list (double brackets);
# a bare tuple of names is rejected by current pandas releases.
weather.groupby('Month')[['temp', 'humidity']].mean().plot(kind='bar')
Out[48]:
In [73]:
# Scatter of total riders against temperature, one colour per season.
# (season label, matplotlib colour) pairs, listed in drawing/legend order.
season_colors = [('Winter', 'b'), ('Spring', 'g'), ('Summer', 'y'), ('Fall', 'r')]

for season_name, season_color in season_colors:
    # Filter the frame once per season and reuse the subset for both axes.
    season_days = weather[weather.season_desc == season_name]
    plt.scatter(
        season_days['temp'],
        season_days['total_riders'],
        s=20, c=season_color, alpha=.3, label=season_name
    )

plt.title("Daily Rental Volume Versus Temperature")
plt.xlabel('Temperature')
plt.ylabel('Total Riders')
plt.legend(loc=4)  # loc=4 places the legend in the lower-right corner
plt.show()
In [74]:
# Scatter of total riders against windspeed, one colour per season.
# (season label, matplotlib colour) pairs, listed in drawing/legend order.
season_colors = [('Winter', 'b'), ('Spring', 'g'), ('Summer', 'y'), ('Fall', 'r')]

for season_name, season_color in season_colors:
    # Filter the frame once per season and reuse the subset for both axes.
    season_days = weather[weather.season_desc == season_name]
    plt.scatter(
        season_days['windspeed'],
        season_days['total_riders'],
        s=20, c=season_color, alpha=.3, label=season_name
    )

plt.title("Daily Rental Volume Versus Windspeed")
plt.xlabel('Windspeed')
plt.ylabel('Total Riders')
plt.legend(loc=4)  # loc=4 places the legend in the lower-right corner
plt.show()
In [81]:
# Load the station table; the file is tab-separated.
stations = pd.read_csv('data/stations.tsv', sep='\t')
In [83]:
# Load the 2012 usage table; the file is tab-separated.
usage = pd.read_csv('data/usage_2012.tsv', sep='\t')
In [95]:
# Keep only the station identifier and its coordinates.
s = stations.loc[:, ['station', 'lat', 'long']]
In [96]:
# One-column frame of the `station_start` values, with the column renamed to
# `station` so it shares a key name with the station table.
u = usage['station_start'].to_frame(name='station')
In [98]:
# Number of rows per station, most frequent station first.
# NOTE(review): presumably each usage row is one rental -- confirm.
counts = u.loc[:, 'station'].value_counts()
In [103]:
# Start a fresh frame whose only column lists the stations that were counted,
# in the same order as `counts`.
c = DataFrame({'station': counts.index})
In [110]:
# Attach the tallies positionally (as a plain array) so they line up with the
# station names taken from the same Series' index.
c['counts'] = np.asarray(counts)
In [119]:
# Convert the yearly totals into a per-day average with one vectorised
# division instead of calling a Python lambda for every row.
# Caution: this overwrites the column it reads, so re-running the cell divides again.
# NOTE(review): the usage file is named for 2012, a leap year (366 days) --
# confirm whether 365 is the intended divisor.
c['counts'] = c['counts'] / 365
In [120]:
# Join coordinates with per-station counts on the shared `station` key.
# The default inner join keeps only stations present in both frames.
m = s.merge(c, on='station')
In [122]:
# Plot every station at its longitude/latitude; marker area grows with the
# station's count, and the low alpha keeps overlapping markers readable.
marker_sizes = m['counts'] * 10
plt.scatter(
    x=m['long'],
    y=m['lat'],
    s=marker_sizes,
    c='b',
    alpha=.1,
    label='Location',
)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.legend(loc='lower right')
plt.show()