In [6]:
import pandas as pd
In [7]:
import numpy as np
In [8]:
import matplotlib.pyplot as plt
In [9]:
from pandas import DataFrame, Series
In [10]:
%pylab inline
In [11]:
# Colour palette installed below as the default colour cycle for plotted lines/markers.
c_cycle=("#3498db","#e74c3c","#1abc9c","#9b59b6","#f1c40f","#ecf0f1","#34495e",
"#446cb3","#d24d57","#27ae60","#663399", "#f7ca18","#bdc3c7","#2c3e50")
# Global matplotlib defaults; `mpl` is expected to be in scope via `%pylab inline`.
mpl.rc('font', family='Bitstream Vera Sans', size=20)
mpl.rc('lines', linewidth=2,color="#2c3e50")
# Patches (bars, wedges, ...) get no outline and no default fill.
mpl.rc('patch', linewidth=0,facecolor="none",edgecolor="none")
mpl.rc('text', color='#2c3e50')
# `axes.color_cycle` was deprecated in matplotlib 1.5 and removed in later
# releases; the palette is therefore installed through `axes.prop_cycle`.
mpl.rc('axes', facecolor='none',edgecolor="none",titlesize=25,labelsize=15,prop_cycle=mpl.cycler('color', c_cycle),grid=False)
# Tick marks keep their length but are drawn with zero width (i.e. invisible).
mpl.rc('xtick.major',size=10,width=0)
mpl.rc('ytick.major',size=10,width=0)
mpl.rc('xtick.minor',size=10,width=0)
mpl.rc('ytick.minor',size=10,width=0)
mpl.rc('ytick',direction="out")
mpl.rc('grid',color='#c0392b',alpha=0.3,linewidth=1)
mpl.rc('legend',numpoints=3,fontsize=15,borderpad=0,markerscale=3,labelspacing=0.2,frameon=False,framealpha=0.6,handlelength=1,handleheight=0.5)
# Transparent figure backgrounds both on screen and in saved files.
mpl.rc('figure',figsize=(10,6),dpi=80,facecolor="none",edgecolor="none")
mpl.rc('savefig',dpi=100,facecolor="none",edgecolor="none")
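Changed design: the cell above restyles matplotlib's defaults (colour palette, fonts, figure and legend appearance) for all plots below.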
In [12]:
weather = pd.read_table("daily_weather.tsv")
In [13]:
usage = pd.read_table("usage_2012.tsv")
In [14]:
station = pd.read_table("stations.tsv")
In [15]:
weather.loc[weather['season_code'] == 1, 'season_desc'] = 'winter'
In [16]:
weather.loc[weather['season_code'] == 2, 'season_desc'] = 'spring'
In [17]:
weather.loc[weather['season_code'] == 3, 'season_desc'] = 'summer'
In [18]:
weather.loc[weather['season_code'] == 4, 'season_desc'] = 'fall'
In [19]:
weather['date'] = pd.to_datetime(weather['date'])
In [20]:
month_rental = weather.groupby(weather['date'].dt.month)['total_riders'].sum()
In [21]:
mean = weather.groupby('season_desc')['temp'].mean()
In [22]:
# Line plot of the 'temp' column against the frame's row index.
weather['temp'].plot(kind='line')
Out[22]:
In [23]:
# Add a 'month' column holding the month number extracted from 'date'.
weather['month'] = pd.to_datetime(weather['date']).dt.month
In [24]:
# Bar chart of the monthly mean of 'temp' and 'humidity'.
# The columns are selected with a list: selecting several columns from a
# groupby with a bare tuple (['temp', 'humidity']) is rejected by current pandas.
weather.groupby('month')[['temp', 'humidity']].mean().plot(kind='bar')
Out[24]:
In [25]:
# Daily ridership against temperature, one scatter layer per season so that
# every season gets its own colour and legend entry.
season_palette = (
    ('winter', 'blue'),
    ('spring', 'magenta'),
    ('summer', 'cyan'),
    ('fall', 'yellow'),
)
for season_name, season_color in season_palette:
    # Filter once per season instead of once per plotted column.
    season_rows = weather[weather['season_desc'] == season_name]
    plt.scatter(
        season_rows['temp'],
        season_rows['total_riders'],
        s=30, color=season_color, label=season_name)
plt.title("Daily rental volume and temperature")
# Axis labels so the figure can be read on its own.
plt.xlabel('Temperature')
plt.ylabel('Total riders')
plt.legend(loc=4)
plt.show()
In [26]:
# Daily ridership against wind speed, one scatter layer per season so that
# every season gets its own colour and legend entry.
season_palette = (
    ('winter', 'blue'),
    ('spring', 'magenta'),
    ('summer', 'cyan'),
    ('fall', 'yellow'),
)
for season_name, season_color in season_palette:
    # Filter once per season instead of once per plotted column.
    season_rows = weather[weather['season_desc'] == season_name]
    plt.scatter(
        season_rows['windspeed'],
        season_rows['total_riders'],
        s=30, color=season_color, label=season_name)
plt.title("Daily rental volume and windspeed")
# Axis labels so the figure can be read on its own.
plt.xlabel('Windspeed')
plt.ylabel('Total riders')
plt.legend(loc=1)
plt.show()
In [27]:
stations = station [['station', 'lat', 'long']]
In [28]:
stations
Out[28]:
In [29]:
count = usage['station_start'].value_counts()
In [30]:
average_rental_df = DataFrame({ 'average_rental' : count / 365})
In [34]:
average_rental_df.index = range(185)
In [35]:
average_rental_df
Out[35]:
In [32]:
merged_df = pd.concat([stations, average_rental_df], axis=1)
In [29]:
# Station map: position from long/lat, marker area proportional to the
# station's average daily rentals.
# scatter needs either one colour or one colour per point; passing the
# 14-entry palette directly for a different number of points is rejected by
# current matplotlib, so the palette is repeated explicitly per station.
point_colors = [c_cycle[i % len(c_cycle)] for i in range(len(merged_df))]
plt.scatter(merged_df['long'], merged_df['lat'], color=point_colors, alpha=0.5, s=(merged_df['average_rental']*10), label='Location of stations')
# Legend anchored outside the axes, to the lower right.
plt.legend(bbox_to_anchor=(1.2, 0.2), loc='lower right', borderaxespad=0)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('Rental volume and geography')
plt.show()
In [ ]: