In [6]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
In [7]:
# Load the tab-separated daily weather file into a DataFrame.
weather = pd.read_csv('daily_weather.tsv', sep='\t')
In [8]:
# Mean temperature for each season label as it appears in the file.
# The aggregation is named by string: passing NumPy callables such as np.mean
# to agg() is deprecated in recent pandas and emits a FutureWarning.
weather.groupby('season_desc').agg({'temp': 'mean'})
Out[8]:
In [9]:
# Build a corrected copy of the table with the season labels remapped
# (Fall -> Summer, Summer -> Spring, Winter -> Fall, Spring -> Winter).
# A single dict lookup cannot cascade (an already-remapped "Summer" is never
# remapped again), so the labels need no temporary "_" suffix, and only the
# season_desc column is touched. `weather` itself is left unchanged.
season_fix = {
    "Fall": "Summer",
    "Summer": "Spring",
    "Winter": "Fall",
    "Spring": "Winter",
}
fix = weather.assign(
    # Labels outside the mapping pass through untouched.
    season_desc=weather['season_desc'].map(lambda label: season_fix.get(label, label))
)
In [10]:
# Per-season mean temperature on the corrected frame `fix`. Grouping `weather`
# here would only repeat the uncorrected table.
# The aggregation is named by string because NumPy callables in agg() are
# deprecated in recent pandas.
fix.groupby('season_desc').agg({'temp': 'mean'})
Out[10]:
In [11]:
# Parse the date column and store its calendar month as a new `months` column.
date_index = pd.DatetimeIndex(weather['date'])
weather['months'] = date_index.month
In [12]:
# Total riders per calendar month.
# 'sum' is given as a string: passing np.sum to agg() is deprecated in recent
# pandas and emits a FutureWarning.
weather.groupby('months').agg({'total_riders': 'sum'})
Out[12]:
# Correlation matrix between total_riders and temp over the whole table.
rider_temp_cols = ['total_riders', 'temp']
weather[rider_temp_cols].corr()
3. Investigate how the number of rentals varies with temperature. Is this trend constant across seasons? Across months?
In [13]:
# Riders-vs-temperature correlation matrix computed separately for each month.
weather.groupby('months')[['total_riders', 'temp']].corr()
Out[13]:
# Riders-vs-temperature correlation matrix computed separately for each season label.
weather.groupby('season_desc')[['total_riders', 'temp']].corr()
In [14]:
# How each rider count column (casual, registered) correlates with temperature.
rider_type_temp_cols = ['no_casual_riders', 'no_reg_riders', 'temp']
weather[rider_type_temp_cols].corr()
Out[14]:
4. There are various types of users in the usage data sets. What sorts of things can you say about how they use the bikes differently?
In [15]:
# Correlation between the casual and registered rider count columns.
weather.loc[:, ['no_casual_riders', 'no_reg_riders']].corr()
Out[15]:
In [16]:
# Column totals for is_holiday and total_riders over the whole table.
holiday_cols = ['is_holiday', 'total_riders']
weather[holiday_cols].sum(axis=0)
Out[16]:
In [17]:
# Correlation between the is_holiday column and total ridership.
weather.loc[:, ['is_holiday', 'total_riders']].corr()
Out[17]:
In [ ]:
Part 2
In [18]:
import matplotlib.pyplot as plt
In [19]:
%matplotlib inline
In [ ]:
In [ ]:
In [20]:
# Line plot of temperature against month number, drawn on an explicit Axes.
fig, ax = plt.subplots()
ax.plot(weather['months'], weather['temp'])
ax.set_xlabel("This is just an x-axis")
ax.set_ylabel("This is just a y-axis")
plt.show()
In [21]:
# Mean humidity per calendar month (indexed by `months`).
# 'mean' is given as a string: passing np.mean to agg() is deprecated in recent
# pandas and emits a FutureWarning.
x = weather.groupby('months').agg({"humidity": 'mean'})
In [22]:
# Bar chart of mean humidity per month.
# Bar positions come from the grouped index rather than a hard-coded 1..12
# list, so the chart still lines up if a month is absent from the data.
plt.bar(x.index, x['humidity'])
plt.title("weather and humidity by months")
plt.xlabel("month")
plt.ylabel("mean humidity")
plt.show()
In [23]:
# Demo scatterplot: two noisy series that both follow y = x.
# The noise comes from a locally seeded generator so the figure is identical
# on every Restart & Run All.
xs = range(10)
rng = np.random.RandomState(0)
plt.scatter(xs, 5 * rng.rand(10) + xs, color='r', marker='*', label='series1')
plt.scatter(xs, 5 * rng.rand(10) + xs, color='g', marker='o', label='series2')
plt.title("A scatterplot with two series")
plt.legend(loc=9)  # 9 is matplotlib's location code for 'upper center'
plt.show()
In [24]:
# Temperature vs. rental volume, drawn as one scatter series per season label.
w = weather[['season_desc', 'temp', 'total_riders']]
fall = w[w['season_desc'].eq('Fall')]
winter = w[w['season_desc'].eq('Winter')]
spring = w[w['season_desc'].eq('Spring')]
summer = w[w['season_desc'].eq('Summer')]

# Marker size and transparency shared by every series.
marker_style = dict(s=100, alpha=.41)
season_series = [
    (fall, 'orange', '^', 'fall'),
    (winter, 'blue', '*', 'winter'),
    (spring, 'purple', 'd', 'spring'),
    (summer, 'red', 'o', 'summer'),
]
for season_rows, colour, glyph, name in season_series:
    plt.scatter(season_rows['temp'], season_rows['total_riders'],
                color=colour, marker=glyph, label=name, **marker_style)

plt.legend(loc='lower right')
plt.xlabel('temperature')
plt.ylabel('rental volume')
plt.show()
Create another scatterplot to show how daily rental volume varies with windspeed. As above, use a different series for each season.
In [ ]:
In [ ]:
In [25]:
# Windspeed vs. rental volume, drawn as one scatter series per season label.
w = weather[['season_desc', 'windspeed', 'total_riders']]
fall = w.loc[w['season_desc'] == 'Fall']
winter = w.loc[w['season_desc'] == 'Winter']
spring = w.loc[w['season_desc'] == 'Spring']
summer = w.loc[w['season_desc'] == 'Summer']

# One (rows, colour, marker, legend label) entry per season; the loop replaces
# four copy-pasted scatter calls that differed only in these values.
season_series = [
    (fall, 'orange', '^', 'fall'),
    (winter, 'blue', '*', 'winter'),
    (spring, 'purple', 'd', 'spring'),
    (summer, 'red', 'o', 'summer'),
]
for season_rows, colour, glyph, name in season_series:
    plt.scatter(season_rows['windspeed'], season_rows['total_riders'],
                color=colour, marker=glyph, label=name, s=100, alpha=.41)

plt.legend(loc='lower right')
plt.xlabel('windspeed x1000 mph')  # NOTE(review): unit/scale taken as-is from the original label; confirm against the data
plt.ylabel('rental volume')
# Render the figure explicitly, like the other plotting cells, so the cell
# does not end by echoing the Text object returned by ylabel().
plt.show()
Out[25]:
In [26]:
# Load the tab-separated 2012 usage file into a DataFrame.
usage = pd.read_csv('usage_2012.tsv', sep='\t')
In [27]:
# Load the tab-separated stations file into a DataFrame.
stations = pd.read_csv('stations.tsv', sep='\t')
In [28]:
# Preview the first five station rows.
stations.head(n=5)
Out[28]:
In [32]:
# Count rentals per starting station and attach each station's coordinates.
# The statements are ordered so every name is defined before it is used:
# `counts` must exist before `c` is built from it, otherwise the cell only
# runs when `counts` is left over in the kernel from an earlier execution.

# Station name and coordinates only.
s = stations[['station', 'lat', 'long']]

# One-column frame of starting stations, renamed to match the `station` key in `s`.
u = usage[['station_start']].rename(columns={'station_start': 'station'})

# Number of rentals that started at each station.
counts = u['station'].value_counts()

# Same tallies as a two-column frame (station, counts) so they can be merged.
c = DataFrame({'station': counts.index, 'counts': counts.values})

# Inner join: keeps stations that appear in both the station list and the usage data.
m = pd.merge(s, c, on='station')
In [33]:
# Bubble map of stations: position is (longitude, latitude) and marker area
# is proportional to the rental count of each station.
bubble_area = m['counts'] * .05
plt.scatter(x=m['long'], y=m['lat'], s=bubble_area, c='b', alpha=.2, label='Location')
plt.legend(loc='lower right')
plt.xlabel('longitude')
plt.ylabel('latitude')
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: