In [6]:
import pandas as pd

In [7]:
import numpy as np

In [8]:
import matplotlib.pyplot as plt

In [9]:
from pandas import DataFrame, Series

In [10]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [11]:
# Global matplotlib styling for every figure in this notebook.

# Palette used as the default colour cycle for successive plotted series.
c_cycle=("#3498db","#e74c3c","#1abc9c","#9b59b6","#f1c40f","#ecf0f1","#34495e",
                  "#446cb3","#d24d57","#27ae60","#663399", "#f7ca18","#bdc3c7","#2c3e50")
mpl.rc('font', family='Bitstream Vera Sans', size=20)
mpl.rc('lines', linewidth=2,color="#2c3e50")
mpl.rc('patch', linewidth=0,facecolor="none",edgecolor="none")
mpl.rc('text', color='#2c3e50')
# `axes.color_cycle` is deprecated (and removed in later matplotlib releases);
# `axes.prop_cycle` with a colour cycler is the supported replacement.
mpl.rc('axes', facecolor='none',edgecolor="none",titlesize=25,labelsize=15,
       prop_cycle=mpl.cycler('color', list(c_cycle)),grid=False)
# Tick marks keep their length (so labels stay offset) but are drawn with zero width.
mpl.rc('xtick.major',size=10,width=0)
mpl.rc('ytick.major',size=10,width=0)
mpl.rc('xtick.minor',size=10,width=0)
mpl.rc('ytick.minor',size=10,width=0)
mpl.rc('ytick',direction="out")
mpl.rc('grid',color='#c0392b',alpha=0.3,linewidth=1)
mpl.rc('legend',numpoints=3,fontsize=15,borderpad=0,markerscale=3,labelspacing=0.2,frameon=False,framealpha=0.6,handlelength=1,handleheight=0.5)
# Transparent figure backgrounds both on screen and in saved files.
mpl.rc('figure',figsize=(10,6),dpi=80,facecolor="none",edgecolor="none")
mpl.rc('savefig',dpi=100,facecolor="none",edgecolor="none")

Changed design

1. Plot the daily temperature over the course of the year. (This should probably be a line chart.) Create a bar chart that shows the average temperature and humidity by month.


In [12]:
# Daily weather observations, one row per day (tab-separated file).
weather = pd.read_csv("daily_weather.tsv", sep="\t")

In [13]:
# Individual rental records for 2012 (tab-separated file).
usage = pd.read_csv("usage_2012.tsv", sep="\t")

In [14]:
# Station metadata, including name and coordinates (tab-separated file).
station = pd.read_csv("stations.tsv", sep="\t")

In [15]:
# Relabel `season_desc` from the numeric `season_code`.
# One mapping drives all four assignments instead of four copy-pasted cells;
# rows whose code is not listed here keep whatever label they already had.
season_labels = {1: 'winter', 2: 'spring', 3: 'summer', 4: 'fall'}
for season_code, season_label in season_labels.items():
    weather.loc[weather['season_code'] == season_code, 'season_desc'] = season_label

In [19]:
# Convert the 'date' column to pandas datetimes so the `.dt` accessors work on it.
weather['date'] = weather['date'].pipe(pd.to_datetime)

In [20]:
# Total riders per calendar month, grouped by the month number of each date.
month_rental = weather['total_riders'].groupby(weather['date'].dt.month).sum()

In [21]:
# Mean temperature for each season label.
mean = weather['temp'].groupby(weather['season_desc']).mean()

In [22]:
# Line chart of daily temperature against the calendar date (previously the
# x-axis was the row number), with a title so the figure stands on its own.
weather.set_index('date')['temp'].plot(title='Daily temperature')


Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x1140a7cd0>

In [23]:
# Month number of each observation, used as the grouping key for monthly averages.
weather['month'] = pd.DatetimeIndex(weather['date']).month

In [24]:
# Bar chart of mean temperature and humidity per month.
# Columns are selected with a list: the bare tuple form `['temp', 'humidity']`
# is deprecated and rejected by newer pandas releases.
weather.groupby('month')[['temp', 'humidity']].mean().plot(
    kind='bar', title='Average temperature and humidity by month')


Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x103744750>

2. Use a scatterplot to show how the daily rental volume varies with temperature. Use a different series (with different colors) for each season.


In [25]:
# Daily rental volume vs. temperature, one coloured series per season.
# The season -> colour mapping drives a single loop, so each season is
# filtered once and the four series cannot drift apart.
season_colors = {'winter': 'blue', 'spring': 'magenta', 'summer': 'cyan', 'fall': 'yellow'}
for season_name, season_color in season_colors.items():
    season_days = weather[weather['season_desc'] == season_name]
    plt.scatter(
        season_days['temp'],
        season_days['total_riders'],
        s=30, color=season_color, label=season_name)
plt.title("Daily rental volume and temperature")
# Axis labels so the figure is readable without the surrounding code.
plt.xlabel('Temperature')
plt.ylabel('Total riders')
plt.legend(loc=4)
plt.show()


3. Create another scatterplot to show how daily rental volume varies with windspeed. As above, use a different series for each season.


In [26]:
# Daily rental volume vs. windspeed, one coloured series per season.
# The season -> colour mapping drives a single loop, so each season is
# filtered once and the four series cannot drift apart.
season_colors = {'winter': 'blue', 'spring': 'magenta', 'summer': 'cyan', 'fall': 'yellow'}
for season_name, season_color in season_colors.items():
    season_days = weather[weather['season_desc'] == season_name]
    plt.scatter(
        season_days['windspeed'],
        season_days['total_riders'],
        s=30, color=season_color, label=season_name)
plt.title("Daily rental volume and windspeed")
# Axis labels so the figure is readable without the surrounding code.
plt.xlabel('Windspeed')
plt.ylabel('Total riders')
plt.legend(loc=1)
plt.show()


4. How do the rental volumes vary with geography? Compute the average daily rentals for each station and use this as the radius for a scatterplot of each station's latitude and longitude.


In [27]:
# Keep only the columns needed for the map: station name and coordinates.
stations = station.loc[:, ['station', 'lat', 'long']]

In [28]:
# Display the station lookup table (name, latitude, longitude).
stations


Out[28]:
station lat long
0 20th & Bell St 38.856100 -77.051200
1 18th & Eads St. 38.857250 -77.053320
2 20th & Crystal Dr 38.856400 -77.049200
3 15th & Crystal Dr 38.860170 -77.049593
4 Aurora Hills Community Ctr/18th & Hayes St 38.857866 -77.059490
5 Pentagon City Metro / 12th & S Hayes St 38.862303 -77.059936
6 S Joyce & Army Navy Dr 38.863700 -77.063300
7 Crystal City Metro / 18th & Bell St 38.857300 -77.051100
8 12th & Army Navy Dr 38.862900 -77.052800
9 27th & Crystal Dr 38.848441 -77.051516
10 S Glebe & Potomac Ave 38.842600 -77.050200
11 23rd & Crystal Dr 38.853300 -77.049800
12 26th & S Clark St 38.850688 -77.051520
13 19th St & Pennsylvania Ave NW 38.900300 -77.042900
14 14th & V St NW 38.917600 -77.032100
15 11th & Kenyon St NW 38.929464 -77.027822
16 16th & Harvard St NW 38.926088 -77.036536
17 Adams Mill & Columbia Rd NW 38.922925 -77.042581
18 14th & Harvard St NW 38.926800 -77.032200
19 Calvert & Biltmore St NW 38.923203 -77.047637
20 Lamont & Mt Pleasant NW 38.931900 -77.038800
21 4th & M St SW 38.876700 -77.017800
22 15th & P St NW 38.909850 -77.034438
23 14th & R St NW 38.912682 -77.031681
24 14th & Rhode Island Ave NW 38.908600 -77.032300
25 20th & E St NW 38.896300 -77.045000
26 21st & I St NW 38.900800 -77.047000
27 Georgia & New Hampshire Ave NW 38.936043 -77.024649
28 14th St & Spring Rd NW 38.937500 -77.032800
29 John McCormack Dr & Michigan Ave NE 38.934600 -76.995500
... ... ... ...
317 Rockville Metro West 39.084379 -77.146866
318 Bethesda Metro 38.984691 -77.094537
319 Iwo Jima Memorial/N Meade & 14th St N 38.889920 -77.071301
320 34th & Water St NW 38.903582 -77.067786
321 Duke St & John Carlyle St 38.804378 -77.060866
322 Lee Hwy & N Cleveland St 38.894941 -77.091690
323 Arlington Blvd & S George Mason Dr/NFATC 38.869442 -77.104503
324 MLK Library/9th & G St NW 38.898404 -77.024281
325 Lee Hwy & N Scott St 38.897612 -77.080851
326 New Hampshire Ave & 24th St NW 38.901755 -77.051084
327 Eisenhower Ave & Mill Race Ln 38.801111 -77.068952
328 Potomac Greens Dr & Slaters Ln 38.821750 -77.047494
329 Ballenger Ave & Dulaney St 38.802677 -77.063562
330 Mount Vernon Ave & E Nelson Ave 38.820064 -77.057619
331 Mount Vernon Ave & E Del Ray Ave 38.825950 -77.058541
332 Monroe Ave & Leslie Ave 38.820932 -77.053096
333 Mount Vernon Ave & Kennedy St 38.833077 -77.059821
334 Court House Metro / 15th & N Uhle St 38.890612 -77.084801
335 Washington Adventist U / Flower Ave & Division St 38.986743 -77.000035
336 6th & S Ball St 38.864702 -77.048672
337 McKinley St & Connecticut Ave NW 38.964970 -77.075946
338 15th & L St NW 38.903810 -77.034931
339 17th & G St NW 38.898410 -77.039624
340 Spring St & Second Ave 38.997653 -77.034499
341 18th & R St NW 38.912648 -77.041834
342 S Joyce & 16th St S 38.859254 -77.063275
343 Union Market/6th St & Neal Pl NE 38.908008 -76.996985
344 N Nelson St & Lee Hwy 38.895929 -77.105246
345 21st St N & N Pierce St 38.898984 -77.078317
346 Lee Hwy & N Kirkwood Rd 38.895377 -77.097130

347 rows × 3 columns


In [29]:
# Number of rentals that started at each station; the result is indexed by
# station name and sorted from busiest to quietest.
count = usage.loc[:, 'station_start'].value_counts()

In [30]:
# Average rentals per day for each start station, as a one-column frame.
# NOTE(review): 2012 was a leap year (366 days) — confirm whether 365 is the intended divisor.
average_rental_df = count.div(365).to_frame('average_rental')

In [34]:
# Replace the station-name index left by value_counts() with a positional one,
# sized from the frame itself instead of a hard-coded row count (which raises
# as soon as the number of stations in the usage data changes).
# NOTE(review): this discards the station names, so these rows can no longer
# be matched to `stations` by name — confirm that is intended.
average_rental_df.index = range(len(average_rental_df))

In [35]:
# Inspect the per-station daily averages; rows are ordered from the busiest
# station down and the index is positional, not the station name.
average_rental_df


Out[35]:
average_rental
0 191.369863
1 151.084932
2 135.386301
3 119.306849
4 110.252055
5 110.087671
6 109.865753
7 108.884932
8 95.531507
9 91.000000
10 87.536986
11 87.175342
12 87.019178
13 86.493151
14 85.638356
15 84.454795
16 80.808219
17 78.709589
18 78.512329
19 78.246575
20 77.728767
21 76.805479
22 76.605479
23 71.438356
24 71.249315
25 70.969863
26 69.309589
27 69.021918
28 68.334247
29 67.942466
... ...
155 3.967123
156 3.564384
157 3.309589
158 3.073973
159 2.895890
160 2.542466
161 2.279452
162 2.189041
163 2.134247
164 2.084932
165 2.054795
166 2.038356
167 1.797260
168 1.767123
169 1.583562
170 1.567123
171 1.391781
172 1.309589
173 1.095890
174 1.057534
175 0.775342
176 0.758904
177 0.739726
178 0.715068
179 0.668493
180 0.660274
181 0.536986
182 0.493151
183 0.361644
184 0.361644

185 rows × 1 columns


In [32]:
# Attach each station's average daily rentals to its coordinates by station NAME.
# `count` is indexed by station name and sorted by volume, so a positional
# concat would pair the busiest station's figure with whichever station happens
# to be in row 0 of `stations`. Stations with no recorded rentals get NaN.
# NOTE(review): assumes `stations['station']` uses the same names as
# `usage['station_start']` — confirm against the data.
merged_df = stations.assign(average_rental=stations['station'].map(count / 365))

In [29]:
# Map of stations: position is longitude/latitude, marker area scales with
# the station's average daily rentals.

# One colour per station, cycling through the palette. Passing the 14-colour
# palette directly is rejected by current matplotlib because its length does
# not match the number of points.
point_colors = [c_cycle[i % len(c_cycle)] for i in range(len(merged_df))]
# Stations without any recorded rentals have a NaN average; give them zero area.
point_sizes = merged_df['average_rental'].fillna(0) * 10
plt.scatter(merged_df['long'], merged_df['lat'], color=point_colors, alpha=0.5,
            s=point_sizes, label='Location of stations')
plt.legend(bbox_to_anchor=(1.2, 0.2), loc='lower right', borderaxespad=0)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('Rental volume and geography')
plt.show()



In [ ]: