In [1]:
from pandas import DataFrame, Series
import pandas as pd
import numpy as np

When working with matplotlib we usually do


In [2]:
import matplotlib.pyplot as plt

and then some magic to get plots to show up here


In [3]:
%matplotlib inline

Which we can check:


In [8]:
plt.plot([1,3],[2,4])
plt.title("This is just a sample graph")
plt.xlabel("This is just an x-axis")
plt.ylabel("This is just a y-axis")


Out[8]:
<matplotlib.text.Text at 0x7f021e852b10>

In [10]:
plt.bar(range(10), np.random.rand(10))
plt.title("A random bar chart")


Out[10]:
<matplotlib.text.Text at 0x7f021e76f7d0>

In [16]:
xs = range(10)
plt.scatter(xs, 5 * np.random.rand(10) + xs, color='r', marker='*', label='series1')
plt.scatter(xs, 5 * np.random.rand(10) + xs, color='g', marker='o', label='series2')
plt.title("A scatterplot with two series")
plt.legend(loc=9)


Out[16]:
<matplotlib.legend.Legend at 0x7f021e19ead0>

Now let's pull in our bike share data


In [17]:
weather = pd.read_table('daily_weather.tsv', parse_dates=['date'])
stations = pd.read_table('stations.tsv')
usage = pd.read_table('usage_2012.tsv', parse_dates=['time_start', 'time_end'])
weather.index = pd.DatetimeIndex(weather['date'])
weather.season_desc = weather.season_desc.map({'Spring' : 'Winter', 'Winter' : 'Fall', 'Fall' : 'Summer', 'Summer' : 'Spring' })

We can now plot the temperature across the year:


In [28]:
plt.scatter(weather.index, weather.temp)


Out[28]:
<matplotlib.collections.PathCollection at 0x7f021d0a8510>

Or look at the scatterplot of temperature and humidity:


In [30]:
plt.scatter(weather.humidity, weather.temp)


Out[30]:
<matplotlib.collections.PathCollection at 0x7f021d13ed10>

Or look at the scatter between the number of riders and temperature:


In [32]:
plt.scatter(weather.temp, weather.total_riders)


Out[32]:
<matplotlib.collections.PathCollection at 0x7f021cf64490>

Let's break that down by season. That gives us a good example of mixing vanilla Python code with matplotlib code:


In [41]:
for season, color in zip(['Winter','Spring','Summer','Fall'],['blue','green','orange','brown']):
    temps = weather[weather.season_desc == season].temp
    riders = weather[weather.season_desc == season].total_riders
    plt.scatter(temps, riders, color=color, label=season)
plt.legend(loc=4)
plt.ylim([0, 10000])
plt.xlabel("temperature")
plt.ylabel("# of riders")


Out[41]:
<matplotlib.text.Text at 0x7f021cbfe2d0>

Scatterplot matrix


In [42]:
from pandas.tools.plotting import scatter_matrix

In [46]:
scatter_matrix(weather[['temp', 'humidity', 'windspeed', 'total_riders']])


Out[46]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f0214341590>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f0214313550>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f0212a16910>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f0212a7d1d0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f0212a9e8d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f021299c110>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f0212918550>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f02129a9cd0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f02128742d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f02127f8510>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f02149d38d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f0218bdfa90>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f0219858810>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f021b676550>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f021276d750>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f02126d31d0>]], dtype=object)

You can also call plots directly on the dataframes (or series) themselves:


In [47]:
weather['temp'].hist()


Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f02126fea50>

In [48]:
weather['temp'].plot()


Out[48]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f021227b110>

In [61]:
avg_daily_trips = usage.groupby('station_start').size() / 365
trips = DataFrame({ 'avg_daily_trips' : avg_daily_trips })
station_geos = stations[['station', 'lat', 'long']]
trips_by_geo = pd.merge(station_geos, trips, left_on='station', right_index=True)
trips_by_geo


Out[61]:
station lat long avg_daily_trips
0 20th & Bell St 38.856100 -77.051200 4.624658
2 20th & Crystal Dr 38.856400 -77.049200 14.008219
3 15th & Crystal Dr 38.860170 -77.049593 8.476712
4 Aurora Hills Community Ctr/18th & Hayes St 38.857866 -77.059490 5.441096
5 Pentagon City Metro / 12th & S Hayes St 38.862303 -77.059936 11.591781
6 S Joyce & Army Navy Dr 38.863700 -77.063300 10.408219
7 Crystal City Metro / 18th & Bell St 38.857300 -77.051100 35.956164
8 12th & Army Navy Dr 38.862900 -77.052800 10.210959
9 27th & Crystal Dr 38.848441 -77.051516 24.057534
10 S Glebe & Potomac Ave 38.842600 -77.050200 11.994521
11 23rd & Crystal Dr 38.853300 -77.049800 11.416438
13 19th St & Pennsylvania Ave NW 38.900300 -77.042900 43.873973
14 14th & V St NW 38.917600 -77.032100 110.252055
15 11th & Kenyon St NW 38.929464 -77.027822 61.986301
16 16th & Harvard St NW 38.926088 -77.036536 95.531507
17 Adams Mill & Columbia Rd NW 38.922925 -77.042581 110.087671
18 14th & Harvard St NW 38.926800 -77.032200 71.249315
20 Lamont & Mt Pleasant NW 38.931900 -77.038800 66.939726
21 4th & M St SW 38.876700 -77.017800 53.342466
22 15th & P St NW 38.909850 -77.034438 135.386301
23 14th & R St NW 38.912682 -77.031681 68.334247
24 14th & Rhode Island Ave NW 38.908600 -77.032300 87.019178
25 20th & E St NW 38.896300 -77.045000 49.736986
26 21st & I St NW 38.900800 -77.047000 91.000000
27 Georgia & New Hampshire Ave NW 38.936043 -77.024649 42.597260
28 14th St & Spring Rd NW 38.937500 -77.032800 27.421918
29 John McCormack Dr & Michigan Ave NE 38.934600 -76.995500 5.254795
30 5th & K St NW 38.903040 -77.019027 78.709589
31 19th & East Capitol St SE 38.889600 -76.976900 14.693151
32 Park Rd & Holmead Pl NW 38.930800 -77.031500 77.728767
... ... ... ... ...
169 Ballston Metro / N Stuart & 9th St N 38.881044 -77.111768 10.435616
170 N Randolph St & Fairfax Dr 38.882629 -77.109366 5.361644
171 Jefferson Memorial 38.879819 -77.037413 31.309589
172 Good Hope Rd & 14th St SE 38.866611 -76.985238 0.668493
173 Glebe Rd & 11th St N 38.883921 -77.116817 3.309589
174 N Quincy St & Wilson Blvd 38.880151 -77.107673 5.054795
176 Gallaudet / 8th St & Florida Ave NE 38.905090 -76.994100 6.279452
177 20th & L St NW 38.903584 -77.044789 13.109589
178 12th & L St NW 38.903819 -77.028400 23.320548
179 21st St & Pennsylvania Ave NW 38.901539 -77.046564 13.860274
180 19th & K St NW 38.902204 -77.043370 13.830137
181 Prince St & Union St 38.803124 -77.040363 1.797260
182 Market Square / King St & Royal St 38.804718 -77.043363 2.279452
183 Saint Asaph St & Pendleton St 38.810743 -77.044664 2.054795
184 King St & Patrick St 38.805317 -77.049883 1.767123
185 15th & K St NW 38.902000 -77.033530 17.167123
186 Commerce St & Fayette St 38.805648 -77.052930 1.057534
187 Henry St & Pendleton St 38.811456 -77.050276 0.660274
188 Braddock Rd Metro 38.814577 -77.052808 2.895890
189 King St Metro 38.805767 -77.060720 3.967123
190 24th & N St NW 38.906600 -77.051520 13.950685
191 10th & E St NW 38.895914 -77.026064 17.336986
192 22nd & I St NW / Foggy Bottom 38.900880 -77.048911 25.668493
193 Utah St & 11th St N 38.883669 -77.113905 2.038356
194 Barton St & 10th St N 38.884961 -77.087770 1.583562
195 Arlington Blvd & N Queen St 38.889365 -77.077294 1.391781
196 Lincoln Memorial 38.888251 -77.049426 15.490411
198 5th St & Massachusetts Ave NW 38.900930 -77.018677 60.704110
291 Calvert St & Woodley Pl NW 38.923583 -77.050046 78.246575
334 Court House Metro / 15th & N Uhle St 38.890612 -77.084801 21.306849

185 rows × 4 columns


In [62]:
plt.scatter(trips_by_geo['long'], trips_by_geo['lat'], s=trips_by_geo['avg_daily_trips'])


Out[62]:
<matplotlib.collections.PathCollection at 0x7f0211f774d0>

In [ ]: