Plotting and Visualization



In [2]:

    
import numpy as np
from matplotlib import pyplot as plt



In [3]:

    
%matplotlib inline



In [4]:

    
fig = plt.figure()









    





<matplotlib.figure.Figure at 0x1026bb898>



In [5]:

    
ax1 = fig.add_subplot(2, 2, 1)



In [6]:

    
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)



In [10]:

    
%matplotlib notebook
fig.show()









    



/Users/alexkirnas/anaconda/lib/python3.6/site-packages/matplotlib/figure.py:402: UserWarning: matplotlib is currently using a non-GUI backend, so cannot show the figure
  "matplotlib is currently using a non-GUI backend, "



In [11]:

    
from numpy.random import randn
plt.plot(randn(50).cumsum(), 'k--')









    














    











    Out[11]:





[<matplotlib.lines.Line2D at 0x103f0fa58>]



In [12]:

    
_ = ax1.hist(randn(100), bins=20, color='k', alpha=0.3)
ax2.scatter(np.arange(30), np.arange(30) + 3 * randn(30))









    Out[12]:





<matplotlib.collections.PathCollection at 0x103ec35c0>



In [13]:

    
plt.close('all')



In [15]:

    
%matplotlib inline
fig, axes = plt.subplots(2, 3)
axes









    Out[15]:





array([[<matplotlib.axes._subplots.AxesSubplot object at 0x10417e080>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x104226668>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10428cf60>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1042e6860>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10434cac8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1043a61d0>]], dtype=object)

Adjusting the spacing around subplots



In [16]:

    
plt.subplots_adjust(left=None, bottom=None, right=None, top=None,
                wspace=None, hspace=None)









    





<matplotlib.figure.Figure at 0x103e2ba20>



In [17]:

    
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        axes[i, j].hist(randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)



In [18]:

    
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        axes[i, j].hist(randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)

Colors, markers, and line styles



In [ ]:

    
plt.figure()
plt.plot(randn(30).cumsum(), 'ko--')



In [20]:

    
plt.close('all')
data = randn(30).cumsum()
plt.plot(data, 'k--', label='Default')
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')
plt.legend(loc='best')









    Out[20]:





<matplotlib.legend.Legend at 0x105177cf8>

Ticks, labels, and legends

Setting the title, axis labels, ticks, and ticklabels



In [21]:

    
fig = plt.figure(); ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum())









    Out[21]:





[<matplotlib.lines.Line2D at 0x1049e9dd8>]



In [25]:

    
fig = plt.figure(); ax = fig.add_subplot(1, 1, 1)

ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],
                            rotation=30, fontsize='small')
ax.set_title('My first matplotlib plot')
ax.set_xlabel('Stages')

ax.plot(randn(1000).cumsum())









    Out[25]:





[<matplotlib.lines.Line2D at 0x1048fb668>]



In [26]:

    
fig = plt.figure(); ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum(), 'k', label='one')
ax.plot(randn(1000).cumsum(), 'k--', label='two')
ax.plot(randn(1000).cumsum(), 'k.', label='three')

ax.legend(loc='best')









    Out[26]:





<matplotlib.legend.Legend at 0x10497ab00>

Annotations and drawing on a subplot



In [29]:

    
from datetime import datetime
import pandas as pd

data = pd.read_csv('spx.csv', index_col=0, parse_dates=True)
data









    Out[29]:






  
    
      
      PX
    
  
  
    
      1990-02-01
      328.79
    
    
      1990-02-02
      330.92
    
    
      1990-02-05
      331.85
    
    
      1990-02-06
      329.66
    
    
      1990-02-07
      333.75
    
    
      1990-02-08
      332.96
    
    
      1990-02-09
      333.62
    
    
      1990-02-12
      330.08
    
    
      1990-02-13
      331.02
    
    
      1990-02-14
      332.01
    
    
      1990-02-15
      334.89
    
    
      1990-02-16
      332.72
    
    
      1990-02-20
      327.99
    
    
      1990-02-21
      327.67
    
    
      1990-02-22
      325.70
    
    
      1990-02-23
      324.15
    
    
      1990-02-26
      328.67
    
    
      1990-02-27
      330.26
    
    
      1990-02-28
      331.89
    
    
      1990-03-01
      332.74
    
    
      1990-03-02
      335.54
    
    
      1990-03-05
      333.74
    
    
      1990-03-06
      337.93
    
    
      1990-03-07
      336.95
    
    
      1990-03-08
      340.27
    
    
      1990-03-09
      337.93
    
    
      1990-03-12
      338.67
    
    
      1990-03-13
      336.00
    
    
      1990-03-14
      336.87
    
    
      1990-03-15
      338.07
    
    
      ...
      ...
    
    
      2011-09-02
      1173.97
    
    
      2011-09-06
      1165.24
    
    
      2011-09-07
      1198.62
    
    
      2011-09-08
      1185.90
    
    
      2011-09-09
      1154.23
    
    
      2011-09-12
      1162.27
    
    
      2011-09-13
      1172.87
    
    
      2011-09-14
      1188.68
    
    
      2011-09-15
      1209.11
    
    
      2011-09-16
      1216.01
    
    
      2011-09-19
      1204.09
    
    
      2011-09-20
      1202.09
    
    
      2011-09-21
      1166.76
    
    
      2011-09-22
      1129.56
    
    
      2011-09-23
      1136.43
    
    
      2011-09-26
      1162.95
    
    
      2011-09-27
      1175.38
    
    
      2011-09-28
      1151.06
    
    
      2011-09-29
      1160.40
    
    
      2011-09-30
      1131.42
    
    
      2011-10-03
      1099.23
    
    
      2011-10-04
      1123.95
    
    
      2011-10-05
      1144.03
    
    
      2011-10-06
      1164.97
    
    
      2011-10-07
      1155.46
    
    
      2011-10-10
      1194.89
    
    
      2011-10-11
      1195.54
    
    
      2011-10-12
      1207.25
    
    
      2011-10-13
      1203.66
    
    
      2011-10-14
      1224.58
    
  

5472 rows × 1 columns



In [30]:

    
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

spx = data['PX']

spx.plot(ax=ax, style='k-')

crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy')
]

for date, label in crisis_data:
    ax.annotate(label, xy=(date, spx.asof(date) + 50),
                xytext=(date, spx.asof(date) + 200),
                arrowprops=dict(facecolor='black'),
                horizontalalignment='left', verticalalignment='top')

# Zoom in on 2007-2010
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in 2008-2009 financial crisis')









    Out[30]:





<matplotlib.text.Text at 0x10c357898>



In [31]:

    
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
                   color='g', alpha=0.5)

ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)









    Out[31]:





<matplotlib.patches.Polygon at 0x10bfd5710>

Saving plots to file



In [32]:

    
fig
fig.savefig('figpath.svg')
fig.savefig('figpath.png', dpi=400, bbox_inches='tight')



In [33]:

    
from io import BytesIO
buffer = BytesIO()
plt.savefig(buffer)
plot_data = buffer.getvalue()









    





<matplotlib.figure.Figure at 0x10c05a8d0>

Plotting functions in pandasPlotting functions in pandas

Line plots



In [35]:

    
from pandas import Series, DataFrame
s = Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
s.plot()









    Out[35]:





<matplotlib.axes._subplots.AxesSubplot at 0x10de531d0>



In [36]:

    
df = DataFrame(np.random.randn(10, 4).cumsum(0),
               columns=['A', 'B', 'C', 'D'],
               index=np.arange(0, 100, 10))
df.plot()









    Out[36]:





<matplotlib.axes._subplots.AxesSubplot at 0x10decd550>

Bar plots



In [37]:

    
fig, axes = plt.subplots(2, 1)
data = Series(np.random.rand(16), index=list('abcdefghijklmnop'))
data.plot(kind='bar', ax=axes[0], color='k', alpha=0.7)
data.plot(kind='barh', ax=axes[1], color='k', alpha=0.7)









    Out[37]:





<matplotlib.axes._subplots.AxesSubplot at 0x10e03c550>



In [38]:

    
df = DataFrame(np.random.rand(6, 4),
               index=['one', 'two', 'three', 'four', 'five', 'six'],
               columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
df
df.plot(kind='bar')









    Out[38]:





<matplotlib.axes._subplots.AxesSubplot at 0x10e04cfd0>



In [39]:

    
df.plot(kind='barh', stacked=True, alpha=0.5)









    Out[39]:





<matplotlib.axes._subplots.AxesSubplot at 0x10e374f28>



In [45]:

    
tips = pd.read_csv('tips.csv')
tips









    Out[45]:






  
    
      
      total_bill
      tip
      sex
      smoker
      day
      time
      size
    
  
  
    
      0
      16.99
      1.01
      Female
      No
      Sun
      Dinner
      2
    
    
      1
      10.34
      1.66
      Male
      No
      Sun
      Dinner
      3
    
    
      2
      21.01
      3.50
      Male
      No
      Sun
      Dinner
      3
    
    
      3
      23.68
      3.31
      Male
      No
      Sun
      Dinner
      2
    
    
      4
      24.59
      3.61
      Female
      No
      Sun
      Dinner
      4
    
    
      5
      25.29
      4.71
      Male
      No
      Sun
      Dinner
      4
    
    
      6
      8.77
      2.00
      Male
      No
      Sun
      Dinner
      2
    
    
      7
      26.88
      3.12
      Male
      No
      Sun
      Dinner
      4
    
    
      8
      15.04
      1.96
      Male
      No
      Sun
      Dinner
      2
    
    
      9
      14.78
      3.23
      Male
      No
      Sun
      Dinner
      2
    
    
      10
      10.27
      1.71
      Male
      No
      Sun
      Dinner
      2
    
    
      11
      35.26
      5.00
      Female
      No
      Sun
      Dinner
      4
    
    
      12
      15.42
      1.57
      Male
      No
      Sun
      Dinner
      2
    
    
      13
      18.43
      3.00
      Male
      No
      Sun
      Dinner
      4
    
    
      14
      14.83
      3.02
      Female
      No
      Sun
      Dinner
      2
    
    
      15
      21.58
      3.92
      Male
      No
      Sun
      Dinner
      2
    
    
      16
      10.33
      1.67
      Female
      No
      Sun
      Dinner
      3
    
    
      17
      16.29
      3.71
      Male
      No
      Sun
      Dinner
      3
    
    
      18
      16.97
      3.50
      Female
      No
      Sun
      Dinner
      3
    
    
      19
      20.65
      3.35
      Male
      No
      Sat
      Dinner
      3
    
    
      20
      17.92
      4.08
      Male
      No
      Sat
      Dinner
      2
    
    
      21
      20.29
      2.75
      Female
      No
      Sat
      Dinner
      2
    
    
      22
      15.77
      2.23
      Female
      No
      Sat
      Dinner
      2
    
    
      23
      39.42
      7.58
      Male
      No
      Sat
      Dinner
      4
    
    
      24
      19.82
      3.18
      Male
      No
      Sat
      Dinner
      2
    
    
      25
      17.81
      2.34
      Male
      No
      Sat
      Dinner
      4
    
    
      26
      13.37
      2.00
      Male
      No
      Sat
      Dinner
      2
    
    
      27
      12.69
      2.00
      Male
      No
      Sat
      Dinner
      2
    
    
      28
      21.70
      4.30
      Male
      No
      Sat
      Dinner
      2
    
    
      29
      19.65
      3.00
      Female
      No
      Sat
      Dinner
      2
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      214
      28.17
      6.50
      Female
      Yes
      Sat
      Dinner
      3
    
    
      215
      12.90
      1.10
      Female
      Yes
      Sat
      Dinner
      2
    
    
      216
      28.15
      3.00
      Male
      Yes
      Sat
      Dinner
      5
    
    
      217
      11.59
      1.50
      Male
      Yes
      Sat
      Dinner
      2
    
    
      218
      7.74
      1.44
      Male
      Yes
      Sat
      Dinner
      2
    
    
      219
      30.14
      3.09
      Female
      Yes
      Sat
      Dinner
      4
    
    
      220
      12.16
      2.20
      Male
      Yes
      Fri
      Lunch
      2
    
    
      221
      13.42
      3.48
      Female
      Yes
      Fri
      Lunch
      2
    
    
      222
      8.58
      1.92
      Male
      Yes
      Fri
      Lunch
      1
    
    
      223
      15.98
      3.00
      Female
      No
      Fri
      Lunch
      3
    
    
      224
      13.42
      1.58
      Male
      Yes
      Fri
      Lunch
      2
    
    
      225
      16.27
      2.50
      Female
      Yes
      Fri
      Lunch
      2
    
    
      226
      10.09
      2.00
      Female
      Yes
      Fri
      Lunch
      2
    
    
      227
      20.45
      3.00
      Male
      No
      Sat
      Dinner
      4
    
    
      228
      13.28
      2.72
      Male
      No
      Sat
      Dinner
      2
    
    
      229
      22.12
      2.88
      Female
      Yes
      Sat
      Dinner
      2
    
    
      230
      24.01
      2.00
      Male
      Yes
      Sat
      Dinner
      4
    
    
      231
      15.69
      3.00
      Male
      Yes
      Sat
      Dinner
      3
    
    
      232
      11.61
      3.39
      Male
      No
      Sat
      Dinner
      2
    
    
      233
      10.77
      1.47
      Male
      No
      Sat
      Dinner
      2
    
    
      234
      15.53
      3.00
      Male
      Yes
      Sat
      Dinner
      2
    
    
      235
      10.07
      1.25
      Male
      No
      Sat
      Dinner
      2
    
    
      236
      12.60
      1.00
      Male
      Yes
      Sat
      Dinner
      2
    
    
      237
      32.83
      1.17
      Male
      Yes
      Sat
      Dinner
      2
    
    
      238
      35.83
      4.67
      Female
      No
      Sat
      Dinner
      3
    
    
      239
      29.03
      5.92
      Male
      No
      Sat
      Dinner
      3
    
    
      240
      27.18
      2.00
      Female
      Yes
      Sat
      Dinner
      2
    
    
      241
      22.67
      2.00
      Male
      Yes
      Sat
      Dinner
      2
    
    
      242
      17.82
      1.75
      Male
      No
      Sat
      Dinner
      2
    
    
      243
      18.78
      3.00
      Female
      No
      Thur
      Dinner
      2
    
  

244 rows × 7 columns



In [46]:

    
party_counts = pd.crosstab(tips.day, tips.size)
party_counts



In [47]:

    
party_counts = party_counts.ix[:, 2:5]



In [48]:

    
party_pcts = party_counts.div(party_counts.sum(1).astype(float), axis=0)
party_pcts



In [49]:

    
party_pcts.plot(kind='bar', stacked=True)









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-49-3f02dc113a56> in <module>()
----> 1 party_pcts.plot(kind='bar', stacked=True)

/Users/alexkirnas/anaconda/lib/python3.6/site-packages/pandas/tools/plotting.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   3772                           fontsize=fontsize, colormap=colormap, table=table,
   3773                           yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 3774                           sort_columns=sort_columns, **kwds)
   3775     __call__.__doc__ = plot_frame.__doc__
   3776 

/Users/alexkirnas/anaconda/lib/python3.6/site-packages/pandas/tools/plotting.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   2641                  yerr=yerr, xerr=xerr,
   2642                  secondary_y=secondary_y, sort_columns=sort_columns,
-> 2643                  **kwds)
   2644 
   2645 

/Users/alexkirnas/anaconda/lib/python3.6/site-packages/pandas/tools/plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds)
   2468         plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
   2469 
-> 2470     plot_obj.generate()
   2471     plot_obj.draw()
   2472     return plot_obj.result

/Users/alexkirnas/anaconda/lib/python3.6/site-packages/pandas/tools/plotting.py in generate(self)
   1039     def generate(self):
   1040         self._args_adjust()
-> 1041         self._compute_plot_data()
   1042         self._setup_subplots()
   1043         self._make_plot()

/Users/alexkirnas/anaconda/lib/python3.6/site-packages/pandas/tools/plotting.py in _compute_plot_data(self)
   1148         if is_empty:
   1149             raise TypeError('Empty {0!r}: no numeric data to '
-> 1150                             'plot'.format(numeric_data.__class__.__name__))
   1151 
   1152         self.data = numeric_data

TypeError: Empty 'DataFrame': no numeric data to plot

Histograms and density plots



In [50]:

    
tips['tip_pct'] = tips['tip'] / tips['total_bill']
tips['tip_pct'].hist(bins=50)









    Out[50]:





<matplotlib.axes._subplots.AxesSubplot at 0x104903e80>



In [51]:

    
tips['tip_pct'].plot(kind='kde')









    Out[51]:





<matplotlib.axes._subplots.AxesSubplot at 0x104e9e470>



In [52]:

    
comp1 = np.random.normal(0, 1, size=200)  # N(0, 1)
comp2 = np.random.normal(10, 2, size=200)  # N(10, 4)
values = Series(np.concatenate([comp1, comp2]))
values.hist(bins=100, alpha=0.3, color='k', normed=True)
values.plot(kind='kde', style='k--')









    Out[52]:





<matplotlib.axes._subplots.AxesSubplot at 0x104f43f60>

Scatter plots



In [53]:

    
macro = pd.read_csv('macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
trans_data = np.log(data).diff().dropna()
trans_data[-5:]



In [54]:

    
plt.scatter(trans_data['m1'], trans_data['unemp'])
plt.title('Changes in log %s vs. log %s' % ('m1', 'unemp'))









    Out[54]:





<matplotlib.text.Text at 0x112cc01d0>



In [56]:

    
pd.scatter_matrix(trans_data, diagonal='kde', alpha=0.3)









    Out[56]:





array([[<matplotlib.axes._subplots.AxesSubplot object at 0x112edf208>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11358d860>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x113609048>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11365c390>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1136c17b8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1136c17f0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x113731320>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x113782c18>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1137f0048>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x113847a58>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1138aaf60>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1138bacc0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x11396bda0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1139db0f0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x113a2d9e8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x113a95cf8>]], dtype=object)

Plotting Maps: Visualizing Haiti Earthquake Crisis data



In [58]:

    
data = pd.read_csv('Haiti.csv')
data.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3593 entries, 0 to 3592
Data columns (total 10 columns):
Serial            3593 non-null int64
INCIDENT TITLE    3593 non-null object
INCIDENT DATE     3593 non-null object
LOCATION          3592 non-null object
DESCRIPTION       3593 non-null object
CATEGORY          3587 non-null object
LATITUDE          3593 non-null float64
LONGITUDE         3593 non-null float64
APPROVED          3593 non-null object
VERIFIED          3593 non-null object
dtypes: float64(2), int64(1), object(7)
memory usage: 280.8+ KB



In [59]:

    
data[['INCIDENT DATE', 'LATITUDE', 'LONGITUDE']][:10]









    Out[59]:






  
    
      
      INCIDENT DATE
      LATITUDE
      LONGITUDE
    
  
  
    
      0
      05/07/2010 17:26
      18.233333
      -72.533333
    
    
      1
      28/06/2010 23:06
      50.226029
      5.729886
    
    
      2
      24/06/2010 16:21
      22.278381
      114.174287
    
    
      3
      20/06/2010 21:59
      44.407062
      8.933989
    
    
      4
      18/05/2010 16:26
      18.571084
      -72.334671
    
    
      5
      26/04/2010 13:14
      18.593707
      -72.310079
    
    
      6
      26/04/2010 14:19
      18.482800
      -73.638800
    
    
      7
      26/04/2010 14:27
      18.415000
      -73.195000
    
    
      8
      15/03/2010 10:58
      18.517443
      -72.236841
    
    
      9
      15/03/2010 11:00
      18.547790
      -72.410010



In [60]:

    
data['CATEGORY'][:6]









    Out[60]:





0          1. Urgences | Emergency, 3. Public Health, 
1    1. Urgences | Emergency, 2. Urgences logistiqu...
2    2. Urgences logistiques | Vital Lines, 8. Autr...
3                            1. Urgences | Emergency, 
4                            1. Urgences | Emergency, 
5                       5e. Communication lines down, 
Name: CATEGORY, dtype: object



In [61]:

    
data.describe()









    Out[61]:






  
    
      
      Serial
      LATITUDE
      LONGITUDE
    
  
  
    
      count
      3593.000000
      3593.000000
      3593.000000
    
    
      mean
      2080.277484
      18.611495
      -72.322680
    
    
      std
      1171.100360
      0.738572
      3.650776
    
    
      min
      4.000000
      18.041313
      -74.452757
    
    
      25%
      1074.000000
      18.524070
      -72.417500
    
    
      50%
      2163.000000
      18.539269
      -72.335000
    
    
      75%
      3088.000000
      18.561820
      -72.293570
    
    
      max
      4052.000000
      50.226029
      114.174287



In [62]:

    
data = data[(data.LATITUDE > 18) & (data.LATITUDE < 20) &
            (data.LONGITUDE > -75) & (data.LONGITUDE < -70)
            & data.CATEGORY.notnull()]



In [63]:

    
def to_cat_list(catstr):
    stripped = (x.strip() for x in catstr.split(','))
    return [x for x in stripped if x]

def get_all_categories(cat_series):
    cat_sets = (set(to_cat_list(x)) for x in cat_series)
    return sorted(set.union(*cat_sets))

def get_english(cat):
    code, names = cat.split('.')
    if '|' in names:
        names = names.split(' | ')[1]
    return code, names.strip()



In [64]:

    
all_cats = get_all_categories(data.CATEGORY)
english_mapping = dict(get_english(x) for x in all_cats)
english_mapping['2a']
english_mapping['6c']









    Out[64]:





'Earthquake and aftershocks'



In [65]:

    
def get_code(seq):
    return [x.split('.')[0] for x in seq if x]

all_codes = get_code(all_cats)
code_index = pd.Index(np.unique(all_codes))
dummy_frame = DataFrame(np.zeros((len(data), len(code_index))),
                        index=data.index, columns=code_index)



In [66]:

    
dummy_frame.ix[:, :6].info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 3569 entries, 0 to 3592
Data columns (total 6 columns):
1     3569 non-null float64
1a    3569 non-null float64
1b    3569 non-null float64
1c    3569 non-null float64
1d    3569 non-null float64
2     3569 non-null float64
dtypes: float64(6)
memory usage: 195.2 KB



In [67]:

    
for row, cat in zip(data.index, data.CATEGORY):
    codes = get_code(to_cat_list(cat))
    dummy_frame.ix[row, codes] = 1

data = data.join(dummy_frame.add_prefix('category_'))



In [68]:

    
data.ix[:, 10:15].info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 3569 entries, 0 to 3592
Data columns (total 5 columns):
category_1     3569 non-null float64
category_1a    3569 non-null float64
category_1b    3569 non-null float64
category_1c    3569 non-null float64
category_1d    3569 non-null float64
dtypes: float64(5)
memory usage: 167.3 KB



In [69]:

    
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt

def basic_haiti_map(ax=None, lllat=17.25, urlat=20.25,
                    lllon=-75, urlon=-71):
    # create polar stereographic Basemap instance.
    m = Basemap(ax=ax, projection='stere',
                lon_0=(urlon + lllon) / 2,
                lat_0=(urlat + lllat) / 2,
                llcrnrlat=lllat, urcrnrlat=urlat,
                llcrnrlon=lllon, urcrnrlon=urlon,
                resolution='f')
    # draw coastlines, state and country boundaries, edge of map.
    m.drawcoastlines()
    m.drawstates()
    m.drawcountries()
    return m









    



---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-69-ec31ba3e955e> in <module>()
----> 1 from mpl_toolkits.basemap import Basemap
      2 import matplotlib.pyplot as plt
      3 
      4 def basic_haiti_map(ax=None, lllat=17.25, urlat=20.25,
      5                     lllon=-75, urlon=-71):

ModuleNotFoundError: No module named 'mpl_toolkits.basemap'



In [70]:

    
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
fig.subplots_adjust(hspace=0.05, wspace=0.05)

to_plot = ['2a', '1', '3c', '7a']

lllat=17.25; urlat=20.25; lllon=-75; urlon=-71

for code, ax in zip(to_plot, axes.flat):
    m = basic_haiti_map(ax, lllat=lllat, urlat=urlat,
                        lllon=lllon, urlon=urlon)

    cat_data = data[data['category_%s' % code] == 1]

    # compute map proj coordinates.
    x, y = m(cat_data.LONGITUDE.values, cat_data.LATITUDE.values)

    m.plot(x, y, 'k.', alpha=0.5)
    ax.set_title('%s: %s' % (code, english_mapping[code]))









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-70-395ceffee373> in <module>()
      7 
      8 for code, ax in zip(to_plot, axes.flat):
----> 9     m = basic_haiti_map(ax, lllat=lllat, urlat=urlat,
     10                         lllon=lllon, urlon=urlon)
     11 

NameError: name 'basic_haiti_map' is not defined



In [71]:

    
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
fig.subplots_adjust(hspace=0.05, wspace=0.05)

to_plot = ['2a', '1', '3c', '7a']

lllat=17.25; urlat=20.25; lllon=-75; urlon=-71

def make_plot():

    for i, code in enumerate(to_plot):
        cat_data = data[data['category_%s' % code] == 1]
        lons, lats = cat_data.LONGITUDE, cat_data.LATITUDE

        ax = axes.flat[i]
        m = basic_haiti_map(ax, lllat=lllat, urlat=urlat,
                            lllon=lllon, urlon=urlon)

        # compute map proj coordinates.
        x, y = m(lons.values, lats.values)

        m.plot(x, y, 'k.', alpha=0.5)
        ax.set_title('%s: %s' % (code, english_mapping[code]))



In [ ]:

	cpi	m1	tbilrate	unemp
198	-0.007904	0.045361	-0.396881	0.105361
199	-0.021979	0.066753	-2.277267	0.139762
200	0.002340	0.010286	0.606136	0.160343
201	0.008419	0.037461	-0.200671	0.127339
202	0.008894	0.012202	-0.405465	0.042560

	PX
1990-02-01	328.79
1990-02-02	330.92
1990-02-05	331.85
1990-02-06	329.66
1990-02-07	333.75
1990-02-08	332.96
1990-02-09	333.62
1990-02-12	330.08
1990-02-13	331.02
1990-02-14	332.01
1990-02-15	334.89
1990-02-16	332.72
1990-02-20	327.99
1990-02-21	327.67
1990-02-22	325.70
1990-02-23	324.15
1990-02-26	328.67
1990-02-27	330.26
1990-02-28	331.89
1990-03-01	332.74
1990-03-02	335.54
1990-03-05	333.74
1990-03-06	337.93
1990-03-07	336.95
1990-03-08	340.27
1990-03-09	337.93
1990-03-12	338.67
1990-03-13	336.00
1990-03-14	336.87
1990-03-15	338.07
...	...
2011-09-02	1173.97
2011-09-06	1165.24
2011-09-07	1198.62
2011-09-08	1185.90
2011-09-09	1154.23
2011-09-12	1162.27
2011-09-13	1172.87
2011-09-14	1188.68
2011-09-15	1209.11
2011-09-16	1216.01
2011-09-19	1204.09
2011-09-20	1202.09
2011-09-21	1166.76
2011-09-22	1129.56
2011-09-23	1136.43
2011-09-26	1162.95
2011-09-27	1175.38
2011-09-28	1151.06
2011-09-29	1160.40
2011-09-30	1131.42
2011-10-03	1099.23
2011-10-04	1123.95
2011-10-05	1144.03
2011-10-06	1164.97
2011-10-07	1155.46
2011-10-10	1194.89
2011-10-11	1195.54
2011-10-12	1207.25
2011-10-13	1203.66
2011-10-14	1224.58

	total_bill	tip	sex	smoker	day	time	size
0	16.99	1.01	Female	No	Sun	Dinner	2
1	10.34	1.66	Male	No	Sun	Dinner	3
2	21.01	3.50	Male	No	Sun	Dinner	3
3	23.68	3.31	Male	No	Sun	Dinner	2
4	24.59	3.61	Female	No	Sun	Dinner	4
5	25.29	4.71	Male	No	Sun	Dinner	4
6	8.77	2.00	Male	No	Sun	Dinner	2
7	26.88	3.12	Male	No	Sun	Dinner	4
8	15.04	1.96	Male	No	Sun	Dinner	2
9	14.78	3.23	Male	No	Sun	Dinner	2
10	10.27	1.71	Male	No	Sun	Dinner	2
11	35.26	5.00	Female	No	Sun	Dinner	4
12	15.42	1.57	Male	No	Sun	Dinner	2
13	18.43	3.00	Male	No	Sun	Dinner	4
14	14.83	3.02	Female	No	Sun	Dinner	2
15	21.58	3.92	Male	No	Sun	Dinner	2
16	10.33	1.67	Female	No	Sun	Dinner	3
17	16.29	3.71	Male	No	Sun	Dinner	3
18	16.97	3.50	Female	No	Sun	Dinner	3
19	20.65	3.35	Male	No	Sat	Dinner	3
20	17.92	4.08	Male	No	Sat	Dinner	2
21	20.29	2.75	Female	No	Sat	Dinner	2
22	15.77	2.23	Female	No	Sat	Dinner	2
23	39.42	7.58	Male	No	Sat	Dinner	4
24	19.82	3.18	Male	No	Sat	Dinner	2
25	17.81	2.34	Male	No	Sat	Dinner	4
26	13.37	2.00	Male	No	Sat	Dinner	2
27	12.69	2.00	Male	No	Sat	Dinner	2
28	21.70	4.30	Male	No	Sat	Dinner	2
29	19.65	3.00	Female	No	Sat	Dinner	2
...	...	...	...	...	...	...	...
214	28.17	6.50	Female	Yes	Sat	Dinner	3
215	12.90	1.10	Female	Yes	Sat	Dinner	2
216	28.15	3.00	Male	Yes	Sat	Dinner	5
217	11.59	1.50	Male	Yes	Sat	Dinner	2
218	7.74	1.44	Male	Yes	Sat	Dinner	2
219	30.14	3.09	Female	Yes	Sat	Dinner	4
220	12.16	2.20	Male	Yes	Fri	Lunch	2
221	13.42	3.48	Female	Yes	Fri	Lunch	2
222	8.58	1.92	Male	Yes	Fri	Lunch	1
223	15.98	3.00	Female	No	Fri	Lunch	3
224	13.42	1.58	Male	Yes	Fri	Lunch	2
225	16.27	2.50	Female	Yes	Fri	Lunch	2
226	10.09	2.00	Female	Yes	Fri	Lunch	2
227	20.45	3.00	Male	No	Sat	Dinner	4
228	13.28	2.72	Male	No	Sat	Dinner	2
229	22.12	2.88	Female	Yes	Sat	Dinner	2
230	24.01	2.00	Male	Yes	Sat	Dinner	4
231	15.69	3.00	Male	Yes	Sat	Dinner	3
232	11.61	3.39	Male	No	Sat	Dinner	2
233	10.77	1.47	Male	No	Sat	Dinner	2
234	15.53	3.00	Male	Yes	Sat	Dinner	2
235	10.07	1.25	Male	No	Sat	Dinner	2
236	12.60	1.00	Male	Yes	Sat	Dinner	2
237	32.83	1.17	Male	Yes	Sat	Dinner	2
238	35.83	4.67	Female	No	Sat	Dinner	3
239	29.03	5.92	Male	No	Sat	Dinner	3
240	27.18	2.00	Female	Yes	Sat	Dinner	2
241	22.67	2.00	Male	Yes	Sat	Dinner	2
242	17.82	1.75	Male	No	Sat	Dinner	2
243	18.78	3.00	Female	No	Thur	Dinner	2

	INCIDENT DATE	LATITUDE	LONGITUDE
0	05/07/2010 17:26	18.233333	-72.533333
1	28/06/2010 23:06	50.226029	5.729886
2	24/06/2010 16:21	22.278381	114.174287
3	20/06/2010 21:59	44.407062	8.933989
4	18/05/2010 16:26	18.571084	-72.334671
5	26/04/2010 13:14	18.593707	-72.310079
6	26/04/2010 14:19	18.482800	-73.638800
7	26/04/2010 14:27	18.415000	-73.195000
8	15/03/2010 10:58	18.517443	-72.236841
9	15/03/2010 11:00	18.547790	-72.410010

	Serial	LATITUDE	LONGITUDE
count	3593.000000	3593.000000	3593.000000
mean	2080.277484	18.611495	-72.322680
std	1171.100360	0.738572	3.650776
min	4.000000	18.041313	-74.452757
25%	1074.000000	18.524070	-72.417500
50%	2163.000000	18.539269	-72.335000
75%	3088.000000	18.561820	-72.293570
max	4052.000000	50.226029	114.174287