In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
%matplotlib inline
from mpl_toolkits.basemap import Basemap

In [2]:
data = pd.read_csv('ch08/Haiti.csv')
data[0:10]


Out[2]:
Serial INCIDENT TITLE INCIDENT DATE LOCATION DESCRIPTION CATEGORY LATITUDE LONGITUDE APPROVED VERIFIED
0 4052 * URGENT * Type O blood donations needed in #J... 05/07/2010 17:26 Jacmel, Haiti Birthing Clinic in Jacmel #Haiti urgently need... 1. Urgences | Emergency, 3. Public Health, 18.233333 -72.533333 YES NO
1 4051 Food-Aid sent to Fondwa, Haiti 28/06/2010 23:06 fondwa Please help food-aid.org deliver more food to ... 1. Urgences | Emergency, 2. Urgences logistiqu... 50.226029 5.729886 NO NO
2 4050 how haiti is right now and how it was during t... 24/06/2010 16:21 centrie i feel so bad for you i know i am supposed to ... 2. Urgences logistiques | Vital Lines, 8. Autr... 22.278381 114.174287 NO NO
3 4049 Lost person 20/06/2010 21:59 Genoca We are family members of Juan Antonio Zuniga O... 1. Urgences | Emergency, 44.407062 8.933989 NO NO
4 4042 Citi Soleil school 18/05/2010 16:26 Citi Soleil, Haiti We are working with Haitian (NGO) -The Christi... 1. Urgences | Emergency, 18.571084 -72.334671 YES NO
5 4041 Radio Commerce in Sarthe 26/04/2010 13:14 Radio Commerce Shelter, Sarthe i'm Louinel from Sarthe. I'd to know what can ... 5e. Communication lines down, 18.593707 -72.310079 YES NO
6 4040 Contaminated water in Baraderes. 26/04/2010 14:19 Marc near Baraderes How do we treat water in areas without Pipe?\t... 4. Menaces | Security Threats, 4e. Assainissem... 18.482800 -73.638800 YES NO
7 4039 Violence at "arcahaie bas Saint-Ard" 26/04/2010 14:27 unable to find "arcahaie bas Saint-Ard&qu... Goodnight at (arcahaie bas Saint-Ard) 2 young ... 4. Menaces | Security Threats, 18.415000 -73.195000 YES NO
8 4038 No electricity in pernier 15/03/2010 10:58 Pernier why the people who lives in pernier doesn't fi... 2. Urgences logistiques | Vital Lines, 2f. San... 18.517443 -72.236841 YES NO
9 4037 Shelter and food needed at Lamentin 54 and Rue... 15/03/2010 11:00 Intersection of Lamentin 54 and Rue St Juste GOOD EVENING ONG, I'M VERY HAPPY FOR THE AID Y... 2. Urgences logistiques | Vital Lines, 2d. Ref... 18.547790 -72.410010 YES NO

In [3]:
data[['INCIDENT DATE', 'LATITUDE', 'LONGITUDE']][:10]


Out[3]:
INCIDENT DATE LATITUDE LONGITUDE
0 05/07/2010 17:26 18.233333 -72.533333
1 28/06/2010 23:06 50.226029 5.729886
2 24/06/2010 16:21 22.278381 114.174287
3 20/06/2010 21:59 44.407062 8.933989
4 18/05/2010 16:26 18.571084 -72.334671
5 26/04/2010 13:14 18.593707 -72.310079
6 26/04/2010 14:19 18.482800 -73.638800
7 26/04/2010 14:27 18.415000 -73.195000
8 15/03/2010 10:58 18.517443 -72.236841
9 15/03/2010 11:00 18.547790 -72.410010

In [4]:
data['CATEGORY'][:6]


Out[4]:
0          1. Urgences | Emergency, 3. Public Health, 
1    1. Urgences | Emergency, 2. Urgences logistiqu...
2    2. Urgences logistiques | Vital Lines, 8. Autr...
3                            1. Urgences | Emergency, 
4                            1. Urgences | Emergency, 
5                       5e. Communication lines down, 
Name: CATEGORY, dtype: object

In [5]:
data.describe()


Out[5]:
Serial LATITUDE LONGITUDE
count 3593.000000 3593.000000 3593.000000
mean 2080.277484 18.611495 -72.322680
std 1171.100360 0.738572 3.650776
min 4.000000 18.041313 -74.452757
25% 1074.000000 18.524070 -72.417500
50% 2163.000000 18.539269 -72.335000
75% 3088.000000 18.561820 -72.293570
max 4052.000000 50.226029 114.174287

In [6]:
data = data[(data.LATITUDE > 18) &(data.LATITUDE < 20)&
           (data.LONGITUDE > -75) & (data.LONGITUDE < -70)
           & data.CATEGORY.notnull()]

In [7]:
def to_cat_list(catstr):
    stripped = (x.strip() for x in catstr.split(','))
    return [x for x in stripped if x]

def get_all_categories(cat_series):
    cat_sets = (set(to_cat_list(x)) for x in cat_series)
    return sorted(set.union(*cat_sets))

def get_english(cat):
    code, names = cat.split('.')
    if '|' in names:
        names = names.split('|')[1]
    return code, names.strip()

In [8]:
get_english('2. Urgences logistiques | Vital Lines')


Out[8]:
('2', 'Vital Lines')

In [9]:
all_cats = get_all_categories(data.CATEGORY)

In [10]:
# 生成器表达式
english_mapping = dict(get_english(x) for x in all_cats)

english_mapping['2a']


Out[10]:
'Food Shortage'

In [11]:
def get_code(seq):
    return [x.split('.')[0] for x in seq if x]

all_codes = get_code(all_cats)
code_index = pd.Index(np.unique(all_codes))
dummy_frame = DataFrame(np.zeros((len(data), len(code_index))), index=data.index,
                        columns=code_index)

In [12]:
dummy_frame.ix[0:7, :6]


Out[12]:
1 1a 1b 1c 1d 2
0 0.0 0.0 0.0 0.0 0.0 0.0
4 0.0 0.0 0.0 0.0 0.0 0.0
5 0.0 0.0 0.0 0.0 0.0 0.0
6 0.0 0.0 0.0 0.0 0.0 0.0
7 0.0 0.0 0.0 0.0 0.0 0.0

In [13]:
for row, cat in zip(data.index, data.CATEGORY):
    codes = get_code(to_cat_list(cat))
    dummy_frame.ix[row, codes] = 1

data = data.join(dummy_frame.add_prefix('category'))

In [17]:
data.ix[0:10, 10:25]


Out[17]:
category1 category1a category1b category1c category1d category2 category2a category2b category2c category2d category2e category2f category3 category3a category3b
0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0
4 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
7 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
8 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0
9 0.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0
10 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0

In [15]:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
%matplotlib inline

def base_haiti_map(ax=None, lllat=17.25, urlat=20.25,
                  lllon=-75, urlon=-71):
    # 创建极球面投影的Basemap实例。
    m = Basemap(ax=ax, projection='stere',
               lon_0=(urlon + lllon) /2,
               lat_0=(urlat + lllat) /2,
               llcrnrlat=lllat, urcrnrlat=urlat,
               llcrnrlon=lllon, urcrnrlon=urlon,
               resolution='f')
    # 绘制海岸线、州界、国界以及地图边界。
    m.drawcoastlines()
    m.drawstates()
    m.drawcountries()
    return m

In [19]:
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
fig.subplots_adjust(hspace=0.05, wspace=0.05)

to_plot = ['2a', '1', '3c', '7a']

lllat=17.25; urlat=20.25; lllon=-75; urlon=-71

from time import clock
start=clock()
for code, ax in zip(to_plot, axes.flat):
    m = base_haiti_map(ax, lllat=lllat, urlat=urlat,
                      lllon=lllon, urlon=urlon)
    cat_data = data[data['category%s' % code] == 1]
    
    # 计算地图的投影坐标。
    x, y = m(cat_data.LONGITUDE.values, cat_data.LATITUDE.values)
    
    m.plot(x, y, 'k.', alpha=0.5)
    ax.set_title('%s: %s' % (code, english_mapping[code]))
stop=clock()
print (stop-start)/10000
plt.show()


0.0290626652492

In [ ]: