In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
%matplotlib inline
from mpl_toolkits.basemap import Basemap
In [2]:
# Read the Haiti CSV into a DataFrame and preview its first ten rows.
data = pd.read_csv('ch08/Haiti.csv')
data.head(10)
Out[2]:
In [3]:
# Preview the date and coordinate columns of the first ten rows.
location_columns = ['INCIDENT DATE', 'LATITUDE', 'LONGITUDE']
data[location_columns].head(10)
Out[3]:
In [4]:
# Show the first six raw CATEGORY values.
data['CATEGORY'].head(6)
Out[4]:
In [5]:
# Display pandas' default describe() summary of the loaded frame.
data.describe()
Out[5]:
In [6]:
# Keep only rows whose coordinates lie strictly inside the box
# 18 < latitude < 20, -75 < longitude < -70 and whose CATEGORY is present.
lat_in_box = (data.LATITUDE > 18) & (data.LATITUDE < 20)
lon_in_box = (data.LONGITUDE > -75) & (data.LONGITUDE < -70)
has_category = data.CATEGORY.notnull()
data = data[lat_in_box & lon_in_box & has_category]
In [7]:
def to_cat_list(catstr):
    """Split a comma-separated string into its trimmed, non-empty parts.

    Each piece between commas is stripped of surrounding whitespace;
    pieces that are empty after stripping are dropped.
    """
    pieces = []
    for raw_piece in catstr.split(','):
        piece = raw_piece.strip()
        if piece:
            pieces.append(piece)
    return pieces
def get_all_categories(cat_series):
    """Return the sorted list of distinct categories found in ``cat_series``.

    Parameters
    ----------
    cat_series : iterable of str
        Comma-separated category strings; each one is parsed with
        ``to_cat_list``.

    Returns
    -------
    list of str
        Every distinct category, sorted. An empty ``cat_series`` yields
        ``[]``.
    """
    cat_sets = (set(to_cat_list(x)) for x in cat_series)
    # Union onto an empty set instance: calling the unbound ``set.union``
    # with nothing to unpack raises TypeError when ``cat_series`` is empty.
    return sorted(set().union(*cat_sets))
def get_english(cat):
    """Split a category label of the form ``'<code>. <name>'`` into its parts.

    Parameters
    ----------
    cat : str
        Category label, e.g. ``'2. Urgences logistiques | Vital Lines'``.

    Returns
    -------
    tuple of (str, str)
        The text before the first ``'.'`` and the stripped name. When the
        name contains ``'|'``, only the segment following the first ``'|'``
        is kept (``'Vital Lines'`` in the example above).

    Raises
    ------
    ValueError
        If ``cat`` contains no ``'.'``.
    """
    # Split on the first '.' only, so a name that itself contains a period
    # (e.g. "Dr. Office") no longer breaks the two-value unpacking.
    code, names = cat.split('.', 1)
    if '|' in names:
        names = names.split('|')[1]
    return code, names.strip()
In [8]:
# Try get_english on a sample label that contains a '|' separator.
get_english('2. Urgences logistiques | Vital Lines')
Out[8]:
In [9]:
# Collect every distinct category that appears in the CATEGORY column.
all_cats = get_all_categories(data['CATEGORY'])
In [10]:
# Build the code -> name lookup with a dict comprehension.
english_mapping = {code: name for code, name in map(get_english, all_cats)}
english_mapping['2a']
Out[10]:
In [11]:
def get_code(seq):
    """Return the text before the first '.' of every truthy item in ``seq``.

    Falsy items (such as empty strings) are skipped; an item without a
    '.' is returned unchanged.
    """
    codes = []
    for label in seq:
        if not label:
            continue
        codes.append(label.partition('.')[0])
    return codes
# Build an all-zero frame with one row per row of `data` and one column
# per distinct category code.
all_codes = get_code(all_cats)
code_index = pd.Index(np.unique(all_codes))
zero_block = np.zeros((len(data), len(code_index)))
dummy_frame = DataFrame(zero_block, index=data.index, columns=code_index)
In [12]:
# Rows are picked by index label (0 through 7, inclusive) and columns by
# position (the first six). The two steps are spelled out with .loc/.iloc
# because the ambiguous .ix indexer is deprecated and removed in pandas 1.0.
dummy_frame.loc[0:7].iloc[:, :6]
Out[12]:
In [13]:
# Set the indicator to 1 for every category code listed on each row.
for row, cat in zip(data.index, data.CATEGORY):
    codes = get_code(to_cat_list(cat))
    # `row` is an index label and `codes` are column labels, so use the
    # label-based .loc indexer (.ix is deprecated and removed in pandas 1.0).
    dummy_frame.loc[row, codes] = 1

# Attach the indicator columns to `data`, named 'category<code>'.
data = data.join(dummy_frame.add_prefix('category'))
In [17]:
# Rows by index label (0 through 10, inclusive), columns by position
# (10 up to, but not including, 25). Written with .loc/.iloc because the
# .ix indexer is deprecated and removed in pandas 1.0.
data.loc[0:10].iloc[:, 10:25]
Out[17]:
In [15]:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
%matplotlib inline
def base_haiti_map(ax=None, lllat=17.25, urlat=20.25,
                   lllon=-75, urlon=-71, resolution='f'):
    """Create a stereographic Basemap for the given box and draw its outlines.

    Parameters
    ----------
    ax : matplotlib axes, optional
        Axes passed through to ``Basemap``.
    lllat, urlat : float
        Latitudes of the lower-left and upper-right map corners.
    lllon, urlon : float
        Longitudes of the lower-left and upper-right map corners.
    resolution : str, optional
        Boundary-data resolution forwarded to ``Basemap``. Defaults to
        ``'f'``, the value that used to be hard-coded; a coarser setting
        such as ``'i'`` or ``'l'`` can be passed for faster drawing.

    Returns
    -------
    Basemap
        The map instance, after coastlines, state boundaries and country
        boundaries have been drawn on it.
    """
    # Create a Basemap instance with a stereographic projection centred on
    # the midpoint of the requested box.
    m = Basemap(ax=ax, projection='stere',
                lon_0=(urlon + lllon) / 2,
                lat_0=(urlat + lllat) / 2,
                llcrnrlat=lllat, urcrnrlat=urlat,
                llcrnrlon=lllon, urcrnrlon=urlon,
                resolution=resolution)
    # Draw coastlines, state boundaries and country boundaries.
    m.drawcoastlines()
    m.drawstates()
    m.drawcountries()
    return m
In [19]:
# Draw a 2x2 grid of maps, one per selected category code, and time it.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
fig.subplots_adjust(hspace=0.05, wspace=0.05)

to_plot = ['2a', '1', '3c', '7a']
lllat = 17.25
urlat = 20.25
lllon = -75
urlon = -71

# timeit.default_timer exists on both Python 2 and 3; time.clock was
# removed in Python 3.8 and reports CPU time rather than elapsed time on Unix.
from timeit import default_timer

start = default_timer()
for code, ax in zip(to_plot, axes.flat):
    m = base_haiti_map(ax, lllat=lllat, urlat=urlat,
                       lllon=lllon, urlon=urlon)
    cat_data = data[data['category%s' % code] == 1]
    # Convert longitude/latitude into the map's projection coordinates.
    x, y = m(cat_data.LONGITUDE.values, cat_data.LATITUDE.values)
    m.plot(x, y, 'k.', alpha=0.5)
    ax.set_title('%s: %s' % (code, english_mapping[code]))
stop = default_timer()

# Report the elapsed time in seconds. The parenthesised single-argument
# form behaves the same under the Python 2 print statement and the
# Python 3 print function.
print('%.2f s' % (stop - start))
plt.show()
In [ ]: