In [1]:
from distributed import Executor, hdfs, progress, wait, s3
# Address handed to the Executor; kept in one place so it is easy to repoint.
cluster_address = 'localhost:8786'
e = Executor(cluster_address)
# Bare expression so the notebook shows the executor's repr as the cell output.
e
Out[1]:
In [2]:
# Glob over every export file whose name starts with 201401
# (presumably the January 2014 daily GDELT exports -- verify against the bucket).
gdelt_glob = 's3://blaze-data/gdelt/csv/201401*.export.csv'
# Files are tab-separated and carry no header row, so columns are addressed
# by integer position further down.
df = s3.read_csv(gdelt_glob, sep='\t', header=None)
In [3]:
# Hand the dataframe to the executor and rebind `df` to what it returns.
# NOTE(review): presumably this starts loading the data into cluster memory in
# the background -- confirm against the Executor.persist documentation.
df = e.persist(df)
In [4]:
# Display progress for the persisted dataframe (display only; nothing is rebound).
progress(df)
In [5]:
# Preview the first five rows of the raw data; the frame was read with
# header=None, so columns are identified by position rather than by name.
df.head(5)
Out[5]:
In [6]:
# Positional columns to keep from the headerless export, in the order they
# should appear in the working frame.
selected_positions = [1, 26, 0, 51, 3, 53, 54]
gts = df[selected_positions]
In [7]:
# Human-readable labels for the seven selected columns; the order here must
# match the order of the positional selection that produced `gts`.
column_labels = ['Date', 'Code', 'ID', 'Country', 'Year', 'Latitude', 'Longitude']
gts.columns = column_labels
In [8]:
# Preview the renamed frame to check the labels landed on the intended columns.
gts.head()
Out[8]:
In [9]:
# Keep only the rows whose Year field equals 2014.
is_2014 = gts['Year'] == 2014
gts = gts[is_2014]
In [10]:
# Preview the frame after the Year == 2014 filter.
gts.head()
Out[10]:
In [11]:
# Values of the 'Code' column to keep.
# NOTE(review): presumably CAMEO event codes stored as integers (so any leading
# zero is lost) -- confirm the intended meanings against the GDELT codebook.
event_codes = [
    211,
    231,
    311,
    331,
    61,
    71,
]
In [12]:
# Keep only the rows whose Code is one of the selected event codes.
has_selected_code = gts['Code'].isin(event_codes)
gts = gts[has_selected_code]
In [13]:
# Preview the frame after restricting to the selected event codes.
gts.head()
Out[13]:
In [14]:
# Keep only the rows whose Country field is exactly 'US'.
is_us = gts['Country'] == 'US'
gts = gts[is_us]
In [15]:
# Preview the frame after the Country == 'US' filter.
gts.head()
Out[15]:
In [16]:
import numpy as np
# Materialise both coordinate columns with ONE compute. Converting each lazy
# column with np.array() separately evaluates the whole filter chain twice;
# pulling the two columns together runs it once and guarantees that `lat` and
# `lon` come from exactly the same rows.
# NOTE(review): assumes `gts` is a lazy (dask) dataframe exposing .compute(),
# which the persist/progress calls earlier in the notebook imply -- confirm.
coords = gts[['Latitude', 'Longitude']].compute()
lat = np.asarray(coords['Latitude'])
lon = np.asarray(coords['Longitude'])
In [17]:
from bokeh.io import output_notebook, output_file, show
from bokeh.models import (
GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool
)
In [18]:
# Map centre and zoom for the base layer; the plot below is titled "Austin".
center_lat = 30.29
center_lng = -97.73
map_options = GMapOptions(
    lat=center_lat,
    lng=center_lng,
    map_type="roadmap",
    zoom=11,
)

# Both axes get a fresh DataRange1d; the Google map options supply the backdrop.
plot = GMapPlot(
    x_range=DataRange1d(),
    y_range=DataRange1d(),
    map_options=map_options,
    title="Austin",
)
In [19]:
# Column names 'lat' and 'lon' are the field names the circle glyph refers to
# for its y and x coordinates.
marker_columns = {"lat": lat, "lon": lon}
source = ColumnDataSource(data=marker_columns)
In [20]:
# One marker per row of `source`: x is read from its 'lon' column and y from
# its 'lat' column. Markers are blue, mostly opaque, and drawn without an outline.
circle = Circle(
    x="lon",
    y="lat",
    size=15,
    fill_color="blue",
    fill_alpha=0.8,
    line_color=None,
)
# Left as the final bare expression so the cell still displays add_glyph's
# return value, as the original cell did.
plot.add_glyph(source, circle)
Out[20]:
In [21]:
# Send Bokeh output to the notebook. To write a standalone HTML page instead,
# call output_file('map_plot.html') here in place of output_notebook().
output_notebook()
In [22]:
# Interactive tools attached to the map: panning, wheel zoom and box selection.
map_tools = [PanTool(), WheelZoomTool(), BoxSelectTool()]
plot.add_tools(*map_tools)
In [23]:
# Render the assembled map. output_notebook() was called in an earlier cell,
# so the plot is presumably displayed inline below this cell.
show(plot)
Out[23]: