In [1]:
%%HTML
<style>
/* Notebook layout override: elements with class "container" are forced to 75% width. */
.container{width:75% !important;}
/* Elements with class "text_cell_rendered_html" are forced to 20% width. */
.text_cell_rendered_html{width:20% !important;}
</style>
In [2]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.io import output_file, show, output_notebook
from bokeh.models import (
GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool, HoverTool, ResetTool,
)
import warnings
warnings.filterwarnings("ignore")
#from bokeh.resources import INLINE #Sometimes the CDN doesn't work
output_notebook()
In [3]:
import pandas as pd
import numpy as np
# Columns that are parsed as dates while the CSV is being read.
date_cols = [
    'last_scraped',
    'host_since',
    'last_review',
    'first_review',
    'calendar_last_scraped',
]

# Read the cleaned listings, drop the columns that are entirely empty and
# store the exact-location flag as an integer.
listings = (
    pd.read_csv('data/listings_clean.csv', index_col='Unnamed: 0', parse_dates=date_cols)
    .dropna(how='all', axis=1)
    .assign(is_location_exact=lambda frame: frame['is_location_exact'].astype(int))
)
In [4]:
# Count how many listings fall under each room type.
listings['room_type'].value_counts()
Out[4]:
This is where you select how you want your map to be displayed. You can select the following options:
A Google Maps API key is required to render the map. Get one here.
In [10]:
import ipywidgets as wid
# `display` is imported from the public `IPython.display` module;
# `IPython.core.display` is an internal path whose `display` import is deprecated.
from IPython.display import display

# Map styles offered by the toggle buttons.
map_types = ['roadmap', 'satellite', 'hybrid', 'terrain']
# One toggle button per style; the current selection is available as `maps.value`.
maps = wid.ToggleButtons(options=map_types)
display(maps)
In [15]:
import os

# The Google Maps API key is read from the environment instead of being
# stored in the notebook, where anyone with access to the file could reuse it.
GOOGLE_MAPS_API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY")
if not GOOGLE_MAPS_API_KEY:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable to a valid "
        "Google Maps API key before running this cell."
    )

# Map center and zoom level; the map type is the current value of the `maps` widget.
map_options = GMapOptions(lat=39.556471, lng=3.009915, map_type=maps.value, zoom=10)
# GMapPlot behaves like a regular bokeh plot object
plot = GMapPlot(
    x_range=DataRange1d(),
    y_range=DataRange1d(),
    tools=[PanTool(), WheelZoomTool(), ResetTool()],
    output_backend='webgl',
    map_options=map_options,
    plot_width=1200,
    plot_height=900,
)
plot.api_key = GOOGLE_MAPS_API_KEY
show(plot)
In order to overlay the data we need to create a glyph and add it to the GMapPlot. It is almost the same as creating a scatter plot from scratch.
We will map the price of each listing to a colormap using the ColormapPicker from the shaolin library. Then we will create a new column in our listing DataFrame that contains the color of each point.
In [16]:
from shaolin.dashboards.colormap import ColormapPicker
from shaolin.dashboards.data_transforms import DataFrameScaler
# Colormap picker from shaolin; its `map_data` method is what later turns values into colors.
cmap = ColormapPicker()
# Show the picker (presumably item 0 is its widget view — confirm against the shaolin docs).
cmap[0]
It is also possible to map other marker properties to data.
In [17]:
# Scaler built on the `is_location_exact` column only.
dfs = DataFrameScaler(listings[['is_location_exact']])
# Show the scaler (presumably item 0 is its widget view — confirm against the shaolin docs).
dfs[0]
In [18]:
# Column whose values drive the marker color; 'is_location_exact' is the commented-out alternative.
color_col = 'room_type'#'is_location_exact'
# Every marker gets the same fixed size; the scaler output (dfs.output) is the commented-out alternative.
listings['size'] = 5#dfs.output
# Rank the chosen column and pass the ranks through the colormap picker, asking for hex output (hex=True).
listings['color'] = cmap.map_data(listings[color_col].rank(),hex=True)
Once this is done, we will create the ColumnDataSource for the scatter plot that we will overlay on top of the map. Note that it is possible to add hover tooltips to the scatter plot glyph.
In [19]:
# Columns referenced by the circle glyph and by the hover tooltips.
source_columns = [
    'longitude', 'latitude', 'color', 'price',
    'bedrooms', 'is_location_exact', 'size',
]
list_source = ColumnDataSource(listings[source_columns])

# One circle per listing, positioned by its coordinates and filled with
# the color stored in the `color` column.
circle = Circle(
    x="longitude",
    y="latitude",
    size='size',
    fill_color="color",
    fill_alpha=0.7,
    line_color=None,
)

# Tooltip fields displayed when hovering over a listing.
hover_fields = [
    ("Index", "$index"),
    ('Price','@price'),
    ("Exact location", "@is_location_exact"),
    ("Bedrooms", "@bedrooms"),
]
hover = HoverTool(tooltips=hover_fields)

# Attach the glyph and the hover tool to the existing map plot, then render it.
plot.add_glyph(list_source, circle)
plot.add_tools(hover)
show(plot)
The shapefile format is a popular geospatial vector data format for geographic information systems (GIS). It describes vector features such as points, lines or polygons. One file can contain multiple shape records, which usually carry additional metadata describing their attributes.
In [21]:
import cartopy
# Import the shapereader submodule explicitly: `import cartopy` on its own does
# not guarantee that `cartopy.io.shapereader` has been loaded.
import cartopy.io.shapereader
from cartopy import crs

# Shapefile with the municipal boundaries.
shapef = 'data/recintos_municipales_inspire_peninbal_etrs89/recintos_municipales_inspire_peninbal_etrs89.shp'
shapes = cartopy.io.shapereader.Reader(shapef)
# Take the first record to inspect what a record looks like.
record = next(shapes.records())
record
Out[21]:
When loaded with cartopy, each record holds a shapely multipolygon object as its geometry. Shapely is a library for manipulating planar geometric objects.
In [22]:
# Geometry of the first record.
record.geometry
Out[22]:
In [23]:
# Area of the first record's geometry.
record.geometry.area
Out[23]:
In [24]:
# Bounds reported for the first record.
record.bounds
Out[24]:
In [25]:
# Attributes stored with the first record, looked up by field name.
record.attributes
Out[25]:
We will start by filtering out all the records that do not belong to Mallorca. In order to do so, we will have to find the corresponding name of the region following the NUTS standard.
In this standard, Mallorca is assigned the code ES532; the following image shows the different NUTS3 regions.
In [26]:
# Split the shapefile records by NUTS3 code: records tagged 'ES532' (Mallorca)
# are kept, and the positions of all other records are collected in `dels`.
all_records = list(shapes.records())
mallorca_map = [rec for rec in all_records if rec.attributes['CODNUT3'] == 'ES532']
dels = [
    position
    for position, rec in enumerate(all_records)
    if rec.attributes['CODNUT3'] != 'ES532'
]
As the names of the regions assigned to each listing do not match the names of the regions assigned to each record, we will need to find a way to relate the coordinates of a listing to a patch. We will do this by converting the coordinates of each listing to a shapely.geometry.Point, so we can compare points to patches.
If we measure the distance of a given point to the patches that we have, we will find that the distance will be 0 if the point is contained inside a given patch.
In [27]:
# Match the Airbnb listing coordinates to the municipality patches.
from shapely.geometry import Point


def find_unit_name(longitude, latitude, patches):
    """Return the NAMEUNIT of the first patch at distance 0 from the coordinate.

    Parameters
    ----------
    longitude, latitude : float
        Coordinates of the listing.
    patches : iterable
        Records exposing `.geometry` (with a `distance` method) and an
        `.attributes` mapping that contains 'NAMEUNIT'.

    Returns
    -------
    The NAMEUNIT attribute of the first matching patch, or NaN when no
    patch is at distance 0 from the point.
    """
    # Build the point once per listing rather than once per patch.
    point = Point(longitude, latitude)
    for patch in patches:
        if patch.geometry.distance(point) == 0:
            return patch.attributes['NAMEUNIT']
    return np.nan


# Collect the names in row order and assign the column in one step. The
# object dtype lets the column hold both names and NaN, and a positional
# assignment avoids per-row `.loc` writes, which would touch every row
# sharing an index label.
unit_names = [
    find_unit_name(longitude, latitude, mallorca_map)
    for longitude, latitude in zip(listings['longitude'], listings['latitude'])
]
listings['NAMEUNIT'] = np.array(unit_names, dtype=object)
Now that we have each point assigned to a patch, we can aggregate the data by region.
In [28]:
# Summary statistics of the price for each region (NAMEUNIT).
# The aggregations are given as (output column, aggregation) pairs: passing a
# renaming dict to a Series group-by was deprecated in pandas 0.20 and raises
# SpecificationError from pandas 1.0 on, whereas the pair form works on both.
price_summaries = [
    ('Num listings', 'size'),
    ('Mean price', 'mean'),
    ('Std price', 'std'),
    ('Median price', 'median'),
    ('Max price', 'max'),
    ('Min price', 'min'),
]
plot_listings = listings.groupby('NAMEUNIT')['price'].agg(price_summaries).reset_index()
plot_listings.head()
Out[28]:
Geoviews is a library for easily plotting maps, either using bokeh or matplotlib as a backend.
In [29]:
import holoviews as hv
import geoviews as gv
# Load the HoloViews notebook extension for the bokeh and matplotlib backends.
hv.notebook_extension('bokeh','matplotlib')
# Default plot options: colorbar on Image elements, x tick labels rotated by 60 on Curve elements.
%opts Image [colorbar=True] Curve [xrotation=60]
Once we have our dataframe of grouped data, we can plot a map using geoviews and holoviews. First we create a container for our aggregated data.
In [30]:
# Wrap the aggregated per-region table in a HoloViews Dataset.
dataset = hv.Dataset(plot_listings)
A plot can be created by defining the following parameters:
In [31]:
# Dataset columns passed as `index` to both maps (kept in one place so the
# two maps cannot drift apart).
shape_index = ['NAMEUNIT', "Num listings", "Median price",
               "Mean price", "Std price", "Max price", "Min price"]

# Regions valued by their mean price. The group label names the mean so that
# it agrees with the mapped value ('Mean price').
mallorca_price = gv.Shape.from_records(mallorca_map, dataset,
                                       on='NAMEUNIT', value='Mean price',
                                       index=shape_index,
                                       group='Mean price per day',
                                       crs=crs.PlateCarree())

# Regions valued by their number of listings.
mallorca_num = gv.Shape.from_records(mallorca_map, dataset,
                                     on='NAMEUNIT', value="Num listings",
                                     index=shape_index,
                                     group='Available listings',
                                     crs=crs.PlateCarree())
In [32]:
%%output backend='bokeh'
%%opts Points (fill_color='green',line_color='green',alpha=0.8) [xaxis=None yaxis=None tools=['hover'] width=600 height=600] Shape (cmap='Greens') [xaxis=None yaxis=None tools=['hover'] width=600 height=600]
# Points element with every listing's coordinates, laid out next to the listings-per-region map.
po = hv.Points((listings['longitude'],listings['latitude']),label="Available listings")
po+mallorca_num
Out[32]:
In [33]:
%%output backend='bokeh'
%%opts Shape (cmap='Blues',line_color='black') [xaxis=None yaxis=None tools=['hover'] width=600 height=600, colorbar=True] Image [colorbar=True]
# Layout of the two region maps: price per region next to number of listings per region.
mallorca_price+mallorca_num
Out[33]:
In [ ]: