In [2]:
# stdlib
import datetime as dt
import math
import os
from functools import reduce  # Python 3: reduce is no longer a builtin

# third-party
import branca.colormap as cm
import folium
import geopandas as gpd
import mplleaflet
import numpy as np
import pandas as pd
import pylab as pl
import pyproj
import shapefile as shp
from fiona.crs import from_epsg
from shapely.geometry import Point

%pylab inline
In [3]:
# ---- Configuration ----
# Input wifi trace collected with the Moto G4 handset.
file_name = "motoG4_062212.csv"
# Side length of one analysis grid cell, in US survey feet (EPSG:2263 units).
cell_length = 25
# When True, grid cells with no observations are dropped before plotting.
delete_empty_cell = True
# SSIDs of known free public wifi networks.
free_wifi = [
    '#flatiron free wifi',
    'freewifibysurface',
    'bryantpark.org',
    'DowntownBrooklynWiFi_Fon',
    'linknyc free wi-fi',
    'Metrotech',
    'usp park wifi',
    'Red Hook Wifi',
]
In [4]:
# ---- Load & timestamp parsing ----
# Read the wifi trace CSV (one row per observed beacon).
df = pd.read_csv(file_name)
# Convert the epoch-milliseconds 'time' column into datetimes.
# NOTE: fromtimestamp() uses the *local* timezone of the machine running this.
# List comprehensions instead of bare map(): under Python 3, map() returns a
# lazy iterator, which pandas would store as a single object — a silent bug.
df['time2'] = [dt.datetime.fromtimestamp(t) for t in df.time.astype(float) / 1000]
df['month'] = [t.month for t in df['time2']]
df['day'] = [t.day for t in df['time2']]
df['hour'] = [t.hour for t in df['time2']]
df['minute'] = [t.minute for t in df['time2']]
df['sec'] = [t.second for t in df['time2']]

# Filter data according to datetime -> ! INPUT DATETIME MANUALLY
df2 = df.copy()  # depends on your input data
# df2 = df2[((df2['month'] == 6) & (df2['day'] == 14)) |
#           ((df2['month'] == 6) & (df2['day'] == 22) & (df2['hour'] < 10))]

# ---- Geometry ----
df2.reset_index(drop=True, inplace=True)
# list(...) is required: a bare zip object cannot be stored as column values.
df2['geo'] = list(zip(df2.lng, df2.lat))
df2['geometry'] = [Point(xy) for xy in zip(df2.lng, df2.lat)]

# ---- Unique BSSIDs per observation point ----
grouped_geo = df2.groupby(df2.geo)
access_count = grouped_geo.apply(lambda g: len(g.bssid.unique()))
access_bssidList = grouped_geo.apply(lambda g: list(g.bssid.unique()))
df3 = pd.DataFrame([Point(xy) for xy in access_count.index], columns=['geometry'])
df3['unique_bssid_count'] = access_count.values
df3['unique_bssid_list'] = access_bssidList.values

# ---- CRS: WGS84 lon/lat -> NY State Plane (EPSG:2263, usft) ----
df3 = gpd.GeoDataFrame(df3)
df3.crs = from_epsg(4326)
df3.to_crs(epsg=2263, inplace=True)
df3.to_pickle('unique_bssid.p')

# ---- Grid boundary ----
all_x = [p.x for p in df3.geometry]
all_y = [p.y for p in df3.geometry]
minx, maxx, miny, maxy = min(all_x), max(all_x), min(all_y), max(all_y)
# grid cell size and the number of columns/rows needed to cover the extent
dx = cell_length
dy = cell_length
nx = int(math.ceil(abs(maxx - minx) / dx))
ny = int(math.ceil(abs(maxy - miny) / dy))

# ---- Write the grid as a polygon shapefile ----
# NOTE(review): this is the pyshp 1.x API (Writer(shapeType), w.save());
# pyshp 2.x changed both calls — pin pyshp<2 or port before upgrading.
w = shp.Writer(shp.POLYGON)
w.autoBalance = 1
w.field("ID")
cell_id = 0  # renamed from 'id' to avoid shadowing the builtin
for i in range(ny):
    for j in range(nx):
        cell_id += 1
        # clamp each vertex to the data extent so edge cells are truncated
        vertices = [
            [min(minx + dx * j, maxx), max(maxy - dy * i, miny)],
            [min(minx + dx * (j + 1), maxx), max(maxy - dy * i, miny)],
            [min(minx + dx * (j + 1), maxx), max(maxy - dy * (i + 1), miny)],
            [min(minx + dx * j, maxx), max(maxy - dy * (i + 1), miny)],
        ]
        w.poly([vertices])
        w.record(cell_id)
w.save('polygon_grid')

# ---- Spatial join: which grid cell does each point fall in? ----
grid = gpd.read_file('./polygon_grid.shp')
grid.crs = from_epsg(2263)
uni_bssid = pd.read_pickle("./unique_bssid.p")
uni_bssid = gpd.GeoDataFrame(uni_bssid)
uni_bssid.crs = from_epsg(2263)
PointInPoly = gpd.sjoin(uni_bssid, grid, how='left', op='intersects')
# TODO(review): a few points end up with no cell (NaN ID) — possibly points
# exactly on the clamped grid boundary; confirm before trusting totals.
PointInPoly.dropna(subset=['ID'], inplace=True)

# ---- Per-cell BSSID counts ----
# Concatenate every point's bssid list within a cell (duplicates kept),
# then dedupe to get the unique count "uni".
grouped = PointInPoly.groupby('ID').apply(
    lambda g: reduce(lambda a, b: a + b, g.unique_bssid_list))
bssidInPoly = pd.DataFrame(grouped, columns=['all_bssid_list'])
bssidInPoly['unique_bssid_list'] = [set(lst) for lst in grouped]
bssidInPoly['cum'] = [len(lst) for lst in grouped]       # with duplicates
bssidInPoly['uni'] = [len(set(lst)) for lst in grouped]  # unique bssids
bssidInPoly['ID'] = bssidInPoly.index
bssidInPoly.reset_index(drop=True, inplace=True)

# ---- Merge counts back onto the grid ----
grid_bssid = pd.merge(grid, bssidInPoly, how='left', on='ID')
grid_bssid.to_crs(epsg=2263, inplace=True)
if not delete_empty_cell:
    grid_bssid.uni.fillna(inplace=True, value=0)

# ---- Choropleth styling ----
grid_plot = grid_bssid.loc[:, ['ID', 'uni', 'geometry']]
grid_plot.dropna(subset=['uni'], inplace=True)
# plot color (cm is imported once at the top of the notebook)
linear = cm.LinearColormap(['#000000', 'red'])
# colormap = linear.to_step(8)
colormap = linear
colormap.caption = '# of bssid (unique)'
# min-max scale 'uni' into [0, 1] for the colormap
scale = grid_plot.uni.max() - grid_plot.uni.min()
uni_scale = (grid_plot.uni - grid_plot.uni.min()) / scale
grid_plot['style'] = [{'fillColor': colormap(i),
                       'weight': 0, 'color': 'black',
                       'dashArray': '0, 0'} for i in uni_scale]

# ---- Render ----
m = folium.Map([40.743, -74], zoom_start=15, tiles='Cartodb dark_matter', crs='EPSG3857')
folium.GeoJson(grid_plot, overlay=True).add_to(m)
m.add_child(colormap)
m.save(file_name + '.html')
In [5]:
# ---- Free-wifi subset ----
# Keep only rows whose SSID is a known free-wifi network that actually
# appears in this dataset.
s1 = set(df2.ssid)
s2 = set(free_wifi)
free_wifi_intersection = list(s1.intersection(s2))
df4 = df2.copy()
df4 = gpd.GeoDataFrame(df4)
# df4 = df4[df4.ssid.isin(['linknyc free wi-fi'])]
df4 = df4[df4.ssid.isin(free_wifi_intersection)]
df4.crs = from_epsg(4326)
df4.to_crs(epsg=2263, inplace=True)
# drop columns a shapefile cannot store (datetimes / tuples) before writing
df4.drop(['time2', 'geo'], axis=1).to_file('hp_plot')
# redundant round-trip through disk (kept: it also produces the shapefile)
free = gpd.read_file("./hp_plot/hp_plot.shp")
free.crs = from_epsg(2263)
freeInCell = gpd.sjoin(free, grid)  # freeInCell.ID.unique()
# Median signal level per grid cell. np.median is used instead of the bare
# `median` injected by %pylab, so this no longer depends on star-imports.
freeCell = freeInCell.groupby(freeInCell.ID).apply(lambda g: np.median(g.level))
grid2 = grid.copy()
grid2.index = grid2.ID
freeGrid = grid2.join(pd.DataFrame(freeCell, columns=['median level']))
freeGrid.reset_index(drop=True, inplace=True)
# freeGrid['median level'].fillna(0, inplace=True)
freeGrid2 = freeGrid.copy()
freeGrid2.dropna(subset=['median level'], inplace=True)
# min-max scale 'median level' into [0, 1] for the colormap
scale_free = freeGrid2['median level'].max() - freeGrid2['median level'].min()
uni_scale_free = (freeGrid2['median level'] - freeGrid2['median level'].min()) / scale_free
freeGrid2['style'] = [{'fillColor': colormap(i),
                       'weight': 0, 'color': 'black',
                       'dashArray': '0, 0'} for i in uni_scale_free]
n = folium.Map([40.743, -74], zoom_start=15, tiles='Cartodb dark_matter', crs='EPSG3857')
folium.GeoJson(freeGrid2, overlay=True).add_to(n)
n.add_child(colormap)
n.save('free' + file_name + '.html')
- [ ] Date filter or geo filter function
- [ ] carto-python
- [ ] Colorbar — use the same value range across maps
    - For now, values are scaled to [0, 1]
    - x_new = (x - min) / (max - min)
- [ ] Handle any record with a 'NaN' bssid or ssid
- [ ] Check the API — connect to it directly: API query -> process -> plot
- [ ] Check the max # of unique bssid: 200+
In [6]:
# Render the unique-bssid choropleth map built above.
m
Out[6]:
In [7]:
# Render the free-wifi median-signal map built above.
n
Out[7]:
In [ ]:
In [14]:
# m
# Top 5 grid cells by number of unique bssids observed.
grid_plot.sort_values(ascending=False, by='uni').head()
Out[14]:
In [13]:
# n
# Top 5 grid cells by median free-wifi signal level.
freeGrid2.sort_values(ascending=False, by='median level').head()
Out[13]:
In [31]:
# Grab the bssid collection (positional column 3 — presumably
# 'unique_bssid_list'; verify against grid_bssid.columns) for the grid
# cell with the highest unique-bssid count.
check1 = grid_bssid.dropna(subset=['unique_bssid_list']).sort_values(ascending=False, by='uni').head().iloc[0,3]
In [32]:
# Size of that cell's bssid collection (counts quoted in the notes below).
len(check1)
Out[32]:
If cell_length == 50, then len(check1) = 275.
If cell_length == 25, then len(check1) = 218.
Check!
Further, check the ssid column: how many distinct ssids are there, and does any single ssid account for a huge number of bssids?