In [2]:
import pandas as pd
from shapely.geometry import Point
import datetime as dt
import geopandas as gpd
from fiona.crs import from_epsg
import pyproj
import pylab as pl
import mplleaflet
import folium
import shapefile as shp
import math
import branca.colormap as cm
import os

%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [3]:
file_name = "motoG4_062212.csv"
cell_length = 25 # usft
delete_empty_cell = True

# free wifi list
free_wifi = ['#flatiron free wifi', 'freewifibysurface',
             'bryantpark.org', 'DowntownBrooklynWiFi_Fon',
             'linknyc free wi-fi', 'Metrotech',
             'usp park wifi', 'Red Hook Wifi']

In [4]:
# Read File
df = pd.read_csv(file_name)

# convert Unix timestamp into readable timestamp
df['time2'] = map(lambda x: dt.datetime.fromtimestamp(x), df.time.astype(float)/1000)
df['month'] = map(lambda x: x.month, df['time2'])
df['day'] = map(lambda x: x.day, df['time2'])
df['hour'] = map(lambda x: x.hour, df['time2'])
df['minute'] = map(lambda x: x.minute, df['time2'])
df['sec'] = map(lambda x: x.second, df['time2'])

# Filter data according to datetime -> ! INPUT DATETIME MANUALLY
df2 = df.copy() # depends on your input data
# df2 = df2[((df2['month'] == 6) & (df2['day'] == 14)) | 
#           ((df2['month'] == 6) & (df2['day'] == 22) & (df2['hour'] <10))]

# geo
df2.reset_index(drop=True, inplace=True)
df2['geo'] = zip(df2.lng, df2.lat)
df2['geometry'] = map(lambda x: Point(x), zip(df2.lng, df2.lat))

# groupby geo, unique bssid
access_count = df2.groupby(df2.geo).apply(lambda x: len(x.bssid.unique()))
access_bssidList = df2.groupby(df2.geo).apply(lambda x: list(x.bssid.unique()))
df3 = pd.DataFrame(map(lambda x: Point(x), access_count.index), columns=['geometry'])
df3['unique_bssid_count'] = access_count.values
df3['unique_bssid_list'] = access_bssidList.values

# crs
df3= gpd.GeoDataFrame(df3)
df3.crs = from_epsg(4326)
df3.to_crs(epsg=2263, inplace=True)
df3.to_pickle('unique_bssid.p')

# grid boundry
all_x = map(lambda p: p.x, df3.geometry)
all_y = map(lambda p: p.y, df3.geometry)
minx, maxx, miny, maxy = min(all_x), max(all_x), min(all_y), max(all_y) 

# grid length
dx = cell_length
dy = cell_length
nx = int(math.ceil(abs(maxx - minx)/dx))
ny = int(math.ceil(abs(maxy - miny)/dy))

# grid plotting
w = shp.Writer(shp.POLYGON)
w.autoBalance = 1
w.field("ID")
id=0
for i in range(ny):
    for j in range(nx):
        id+=1
        vertices = []
        parts = []
        vertices.append([min(minx+dx*j,maxx),max(maxy-dy*i,miny)])
        vertices.append([min(minx+dx*(j+1),maxx),max(maxy-dy*i,miny)])
        vertices.append([min(minx+dx*(j+1),maxx),max(maxy-dy*(i+1),miny)])
        vertices.append([min(minx+dx*j,maxx),max(maxy-dy*(i+1),miny)])
        parts.append(vertices)
        w.poly(parts)
        w.record(id)
w.save('polygon_grid')

# read data: TBD
grid = gpd.read_file('./polygon_grid.shp')
grid.crs = from_epsg(2263)
uni_bssid = pd.read_pickle("./unique_bssid.p")
uni_bssid = gpd.GeoDataFrame(uni_bssid)
uni_bssid.crs = from_epsg(2263)

# geo points in which cell?
PointInPoly = gpd.sjoin(uni_bssid, grid, how='left', op='intersects')
PointInPoly.dropna(subset=['ID'], inplace=True) # ? why a few points don't intersect with any cell?

# groupby cell.ID to get list of bssid (with duplications) for each cell, then calculate length of unique bssid "uni"
grouped = PointInPoly.groupby('ID').apply(lambda x: reduce(lambda x,y: x+y, x.unique_bssid_list))
bssidInPoly = pd.DataFrame(grouped, columns=['all_bssid_list'])
bssidInPoly['unique_bssid_list'] = map(lambda x: set(x), grouped)
bssidInPoly['cum'] = map(lambda x: len(x), grouped)
bssidInPoly['uni'] = map(lambda x: len(set(x)), grouped)
bssidInPoly['ID'] = bssidInPoly.index
bssidInPoly.reset_index(drop=True, inplace=True)

# merge grid and bssidInPoly
grid_bssid = pd.merge(grid, bssidInPoly, how='left', on='ID')
grid_bssid.to_crs(epsg=2263, inplace=True)
if not delete_empty_cell: 
    grid_bssid.uni.fillna(inplace=True, value=0)
    
# Data for Plot
grid_plot = grid_bssid.loc[:, ['ID', 'uni', 'geometry']]
grid_plot.dropna(subset=['uni'], inplace=True) 

# plot color
import branca.colormap as cm
linear = cm.LinearColormap(['#000000',  'red'])
#colormap = linear.to_step(8)
colormap = linear
colormap.caption = '# of bssid (unique)'

# plot scale : Add 'style' - scale 'uni' values
scale = grid_plot.uni.max() - grid_plot.uni.min()
uni_scale = (grid_plot.uni - grid_plot.uni.min())/ scale
grid_plot['style'] = [{'fillColor': colormap(i), 
                       'weight': 0, 'color': 'black', 
                       'dashArray': '0, 0'} for i in uni_scale]

# plot
m = folium.Map([40.743, -74], zoom_start=15, tiles='Cartodb dark_matter', crs='EPSG3857')
folium.GeoJson(grid_plot, overlay=True).add_to(m)
m.add_child(colormap)
m.save(file_name + '.html')

In [5]:
# free_wifi_intersection
s1 = set(df2.ssid)
s2 = set(free_wifi)
free_wifi_intersection = list(s1.intersection(s2))


df4 = df2.copy()
df4 = gpd.GeoDataFrame(df4)
# df4 = df4[df4.ssid.isin(['linknyc free wi-fi'])]
df4 = df4[df4.ssid.isin(free_wifi_intersection)]
df4.crs = from_epsg(4326)
df4.to_crs(epsg=2263, inplace=True)
df4.drop(['time2', 'geo'], axis=1).to_file('hp_plot')

# redundant
free = gpd.read_file("./hp_plot/hp_plot.shp")
free.crs = from_epsg(2263)

freeInCell = gpd.sjoin(free, grid) #freeInCell.ID.unique()
freeCell = freeInCell.groupby(freeInCell.ID).apply(lambda x: median(x.level))

grid2 = grid.copy()
grid2.index = grid2.ID
freeGrid = grid2.join(pd.DataFrame(freeCell, columns=['median level']))
freeGrid.reset_index(drop=True, inplace=True)
#freeGrid['median level'].fillna(0, inplace=True)

freeGrid2 = freeGrid.copy()
freeGrid2.dropna(subset=['median level'], inplace=True)

# plot scale : Add 'style' - scale 'uni' values
scale_free = freeGrid2['median level'].max() - freeGrid2['median level'].min()
uni_scale_free = (freeGrid2['median level'] - freeGrid2['median level'].min()) / scale_free
freeGrid2['style'] = [{'fillColor': colormap(i), 
                       'weight': 0, 'color': 'black', 
                       'dashArray': '0, 0'} for i in uni_scale_free]


n = folium.Map([40.743, -74], zoom_start=15, tiles='Cartodb dark_matter', crs='EPSG3857')
folium.GeoJson(freeGrid2, overlay=True).add_to(n)
n.add_child(colormap)
n.save('free' + file_name + '.html')










todo

- [] Date Filter or Geo Filter Function
- [] carto-python
- [] colorbar - value - same range
       - For now, scale values to [0,1]
       - x_new = (x-min)/(max-min)


- [] Any record with 'NaN' bssid or ssid
- [] Check API - directly connect API - API query -> process -> plot


done

- [] Check max # of unique bsside : 200+



1. All wifi


In [6]:
m


Out[6]:

2. free wifi


In [7]:
n


Out[7]:

In [ ]:

DataFrame


In [14]:
# m
grid_plot.sort_values(ascending=False, by='uni').head()


Out[14]:
ID uni geometry style
1954 1955 218.0 POLYGON ((983608.382156704 212344.6018336249, ... {u'color': u'black', u'dashArray': u'0, 0', u'...
2155 2156 205.0 POLYGON ((983633.3821567043 212294.6018336238,... {u'color': u'black', u'dashArray': u'0, 0', u'...
3459 3460 202.0 POLYGON ((983733.3821567025 211969.6018336188,... {u'color': u'black', u'dashArray': u'0, 0', u'...
1955 1956 199.0 POLYGON ((983633.3821567037 212344.6018336203,... {u'color': u'black', u'dashArray': u'0, 0', u'...
2361 2362 195.0 POLYGON ((983783.3821567019 212244.6018336226,... {u'color': u'black', u'dashArray': u'0, 0', u'...

In [13]:
# n
freeGrid2.sort_values(ascending=False, by='median level').head()


Out[13]:
ID geometry median level style
6263 6264 POLYGON ((983833.3821567033 211269.6018336797,... -76.5 {u'color': u'black', u'dashArray': u'0, 0', u'...
6064 6065 POLYGON ((983858.3821567033 211319.6018336797,... -77.0 {u'color': u'black', u'dashArray': u'0, 0', u'...
6163 6164 POLYGON ((983833.3821567033 211294.6018336797,... -77.0 {u'color': u'black', u'dashArray': u'0, 0', u'...
6262 6263 POLYGON ((983808.3821567033 211269.6018336797,... -81.0 {u'color': u'black', u'dashArray': u'0, 0', u'...



Check


In [31]:
check1 = grid_bssid.dropna(subset=['unique_bssid_list']).sort_values(ascending=False, by='uni').head().iloc[0,3]

In [32]:
len(check1)


Out[32]:
218
    if cell_length = 50, len(check1) = 275.  
    if cell_length = 25, len(chekc1) = 218.
    Check!

    Further, Check ssid to see how many ssids and any ssid with a huge number of bssid?