In [2]:
# stdlib
import datetime as dt
import math
import os
from functools import reduce  # Python 3: reduce is no longer a builtin

# third-party
import branca.colormap as cm
import folium
import geopandas as gpd
import mplleaflet
import numpy as np
import pandas as pd
import pylab as pl
import pyproj
import shapefile as shp
from fiona.crs import from_epsg
from shapely.geometry import Point

%pylab inline
In [3]:
# ---- Configuration ----
# Input wifi trace collected with the Moto G4 handset.
file_name = "motoG4_062212.csv"
# Side length of one analysis grid cell, in US survey feet (EPSG:2263 units).
cell_length = 25
# When True, grid cells with no observations are dropped before plotting.
delete_empty_cell = True
# SSIDs of known free public wifi networks.
free_wifi = [
    '#flatiron free wifi',
    'freewifibysurface',
    'bryantpark.org',
    'DowntownBrooklynWiFi_Fon',
    'linknyc free wi-fi',
    'Metrotech',
    'usp park wifi',
    'Red Hook Wifi',
]
In [4]:
# ---- Load & timestamp parsing ----
# Read the wifi trace CSV (one row per observed beacon).
df = pd.read_csv(file_name)
# Convert the epoch-milliseconds 'time' column into datetimes.
# NOTE: fromtimestamp() uses the *local* timezone of the machine running this.
# List comprehensions instead of bare map(): under Python 3, map() returns a
# lazy iterator, which pandas would store as a single object — a silent bug.
df['time2'] = [dt.datetime.fromtimestamp(t) for t in df.time.astype(float) / 1000]
df['month'] = [t.month for t in df['time2']]
df['day'] = [t.day for t in df['time2']]
df['hour'] = [t.hour for t in df['time2']]
df['minute'] = [t.minute for t in df['time2']]
df['sec'] = [t.second for t in df['time2']]

# Filter data according to datetime -> ! INPUT DATETIME MANUALLY
df2 = df.copy()  # depends on your input data
# df2 = df2[((df2['month'] == 6) & (df2['day'] == 14)) |
#           ((df2['month'] == 6) & (df2['day'] == 22) & (df2['hour'] < 10))]

# ---- Geometry ----
df2.reset_index(drop=True, inplace=True)
# list(...) is required: a bare zip object cannot be stored as column values.
df2['geo'] = list(zip(df2.lng, df2.lat))
df2['geometry'] = [Point(xy) for xy in zip(df2.lng, df2.lat)]

# ---- Unique BSSIDs per observation point ----
grouped_geo = df2.groupby(df2.geo)
access_count = grouped_geo.apply(lambda g: len(g.bssid.unique()))
access_bssidList = grouped_geo.apply(lambda g: list(g.bssid.unique()))
df3 = pd.DataFrame([Point(xy) for xy in access_count.index], columns=['geometry'])
df3['unique_bssid_count'] = access_count.values
df3['unique_bssid_list'] = access_bssidList.values

# ---- CRS: WGS84 lon/lat -> NY State Plane (EPSG:2263, usft) ----
df3 = gpd.GeoDataFrame(df3)
df3.crs = from_epsg(4326)
df3.to_crs(epsg=2263, inplace=True)
df3.to_pickle('unique_bssid.p')

# ---- Grid boundary ----
all_x = [p.x for p in df3.geometry]
all_y = [p.y for p in df3.geometry]
minx, maxx, miny, maxy = min(all_x), max(all_x), min(all_y), max(all_y)
# grid cell size and the number of columns/rows needed to cover the extent
dx = cell_length
dy = cell_length
nx = int(math.ceil(abs(maxx - minx) / dx))
ny = int(math.ceil(abs(maxy - miny) / dy))

# ---- Write the grid as a polygon shapefile ----
# NOTE(review): this is the pyshp 1.x API (Writer(shapeType), w.save());
# pyshp 2.x changed both calls — pin pyshp<2 or port before upgrading.
w = shp.Writer(shp.POLYGON)
w.autoBalance = 1
w.field("ID")
cell_id = 0  # renamed from 'id' to avoid shadowing the builtin
for i in range(ny):
    for j in range(nx):
        cell_id += 1
        # clamp each vertex to the data extent so edge cells are truncated
        vertices = [
            [min(minx + dx * j, maxx), max(maxy - dy * i, miny)],
            [min(minx + dx * (j + 1), maxx), max(maxy - dy * i, miny)],
            [min(minx + dx * (j + 1), maxx), max(maxy - dy * (i + 1), miny)],
            [min(minx + dx * j, maxx), max(maxy - dy * (i + 1), miny)],
        ]
        w.poly([vertices])
        w.record(cell_id)
w.save('polygon_grid')

# ---- Spatial join: which grid cell does each point fall in? ----
grid = gpd.read_file('./polygon_grid.shp')
grid.crs = from_epsg(2263)
uni_bssid = pd.read_pickle("./unique_bssid.p")
uni_bssid = gpd.GeoDataFrame(uni_bssid)
uni_bssid.crs = from_epsg(2263)
PointInPoly = gpd.sjoin(uni_bssid, grid, how='left', op='intersects')
# TODO(review): a few points end up with no cell (NaN ID) — possibly points
# exactly on the clamped grid boundary; confirm before trusting totals.
PointInPoly.dropna(subset=['ID'], inplace=True)

# ---- Per-cell BSSID counts ----
# Concatenate every point's bssid list within a cell (duplicates kept),
# then dedupe to get the unique count "uni".
grouped = PointInPoly.groupby('ID').apply(
    lambda g: reduce(lambda a, b: a + b, g.unique_bssid_list))
bssidInPoly = pd.DataFrame(grouped, columns=['all_bssid_list'])
bssidInPoly['unique_bssid_list'] = [set(lst) for lst in grouped]
bssidInPoly['cum'] = [len(lst) for lst in grouped]       # with duplicates
bssidInPoly['uni'] = [len(set(lst)) for lst in grouped]  # unique bssids
bssidInPoly['ID'] = bssidInPoly.index
bssidInPoly.reset_index(drop=True, inplace=True)

# ---- Merge counts back onto the grid ----
grid_bssid = pd.merge(grid, bssidInPoly, how='left', on='ID')
grid_bssid.to_crs(epsg=2263, inplace=True)
if not delete_empty_cell:
    grid_bssid.uni.fillna(inplace=True, value=0)

# ---- Choropleth styling ----
grid_plot = grid_bssid.loc[:, ['ID', 'uni', 'geometry']]
grid_plot.dropna(subset=['uni'], inplace=True)
# plot color (cm is imported once at the top of the notebook)
linear = cm.LinearColormap(['#000000', 'red'])
# colormap = linear.to_step(8)
colormap = linear
colormap.caption = '# of bssid (unique)'
# min-max scale 'uni' into [0, 1] for the colormap
scale = grid_plot.uni.max() - grid_plot.uni.min()
uni_scale = (grid_plot.uni - grid_plot.uni.min()) / scale
grid_plot['style'] = [{'fillColor': colormap(i),
                       'weight': 0, 'color': 'black',
                       'dashArray': '0, 0'} for i in uni_scale]

# ---- Render ----
m = folium.Map([40.743, -74], zoom_start=15, tiles='Cartodb dark_matter', crs='EPSG3857')
folium.GeoJson(grid_plot, overlay=True).add_to(m)
m.add_child(colormap)
m.save(file_name + '.html')
In [5]:
# ---- Free-wifi subset ----
# Keep only rows whose SSID is a known free-wifi network that actually
# appears in this dataset.
s1 = set(df2.ssid)
s2 = set(free_wifi)
free_wifi_intersection = list(s1.intersection(s2))
df4 = df2.copy()
df4 = gpd.GeoDataFrame(df4)
# df4 = df4[df4.ssid.isin(['linknyc free wi-fi'])]
df4 = df4[df4.ssid.isin(free_wifi_intersection)]
df4.crs = from_epsg(4326)
df4.to_crs(epsg=2263, inplace=True)
# drop columns a shapefile cannot store (datetimes / tuples) before writing
df4.drop(['time2', 'geo'], axis=1).to_file('hp_plot')
# redundant round-trip through disk (kept: it also produces the shapefile)
free = gpd.read_file("./hp_plot/hp_plot.shp")
free.crs = from_epsg(2263)
freeInCell = gpd.sjoin(free, grid)  # freeInCell.ID.unique()
# Median signal level per grid cell. np.median is used instead of the bare
# `median` injected by %pylab, so this no longer depends on star-imports.
freeCell = freeInCell.groupby(freeInCell.ID).apply(lambda g: np.median(g.level))
grid2 = grid.copy()
grid2.index = grid2.ID
freeGrid = grid2.join(pd.DataFrame(freeCell, columns=['median level']))
freeGrid.reset_index(drop=True, inplace=True)
# freeGrid['median level'].fillna(0, inplace=True)
freeGrid2 = freeGrid.copy()
freeGrid2.dropna(subset=['median level'], inplace=True)
# min-max scale 'median level' into [0, 1] for the colormap
scale_free = freeGrid2['median level'].max() - freeGrid2['median level'].min()
uni_scale_free = (freeGrid2['median level'] - freeGrid2['median level'].min()) / scale_free
freeGrid2['style'] = [{'fillColor': colormap(i),
                       'weight': 0, 'color': 'black',
                       'dashArray': '0, 0'} for i in uni_scale_free]
n = folium.Map([40.743, -74], zoom_start=15, tiles='Cartodb dark_matter', crs='EPSG3857')
folium.GeoJson(freeGrid2, overlay=True).add_to(n)
n.add_child(colormap)
n.save('free' + file_name + '.html')
- [ ] Date filter or geo filter function
- [ ] carto-python
- [ ] Colorbar — use the same value range across maps
    - For now, values are scaled to [0, 1]
    - x_new = (x - min) / (max - min)
- [ ] Handle any record with a 'NaN' bssid or ssid
- [ ] Check the API — connect to it directly: API query -> process -> plot
- [ ] Check the max # of unique bssid: 200+
In [6]:
# Render the unique-bssid choropleth map built above.
m
Out[6]:
In [7]:
# Render the free-wifi median-signal map built above.
n
Out[7]:
In [ ]:
In [14]:
# m
# Top 5 grid cells by number of unique bssids observed.
grid_plot.sort_values(ascending=False, by='uni').head()
Out[14]:
In [13]:
# n
# Top 5 grid cells by median free-wifi signal level.
freeGrid2.sort_values(ascending=False, by='median level').head()
Out[13]:
In [31]:
# Grab the bssid collection (positional column 3 — presumably
# 'unique_bssid_list'; verify against grid_bssid.columns) for the grid
# cell with the highest unique-bssid count.
check1 = grid_bssid.dropna(subset=['unique_bssid_list']).sort_values(ascending=False, by='uni').head().iloc[0,3]
In [32]:
# Size of that cell's bssid collection (counts quoted in the notes below).
len(check1)
Out[32]:
If cell_length == 50, then len(check1) = 275.
If cell_length == 25, then len(check1) = 218.
Check!
Further, check the ssid column: how many distinct ssids are there, and does any single ssid account for a huge number of bssids?