In [23]:
import datetime as dt
import itertools
import json
import math
import os
import urllib
import urllib2

import folium
import geopandas as gpd
import mplleaflet
import pandas as pd
import pylab as pl
import pyproj
import requests
import shapefile as shp
from fiona.crs import from_epsg
from shapely.geometry import Point
%pylab inline
In [71]:
# Query parameters for the WiFind data API.
starttime = '06/29/2017'  # fetch records starting from this date (MM/DD/YYYY)
batch = '500'  # how many records you want to fetch
# Request the lat/lng/ssid/bssid/time columns; %7C is a URL-encoded pipe ('|')
# used by the API as the column separator.
url = ('http://wifindproject.com/wifipulling/?columns=lat%7Clng%7Cssid%7Cbssid%7Ctime'
       '&startdate=' + starttime + '&batch=' + batch + '&timeformat=1')
resp = requests.get(url)
# Fail fast on HTTP errors instead of handing an error page to read_json later.
resp.raise_for_status()
print(url)
Out[71]:
In [74]:
# Source file name. Later cells reference file_name (date filter, output shapefile
# name), so it must be defined even when data comes from the API instead of a CSV.
file_name = "motoG4_062212.csv"
cell_length = 50  # grid cell edge length in usft (EPSG:2263 units)
delete_empty_cell = True  # drop grid cells with no observations from the plot data
# All or Only Free: restrict the analysis to the known free-WiFi SSIDs below
only_free_wifi = False
# free wifi list: SSIDs of known free public WiFi networks in NYC
free_wifi = ['#flatiron free wifi', 'freewifibysurface',
             'bryantpark.org', 'DowntownBrooklynWiFi_Fon',
             'linknyc free wi-fi', 'Metrotech',
             'usp park wifi', 'Red Hook Wifi']
In [75]:
# Read File (a CSV can be loaded instead via pd.read_csv(file_name))
df = pd.read_json(resp.text)
# for Free WiFi: which of the known free SSIDs actually appear in this batch?
s1 = set(df.ssid)
s2 = set(free_wifi)
free_wifi_intersection = list(s1 & s2)
if only_free_wifi:
    # keep only observations of free-WiFi networks
    df = df[df['ssid'].isin(free_wifi_intersection)]
In [76]:
# convert Unix timestamp into readable timestamp
df['time2'] = pd.to_datetime(df.time)
# Vectorized .dt accessor instead of map(lambda ...): faster, and works on
# both Python 2 and Python 3 (map is lazy on 3).
df['month'] = df['time2'].dt.month
df['day'] = df['time2'].dt.day
df['hour'] = df['time2'].dt.hour
df['minute'] = df['time2'].dt.minute
df['sec'] = df['time2'].dt.second
# Filter data according to datetime -> ! INPUT DATETIME MANUALLY
df2 = df.copy()  # depends on your input data
# globals().get avoids a NameError when file_name is not defined (API workflow).
if globals().get('file_name') == "dj.csv":
    # keep June 14, June 22 before 10:00, and June 29 only
    df2 = df2[((df2['month'] == 6) & (df2['day'] == 14)) | ((df2['month'] == 6) & (df2['day'] == 22) & (df2['hour'] < 10)) | ((df2['month'] == 6) & (df2['day'] == 29))]
# geo
df2.reset_index(drop=True, inplace=True)
# Materialize lists explicitly: zip/map are lazy iterators on Python 3 and
# cannot be assigned to a DataFrame column there.
df2['geo'] = list(zip(df2.lng, df2.lat))
df2['geometry'] = [Point(xy) for xy in zip(df2.lng, df2.lat)]
# groupby geo, unique bssid: one row per distinct (lng, lat) location
access_count = df2.groupby(df2.geo).apply(lambda g: len(g.bssid.unique()))
access_bssidList = df2.groupby(df2.geo).apply(lambda g: list(g.bssid.unique()))
df3 = pd.DataFrame([Point(xy) for xy in access_count.index], columns=['geometry'])
df3['unique_bssid_count'] = access_count.values
df3['unique_bssid_list'] = access_bssidList.values
# crs: tag as WGS84 lat/lng, then project to NY state plane (usft, EPSG:2263)
# so the grid below can be built in feet.
df3 = gpd.GeoDataFrame(df3)
df3.crs = from_epsg(4326)
# reassign instead of inplace=True: to_crs(inplace=...) is deprecated/removed
# in newer geopandas versions
df3 = df3.to_crs(epsg=2263)
df3.to_pickle('unique_bssid.p')
# grid boundary: bounding box of all projected points
all_x = [p.x for p in df3.geometry]
all_y = [p.y for p in df3.geometry]
minx, maxx, miny, maxy = min(all_x), max(all_x), min(all_y), max(all_y)
# grid length (cell edge, in usft)
dx = cell_length
dy = cell_length
nx = int(math.ceil(abs(maxx - minx) / dx))
ny = int(math.ceil(abs(maxy - miny) / dy))
# grid plotting: one rectangular polygon per cell, written row-major starting
# from the top-left corner; edge cells are clamped to the bounding box.
w = shp.Writer(shp.POLYGON)
w.autoBalance = 1
w.field("ID")
cell_id = 0
for row in range(ny):
    # y-coordinates are constant across a row, so compute them once
    top = max(maxy - dy * row, miny)
    bottom = max(maxy - dy * (row + 1), miny)
    for col in range(nx):
        cell_id += 1
        left = min(minx + dx * col, maxx)
        right = min(minx + dx * (col + 1), maxx)
        # corners listed clockwise: TL, TR, BR, BL
        w.poly([[[left, top], [right, top], [right, bottom], [left, bottom]]])
        w.record(cell_id)
w.save('polygon_grid')
# read data: TBD
grid = gpd.read_file('./polygon_grid.shp')
grid.crs = from_epsg(2263)
uni_bssid = pd.read_pickle("./unique_bssid.p")
uni_bssid = gpd.GeoDataFrame(uni_bssid)
uni_bssid.crs = from_epsg(2263)
# geo points in which cell?
PointInPoly = gpd.sjoin(uni_bssid, grid, how='left', op='intersects')
# NOTE(review): a few points intersect no cell -- possibly landing exactly on
# the bounding-box edge after projection; confirm before relying on totals.
PointInPoly.dropna(subset=['ID'], inplace=True)
# groupby cell.ID to get the concatenated bssid list (with duplications) per
# cell. chain.from_iterable is O(n) and works on Python 3, unlike the original
# quadratic reduce(lambda x, y: x + y, ...).
grouped = PointInPoly.groupby('ID').apply(
    lambda g: list(itertools.chain.from_iterable(g.unique_bssid_list)))
bssidInPoly = pd.DataFrame(grouped, columns=['all_bssid_list'])
bssidInPoly['unique_bssid_list'] = [set(lst) for lst in grouped]
bssidInPoly['cum'] = [len(lst) for lst in grouped]       # total observations per cell
bssidInPoly['uni'] = [len(set(lst)) for lst in grouped]  # distinct access points per cell
bssidInPoly['ID'] = bssidInPoly.index
bssidInPoly.reset_index(drop=True, inplace=True)
# merge grid and bssidInPoly
grid_bssid = pd.merge(grid, bssidInPoly, how='left', on='ID')
# pd.merge may not preserve the GeoDataFrame subclass or its CRS, which made
# the original to_crs(..., inplace=True) fragile -- and both sides are already
# in EPSG:2263, so no reprojection is needed. Restore class and CRS explicitly.
grid_bssid = gpd.GeoDataFrame(grid_bssid, geometry='geometry')
grid_bssid.crs = from_epsg(2263)
if not delete_empty_cell:
    # assignment instead of chained-attribute fillna(inplace=True), which can
    # silently operate on a copy in newer pandas
    grid_bssid['uni'] = grid_bssid['uni'].fillna(0)
# Data for Plot
grid_plot = grid_bssid.loc[:, ['ID', 'uni', 'geometry']]
grid_plot.dropna(subset=['uni'], inplace=True)
# file_name is only defined for the CSV workflow; fall back to a generic
# output name so the API workflow does not hit a NameError here
out_name = globals().get('file_name', 'grid_plot.shp').split(".")[0]
grid_plot.to_file(out_name)
In [78]:
# grid_plot