In [4]:
library(SparkR)
library(leaflet)
library(sp)
options(digits=15)
the_lat = 39.976057
the_lng = 116.330243
the_zoom = 15
epsilon = 200
source("pbfe.R")
In [5]:
data = read.csv("sample_small.csv")
names(data) = c("ID","lat","lng")
head(data)
In [6]:
map = leaflet() %>%
addTiles() %>%
addCircleMarkers(lng=data$lng, lat=data$lat,weight=2,fillOpacity=1,color="blue",radius=2)
file = 'map1.html'
htmlwidgets::saveWidget(map, file = file, selfcontained = F)
IRdisplay::display_html(paste("<iframe width=100% height=400 src=' ", file, " ' ","/>"))
In [7]:
sc <- sparkR.init("local[*]", "SparkR")
sqlContext <- sparkRSQL.init(sc)
In [8]:
dataRDD = SparkR:::textFile(sc,"sample_small.csv")
dataRDD = SparkR:::map(dataRDD, transformCoords)
In [9]:
schema <- structType(structField("id", "double"), structField("lng", "double"), structField("lat", "double"))
points <- createDataFrame(sqlContext, dataRDD, schema = schema)
cache(points)
In [10]:
head(points)
count(points)
In [11]:
registerTempTable(points, "p1")
registerTempTable(points, "p2")
In [12]:
sql = paste0("SELECT * FROM p1 DISTANCE JOIN p2 ON POINT(p2.lng, p2.lat) IN CIRCLERANGE(POINT(p1.lng, p1.lat), ",epsilon,") WHERE p2.id < p1.id")
pairs = sql(sqlContext,sql)
head(pairs)
nrow(pairs)
In [13]:
centers <- SparkR:::map(pairs, calculateDisk)
schema <- structType(structField("id1", "double"), structField("id2", "double"), structField("lng1", "double"), structField("lat1", "double"), structField("lng2", "double"), structField("lat2", "double"))
d <- createDataFrame(sqlContext, centers, schema = schema)
head(d)
count(d)
In [14]:
centers_lnglat <- SparkR:::map(centers, transformCenters)
disks <- as.data.frame(createDataFrame(sqlContext,centers_lnglat))
names(disks) = c("id1","id2","lng1","lat1","lng2","lat2")
head(disks)
nrow(disks)
In [15]:
p = sort(unique(c(disks$id1,disks$id2)))
data2 = data[p,]
map = leaflet() %>% setView(lat = the_lat, lng = the_lng, zoom = the_zoom) %>% addTiles() %>%
addCircles(lng=disks$lng1, lat=disks$lat1, weight=2, fillOpacity=0.25, color="red", radius = epsilon/2) %>%
addCircles(lng=disks$lng2, lat=disks$lat2, weight=2, fillOpacity=0.25, color="red", radius = epsilon/2) %>%
addCircleMarkers(lng=data$lng, lat=data$lat, weight=2, fillOpacity=1,radius = 2) %>%
addCircleMarkers(lng=data2$lng, lat=data2$lat, weight=2, fillOpacity=1, color="purple", radius = 2, popup=paste(data2$ID)) %>%
addProviderTiles("Esri.WorldImagery", group = "ESRI") %>%
addLayersControl(baseGroup = c("OSM(default)", "ESRI"))
file = 'map2.html'
htmlwidgets::saveWidget(map, file = file, selfcontained = F)
IRdisplay::display_html(paste("<iframe width=100% height=400 src=' ", file, " ' ","/>"))
In [16]:
registerTempTable(d, "d")
registerTempTable(points, "p")
In [17]:
sql = paste0("SELECT d.lng1 AS lng, d.lat1 AS lat, id AS id_member FROM d DISTANCE JOIN p ON POINT(p.lng, p.lat) IN CIRCLERANGE(POINT(d.lng1, d.lat1), ",(epsilon/2)+0.01,")")
mdisks = sql(sqlContext,sql)
registerTempTable(mdisks, "m")
sql = "SELECT lng, lat FROM m GROUP BY lng, lat HAVING count(id_member) >= 3"
mdisks1 = sql(sqlContext,sql)
In [18]:
sql = paste0("SELECT d.lng2 AS lng, d.lat2 AS lat, id AS id_member FROM d DISTANCE JOIN p ON POINT(p.lng, p.lat) IN CIRCLERANGE(POINT(d.lng2, d.lat2), ",(epsilon/2)+0.01,")")
mdisks = sql(sqlContext,sql)
registerTempTable(mdisks, "m")
sql = "SELECT lng, lat FROM m GROUP BY lng, lat HAVING count(id_member) >= 3"
mdisks2 = sql(sqlContext,sql)
In [19]:
mdisks = as.data.frame(rbind(mdisks1, mdisks2))
id = seq(1,nrow(mdisks))
mdisks$id = id
coordinates(mdisks) = ~lng+lat
proj4string(mdisks) = mercator
mdisks = spTransform(mdisks, wgs84)
mdisks$lng1 = coordinates(mdisks)[,1]
mdisks$lat1 = coordinates(mdisks)[,2]
In [20]:
map = leaflet() %>% setView(lat = the_lat, lng = the_lng, zoom = the_zoom) %>% addTiles() %>%
addCircles(lng=mdisks$lng1, lat=mdisks$lat1, weight=2, fillOpacity=0.25, color="blue", radius = epsilon/2) %>%
addCircleMarkers(lng=data$lng, lat=data$lat, weight=2, fillOpacity=1,radius = 2) %>%
addCircleMarkers(lng=data2$lng, lat=data2$lat, weight=2, fillOpacity=1, color="purple", radius = 2) %>%
addProviderTiles("Esri.WorldImagery", group = "ESRI") %>%
addLayersControl(baseGroup = c("OSM(default)", "ESRI"))
file = 'map3.html'
htmlwidgets::saveWidget(map, file = file, selfcontained = F)
IRdisplay::display_html(paste("<iframe width=100% height=400 src=' ", file, " ' ","/>"))
In [21]:
v1 = "656,659,660"
v1 = as.numeric(unlist(strsplit(v1,',')))
v1
v2 = c(656,657,659,660)
v2
prod(is.element(v1, v2))
4:3
In [22]:
m <- as.data.frame(rbind(mdisks1, mdisks2))
m$id = seq(1,nrow(m))
m = createDataFrame(sqlContext, m)
head(m)
count(m)
registerTempTable(m, "m")
head(points)
count(points)
sql = paste0("SELECT m.id AS mid, p.id AS pid FROM m DISTANCE JOIN p ON POINT(p.lng, p.lat) IN CIRCLERANGE (POINT(m.lng, m.lat), ",(epsilon/2)+0.01,") ORDER BY mid, pid")
t = sql(sqlContext,sql)
head(t, 30)
count(t)
In [23]:
library(sqldf)
In [24]:
t = as.data.frame(t)
g = sqldf("SELECT mid, group_concat(CAST(pid AS INT)) AS pids FROM t GROUP BY mid ORDER BY count(pid)")
head(g)
nrow(g)
tail(g)
In [25]:
n = c()
r = nrow(g)
for(i in 1:r){
disk_i = as.numeric(unlist(strsplit(g[i,2], ',')))
flag = 1
for(j in (i+1):r){
disk_j = as.numeric(unlist(strsplit(g[j,2], ',')))
# print(paste0("Disk 1: ", disk_i, " Disk 2: ", disk_j, " Result: ", is.element(disk_i, disk_j)))
if(prod(is.element(disk_i, disk_j)) == 1){
flag = 0
break
}
}
if(flag == 1){
n = c(n, i)
}
}
n = c(n, r)
n = unique(n)
In [26]:
g = g[n,]
m = as.data.frame(m)
maximal = sqldf("SELECT lng, lat, pids FROM g JOIN m ON(id = mid)")
head(maximal)
nrow(maximal)
coordinates(maximal) = ~lng+lat
proj4string(maximal) = mercator
maximal = spTransform(maximal, wgs84)
maximal$lng1 = coordinates(maximal)[,1]
maximal$lat1 = coordinates(maximal)[,2]
In [27]:
map = leaflet() %>% setView(lat = the_lat, lng = the_lng, zoom = the_zoom) %>% addTiles() %>%
addCircles(lng=maximal$lng1, lat=maximal$lat1, weight=2, fillOpacity=0.25, color="orange", radius = epsilon/2, popup = maximal$pids) %>%
addCircleMarkers(lng=data$lng, lat=data$lat, weight=2, fillOpacity=1,radius = 2) %>%
addCircleMarkers(lng=data2$lng, lat=data2$lat, weight=2, fillOpacity=1, color="purple", radius = 2) %>%
addProviderTiles("Esri.WorldImagery", group = "ESRI") %>%
addLayersControl(baseGroup = c("OSM(default)", "ESRI"))
file = 'map4.html'
htmlwidgets::saveWidget(map, file = file, selfcontained = F)
IRdisplay::display_html(paste("<iframe width=100% height=400 src=' ", file, " ' ","/>"))
In [29]:
map = leaflet() %>% setView(lat = the_lat, lng = the_lng, zoom = the_zoom) %>% addTiles() %>%
addCircleMarkers(lng=data$lng, lat=data$lat, weight=2, fillOpacity=1,radius = 2, group = "Points") %>%
addCircles(lng=disks$lng1, lat=disks$lat1, weight=2, fillOpacity=0.10, color="red", radius = epsilon/2, group = "Initial set") %>%
addCircles(lng=disks$lng2, lat=disks$lat2, weight=2, fillOpacity=0.10, color="red", radius = epsilon/2, group = "Initial set") %>%
addCircleMarkers(lng=data2$lng, lat=data2$lat, weight=2, fillOpacity=1, color="purple", radius=2, group = "Initial set") %>%
addCircles(lng=mdisks$lng1, lat=mdisks$lat1, weight=2, fillOpacity=0.20, color="blue", radius = epsilon/2, group = "Prune less than mu") %>%
addCircleMarkers(lng=data2$lng, lat=data2$lat, weight=2, fillOpacity=1, color="purple", radius=2, group = "Prune less than mu") %>%
addCircles(lng=maximal$lng1, lat=maximal$lat1, weight=2, fillOpacity=0.40, color="orange", radius = epsilon/2, popup = maximal$pids, group = "Prune redundant") %>%
addCircleMarkers(lng=data2$lng, lat=data2$lat, weight=2, fillOpacity=1, color="purple", radius=2, group = "Prune redundant") %>%
addProviderTiles("Esri.WorldImagery", group = "ESRI") %>%
addLayersControl(baseGroup = c("OSM(default)", "ESRI"),
overlayGroups = c("Initial set", "Prune less than mu", "Prune redundant", "Points"),
options = layersControlOptions(collapsed = FALSE, autoZIndex = FALSE))
file = 'map5.html'
htmlwidgets::saveWidget(map, file = file, selfcontained = F)
IRdisplay::display_html(paste("<iframe width=100% height=400 src=' ", file, " ' ","/>"))
In [ ]: