In [9]:
import time
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib import colors,colorbar
%matplotlib inline
import csv
import math
from math import radians, cos, sin, asin, sqrt
from scipy import stats
import math
import cPickle
In [10]:
import bq
# Authenticated BigQuery client; used by Query() below.
client = bq.Client.Get()
In [11]:
def Query(q):
    """Run a BigQuery SQL string via the global `client` and return all rows.

    Executes the query, then reads the rows back from the query job's
    destination table.  Prints the wall-clock round-trip time.

    q -- SQL text (BigQuery legacy SQL, matching the rest of this notebook).
    """
    t0 = time.time()
    answer = client.ReadTableRows(client.Query(q)['configuration']['query']['destinationTable'])
    print 'Query time: ' + str(time.time() - t0) + ' seconds.'
    return answer
In [4]:
def query_date(thedate):
    """Build (and print) the per-vessel daily density query for one day.

    thedate -- date string 'YYYYMMDD'; names the daily
               [pipeline_normalize.YYYYMMDD] source table.

    Per mmsi, the query bins the vessel's first and average position of the
    day into 1/10-degree cells, sums a per-position weight that varies with
    speed/turn/message type (apparently the expected AIS reporting interval
    -- TODO confirm the weighting rationale), and counts positions received
    terrestrially (rORBCOMM* stations) vs. by satellite.  Vessels whose
    daily bounding box is implausibly large are dropped, except near the
    dateline (first_lon beyond +/-170), and only mmsi from the
    good_mmsi_2015_1000pings whitelist are kept.

    NOTE(review): execution and pickling are commented out below -- as
    written, this function only prints the SQL.
    """
    q = '''
SELECT
integer(FLOOR(first_lat*10)) lat_bin,
integer(FLOOR(first_lon*10)) lon_bin,
integer(FLOOR(avg_lat*10)) lat_bin_avg,
integer(FLOOR(avg_lon*10)) lon_bin_avg,
satellite_positions sat_positions,
terrestrial_positions terrestrial_positions,
positions_weighted,
avg_speed,
slow_pings,
mmsi
FROM
(SELECT
mmsi,
SUM( CASE WHEN speed = 0 OR (speed<=2 AND type IN (18, 19)) THEN 180
WHEN (speed > 0 AND speed <14 AND type IN (1,2,3)AND turn = 0 )
OR (speed>2 AND type IN (18,19)) THEN 10
when speed>0 and speed<14 and type in (1,2,3) and turn !=0 then 3.3
when speed>=14 and speed<23 and type in (1,2,3) and turn = 0 then 6
when type in (1,2,3) and (speed>=23 or (speed>=14 and turn !=0)) then 2
END) positions_weighted,
first(lat) first_lat,
first(lon) first_lon,
avg(lat) avg_lat,
avg(lon) avg_lon,
max(lat) max_lat,
min(lat) min_lat,
max(lon) max_lon,
min(lon) min_lon,
avg(speed) avg_speed,
sum(if( (speed=0 and type in (1,2,3)) or (speed<2 and type in (18,19)),1,0 )) slow_pings,
sum( if(REGEXP_REPLACE(tagblock_station, 'u', '') IN ('rORBCOMM000',
'rORBCOMM01',
'rORBCOMM008',
'rORBCOMM009',
'rORBCOMM010'),1,0)) terrestrial_positions,
sum( if(REGEXP_REPLACE(tagblock_station, 'u', '') not IN ('rORBCOMM000',
'rORBCOMM01',
'rORBCOMM008',
'rORBCOMM009',
'rORBCOMM010'),1,0)) satellite_positions,
FROM
[pipeline_normalize.'''+thedate+''']
WHERE
type IN (1,2,3,18,19) and lat is not null and lon is not null and speed is not null and turn is not null
group by mmsi
)
where
max_lat - min_lat <5
AND (max_lon - min_lon < 10
OR first_lon > 170
OR first_lon < -170)
AND mmsi IN (select mmsi from
[scratch_david_gapanalysis.good_mmsi_2015_1000pings])
'''
    print q
    # positions = Query(q)
    # cPickle.dump(positions, open('../../data/density/'+thedate+'_v2_vessels.p', 'wb'))
In [5]:
query_date('20130701')
In [15]:
for i in range(1,32):
thedate = "201503"
if i<10:
thedate += "0"+str(i)
else:
thedate += str(i)
print thedate
query_date(thedate)
In [3]:
import csv
for i in range(1,32):
thedate = "201501"
if i<10:
thedate += "0"+str(i)
else:
thedate += str(i)
print thedate
positions = cPickle.load(open('../../data/density/'+thedate+'_v2_vessels.p', 'rb'))
with open('../../data/density/'+thedate+'_v2_vessels.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerow(["lat_bin","lon_bin","lat_bin_avg","lon_bin_avg","sat_positions",
"terrestrial_positions","positions_weighted",
"avg_speed","slow_pings","mmsi"])
writer.writerows(positions)
In [5]:
for i in range(1,29):
thedate = "201502"
if i<10:
thedate += "0"+str(i)
else:
thedate += str(i)
print thedate
positions = cPickle.load(open('../../data/density/'+thedate+'_v2_vessels.p', 'rb'))
with open('../../data/density/'+thedate+'_v2_vessels.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerow(["lat_bin","lon_bin","lat_bin_avg","lon_bin_avg","sat_positions",
"terrestrial_positions","positions_weighted",
"avg_speed","slow_pings","mmsi"])
writer.writerows(positions)
In [4]:
for i in range(1,31):
thedate = "201503"
if i<10:
thedate += "0"+str(i)
else:
thedate += str(i)
print thedate
positions = cPickle.load(open('../../data/density/'+thedate+'_v2_vessels.p', 'rb'))
with open('../../data/density/'+thedate+'_v2_vessels.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerow(["lat_bin","lon_bin","lat_bin_avg","lon_bin_avg","sat_positions",
"terrestrial_positions","positions_weighted",
"avg_speed","slow_pings","mmsi"])
writer.writerows(positions)
In [6]:
# for i in range(31,32):
# thedate = "201503"
# if i<10:
# thedate += "0"+str(i)
# else:
# thedate += str(i)
# print thedate
# positions = cPickle.load(open('../../data/density/'+thedate+'_v2_vessels.p', 'rb'))
# with open('../../data/density/'+thedate+'_v2_vessels.csv', 'wb') as f:
# writer = csv.writer(f)
# writer.writerow(["lat_bin","lon_bin","lat_bin_avg","lon_bin_avg","sat_positions",
# "terrestrial_positions","positions_weighted",
# "avg_speed","slow_pings","mmsi"])
# writer.writerows(positions)
In [6]:
from math import radians, cos, sin, asin, sqrt
%matplotlib inline
from matplotlib import colors,colorbar
from mpl_toolkits.basemap import Basemap
import bq
import csv
import matplotlib.pyplot as plt
import numpy as np
import time
import math
from IPython.core.display import display, HTML
client = bq.Client.Get()
In [8]:
def makeMap(grid, fig_title, bar_title, fig_min_value, fig_max_value, colormap='viridis',
            map_width=10, map_height=6):
    """Draw a gridded density map on a log color scale and save it as a PNG.

    Relies on module-level globals for the region: max_lat, min_lat,
    max_lon, min_lon and one_over_cellsize (cells per degree).

    grid          -- 2D array indexed [lat][lon]; values must be > 0 to show
                     on the log scale
    fig_title     -- plot title, also used as the output filename ('.png')
    bar_title     -- label for the horizontal colorbar
    fig_min_value -- lower bound of the log color scale (must be > 0)
    fig_max_value -- upper bound of the log color scale
    colormap      -- matplotlib colormap; BUG FIX: the default was the
                     misspelled 'vidris', which would raise if ever used
                     (all current callers pass an explicit colormap)
    """
    fig = plt.figure(figsize=(map_width, map_height))
    firstlat, lastlat = max_lat, min_lat
    firstlon, lastlon = min_lon, max_lon
    extra = 0
    m = Basemap(llcrnrlat=lastlat - extra, urcrnrlat=firstlat + extra,
                llcrnrlon=firstlon - extra, urcrnrlon=lastlon + extra,
                lat_ts=0, projection='mill', resolution="h")
    m.drawmapboundary()
    m.fillcontinents('#cccccc', lake_color='#cccccc')
    # Cell-edge coordinates for pcolormesh (one more edge than cells).
    # np.linspace's `num` must be an int (floats are rejected by newer numpy).
    x = np.linspace(firstlon, lastlon, int(-(firstlon - lastlon) * one_over_cellsize + 1))
    y = np.linspace(lastlat, firstlat, int((firstlat - lastlat) * one_over_cellsize + 1))
    x, y = np.meshgrid(x, y)
    converted_x, converted_y = m(x, y)
    norm = colors.LogNorm(vmin=fig_min_value, vmax=fig_max_value)
    m.pcolormesh(converted_x, converted_y, grid, norm=norm,
                 vmin=fig_min_value, vmax=fig_max_value, cmap=colormap)
    plt.title(fig_title, color="#000000", fontsize=18)
    # Horizontal colorbar with six log-spaced, integer-labelled ticks.
    ax = fig.add_axes([0.2, 0.1, 0.65, 0.02])
    lvls = np.logspace(np.log10(fig_min_value), np.log10(fig_max_value), num=6)
    cb = colorbar.ColorbarBase(ax, norm=norm, orientation='horizontal',
                               ticks=lvls, cmap=colormap)
    cb.ax.set_xticklabels([int(i) for i in lvls], fontsize=10, color="#000000")
    cb.set_label(bar_title, labelpad=-40, y=0.45, color="#000000")
    plt.savefig(fig_title + ".png", bbox_inches='tight', dpi=300,
                transparent=True, pad_inches=.1)
    plt.show()
In [29]:
# Study-region bounding box in degrees -- roughly Australia and the
# eastern Indian Ocean (presumably the CSIRO comparison region; the
# downstream pickles are named *_csiro_region_*).
max_lat = 10
min_lat = -40
max_lon = 170
min_lon = 80
In [33]:
# Per-1-degree-cell ping and distinct-vessel counts for 2015-01-01,
# restricted to the study region.  Segments that are mostly terrestrial
# and short, or that sit in the noise box near (0, 0), are excluded;
# the mmsi blacklist is currently commented out inside the SQL.
q = '''
SELECT
integer(floor(lat)) lat_bin,
integer(floor(lon)) lon_bin,
count(distinct mmsi) distinct_mmsi,
count(*) pings
from (
select mmsi,lat, lon, seg_id from TABLE_DATE_RANGE([pipeline_classify_logistic_715.],
TIMESTAMP("2015-01-01"), TIMESTAMP("2015-01-01"))
where lat > '''+str(min_lat)+'''
AND lat <'''+str(max_lat)+''' and
lon > '''+str(min_lon)+'''
and lon < '''+str(max_lon)+'''
)
where
# mmsi not in (412437961,412437962,412420502,412420503,412420576,412420574,412420789,412420871,
# 900025357,900025393,413322650,414203586,412211196,412440255,412440256,412440257,412440258,412440259,
# 412440261,150200162,412440077,412440078,412420805,412420421,412440377,412425706,412447093,412211691,
# 412420276,412420917,411041815, 525018188, 412420276,412420561,533180156)
# AND
seg_id NOT IN (
SELECT seg_id
FROM
[scratch_david_seg_analysis.2015_segments_posonly] WHERE
(terrestrial_positions = point_count and point_count< 20)
OR ((min_lon >= 0 // these are almost definitely noise
AND max_lon <= 0.109225)
OR (min_lat >= 0
AND max_lat <= 0.109225))
)
group by lat_bin, lon_bin
//having pings > 20
'''
print q
pings_per_day = Query(q)
In [34]:
# Grid resolution: 1-degree cells (one_over_cellsize = cells per degree).
cellsize = 1
one_over_cellsize = 1
num_lats = (max_lat-min_lat)*one_over_cellsize
num_lons = (max_lon-min_lon)*one_over_cellsize
grid = np.zeros(shape=(num_lats,num_lons))
for row in pings_per_day:
    # Row layout (from the query above): lat_bin, lon_bin, distinct_mmsi, pings.
    lat = int(row[0])
    lon = int(row[1])
    lat_index = lat-min_lat #*one_over_cellsize
    lon_index = lon-min_lon #*one_over_cellsize
    # 24 * pings / distinct vessels in the cell.  NOTE(review): this is pings
    # per vessel (the factor 24 presumably converts an hourly rate to per-day
    # -- TODO confirm); the original "divides by area in square km" comment
    # did not match the code.
    grid[lat_index][lon_index] = 24*float(row[3])/float(row[2])
makeMap(grid,"Average Positions per Day for All Vessels on Jan 1, 2015","Positions per Day",1,500.1, colormap=plt.get_cmap("viridis"))
In [57]:
def make_ping_count(thedate):
    """Query per-cell ping/vessel counts for one day, pickle them, and map them.

    thedate -- YYYYMMDD (int or str); names the daily
               [pipeline_classify_logistic_715.YYYYMMDD] table.

    Uses the global region bounds (min/max lat/lon) and grid globals.
    Writes two pickles under ../../data/density/ (raw rows and the grid),
    then draws a log-scale density map via makeMap.
    """
    q = '''
SELECT
integer(floor(lat)) lat_bin,
integer(floor(lon)) lon_bin,
count(distinct mmsi) distinct_mmsi,
count(*) pings
from (
select mmsi,lat, lon, seg_id from [pipeline_classify_logistic_715.'''+str(thedate)+''']
where lat > '''+str(min_lat)+'''
AND lat <'''+str(max_lat)+''' and
lon > '''+str(min_lon)+'''
and lon < '''+str(max_lon)+'''
)
# where
# mmsi not in (412437961,412437962,412420502,412420503,412420576,412420574,412420789,412420871,
# 900025357,900025393,413322650,414203586,412211196,412440255,412440256,412440257,412440258,412440259,
# 412440261,150200162,412440077,412440078,412420805,412420421,412440377,412425706,412447093,412211691,
# 412420276,412420917,411041815, 525018188, 412420276,412420561,533180156)
# AND
# seg_id NOT IN (
# SELECT seg_id
# FROM
# [scratch_david_seg_analysis.2015_segments_posonly] WHERE
# (terrestrial_positions = point_count and point_count< 20)
# OR ((min_lon >= 0 // these are almost definitely noise
# AND max_lon <= 0.109225)
# OR (min_lat >= 0
# AND max_lat <= 0.109225))
# )
group by lat_bin, lon_bin
//having pings > 20
'''
    pings_per_day = Query(q)
    cellsize = 1
    one_over_cellsize = 1
    num_lats = (max_lat - min_lat) * one_over_cellsize
    num_lons = (max_lon - min_lon) * one_over_cellsize
    grid = np.zeros(shape=(num_lats, num_lons))
    for row in pings_per_day:
        # Row layout: lat_bin, lon_bin, distinct_mmsi, pings.
        lat = int(row[0])
        lon = int(row[1])
        # BUG FIX: parenthesized -- the original `lat - min_lat*one_over_cellsize`
        # only gave the intended (lat - min_lat) * one_over_cellsize because
        # one_over_cellsize == 1.
        lat_index = (lat - min_lat) * one_over_cellsize
        lon_index = (lon - min_lon) * one_over_cellsize
        # 24 * pings per distinct vessel in the cell (not an area division,
        # despite the original comment).
        grid[lat_index][lon_index] = 24 * float(row[3]) / float(row[2])
    cPickle.dump(pings_per_day, open('../../data/density/' + str(thedate) + '_csiro_region_pos.p', 'wb'))
    cPickle.dump(grid, open('../../data/density/' + str(thedate) + '_csiro_region_pos_grid.p', 'wb'))
    makeMap(grid, "Average Positions per Day for All Vessels on " + str(thedate), "Positions per Day", 1, 500.1, colormap=plt.get_cmap("viridis"))
In [58]:
# Build pickles and density maps for January and July snapshots of
# 2012-2015, plus January 2016 (same nine calls as before, driven by a loop).
for snapshot in (20120101, 20120701, 20130101, 20130701,
                 20140101, 20140701, 20150101, 20150701, 20160101):
    make_ping_count(snapshot)
In [61]:
# Re-draw the daily density maps from the pickled per-cell rows, this time
# keeping only cells seen by more than 5 distinct vessels.
thedates = [20120101, 20120701, 20130101, 20130701, 20140101, 20140701, 20150101, 20150701, 20160101]
for thedate in thedates:
    pos = cPickle.load(open('../../data/density/' + str(thedate) + '_csiro_region_pos.p', 'rb'))
    grid = np.zeros(shape=(num_lats, num_lons))
    for row in pos:
        # Row layout: lat_bin, lon_bin, distinct_mmsi, pings.
        if int(row[2]) > 5:  # require > 5 distinct vessels in the cell
            lat = int(row[0])
            lon = int(row[1])
            # BUG FIX: parenthesized index math; the original
            # `lat - min_lat*one_over_cellsize` relied on one_over_cellsize == 1.
            lat_index = (lat - min_lat) * one_over_cellsize
            lon_index = (lon - min_lon) * one_over_cellsize
            # 24 * pings per distinct vessel (not an area division).
            grid[lat_index][lon_index] = 24 * float(row[3]) / float(row[2])
    makeMap(grid, "Average Positions per Day for All Vessels on " + str(thedate), "Positions per Day", 1, 500.1, colormap=plt.get_cmap("viridis"))
In [70]:
# Side-by-side density maps for 2015-01-01: our ORBCOMM-derived grid
# (pickled earlier) vs. the CSIRO-provided per-cell counts from CSV.
thedate = 20150101
grid_csiro = np.zeros(shape=(num_lats, num_lons))
with open("ans_01Jan2015.csv", "rb") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # BUG FIX: parenthesized index math; the original
        # `lat - min_lat*one_over_cellsize` relied on one_over_cellsize == 1.
        # (The disabled MMSIcount > 5 filter -- `if 1:` -- is dropped as a no-op.)
        lat_index = (int(row['LATBIN']) - min_lat) * one_over_cellsize
        lon_index = (int(row['LONBIN']) - min_lon) * one_over_cellsize
        # 24 * CSIRO pings per distinct MMSI in the cell.
        grid_csiro[lat_index][lon_index] = 24 * float(row['COUNT']) / float(row['MMSIcount'])
pos = cPickle.load(open('../../data/density/' + str(thedate) + '_csiro_region_pos.p', 'rb'))
grid = np.zeros(shape=(num_lats, num_lons))
for row in pos:
    # Row layout: lat_bin, lon_bin, distinct_mmsi, pings.
    lat_index = (int(row[0]) - min_lat) * one_over_cellsize
    lon_index = (int(row[1]) - min_lon) * one_over_cellsize
    grid[lat_index][lon_index] = 24 * float(row[3]) / float(row[2])
makeMap(grid, "Average Positions per Day for ORBCOMM Vessels on " + str(thedate), "Positions per Day", 1, 500.1, colormap=plt.get_cmap("viridis"))
makeMap(grid_csiro, "Average Positions per Day for CSIRO Vessels on " + str(thedate), "Positions per Day", 1, 500.1, colormap=plt.get_cmap("viridis"))
In [78]:
# Fractional difference between the ORBCOMM and CSIRO grids:
# (orbcomm - csiro) / orbcomm, with 0 in any cell where either grid is 0.
# FIX: the original first computed np.divide(grid, grid_csiro), whose every
# element was then overwritten by the loop -- the divide only produced
# divide-by-zero warnings.  This vectorized form yields the identical result.
newgrid = np.zeros_like(grid)
has_data = (grid != 0) & (grid_csiro != 0)
newgrid[has_data] = (grid[has_data] - grid_csiro[has_data]) / grid[has_data]
In [79]:
# NOTE(review): this plots the raw ORBCOMM/CSIRO ratio (np.divide will emit
# inf/NaN where grid_csiro has zeros), not the cleaned `newgrid` computed in
# the previous cell, and the title still says "Positions per Day".
makeMap(np.divide(grid,grid_csiro),"Average Positions per Day for ORBCOMM Vessels on "+str(thedate),
        "Positions per Day",.1,4, colormap=plt.get_cmap("viridis"))
In [85]:
def makeMapLinear(grid, fig_title, bar_title, fig_min_value, fig_max_value, colormap='viridis',
                  map_width=10, map_height=6):
    """Like makeMap, but with a linear color scale (colors.Normalize).

    Relies on module-level globals: max_lat, min_lat, max_lon, min_lon and
    one_over_cellsize.  Saves the figure as '<fig_title>.png'.

    BUG FIX: the default colormap was the misspelled 'vidris', which would
    raise if the default were ever used (all current callers pass one).
    Also drops a duplicate `norm` assignment and unused locals.
    """
    fig = plt.figure(figsize=(map_width, map_height))
    firstlat, lastlat = max_lat, min_lat
    firstlon, lastlon = min_lon, max_lon
    extra = 0
    m = Basemap(llcrnrlat=lastlat - extra, urcrnrlat=firstlat + extra,
                llcrnrlon=firstlon - extra, urcrnrlon=lastlon + extra,
                lat_ts=0, projection='mill', resolution="h")
    m.drawmapboundary()
    m.fillcontinents('#cccccc', lake_color='#cccccc')
    # Cell-edge coordinates for pcolormesh (one more edge than cells);
    # np.linspace's `num` must be an int (floats are rejected by newer numpy).
    x = np.linspace(firstlon, lastlon, int(-(firstlon - lastlon) * one_over_cellsize + 1))
    y = np.linspace(lastlat, firstlat, int((firstlat - lastlat) * one_over_cellsize + 1))
    x, y = np.meshgrid(x, y)
    converted_x, converted_y = m(x, y)
    norm = colors.Normalize(vmin=fig_min_value, vmax=fig_max_value)
    m.pcolormesh(converted_x, converted_y, grid, norm=norm,
                 vmin=fig_min_value, vmax=fig_max_value, cmap=colormap)
    plt.title(fig_title, color="#000000", fontsize=18)
    # Horizontal colorbar with matplotlib's default linear ticks.
    ax = fig.add_axes([0.2, 0.1, 0.65, 0.02])
    cb = colorbar.ColorbarBase(ax, norm=norm, orientation='horizontal', cmap=colormap)
    cb.set_label(bar_title, labelpad=-40, y=0.45, color="#000000")
    plt.savefig(fig_title + ".png", bbox_inches='tight', dpi=300,
                transparent=True, pad_inches=.1)
    plt.show()
In [87]:
# Ratio of ORBCOMM to CSIRO pings per MMSI; cells where either grid has no
# data get the neutral value 1 (center of the diverging 'bwr' scale).
# FIX: suppress the divide-by-zero warnings the original emitted and replace
# the O(n^2) Python loop with a boolean mask -- output is identical.
with np.errstate(divide='ignore', invalid='ignore'):
    newgrid = np.divide(grid, grid_csiro)
newgrid[(grid == 0) | (grid_csiro == 0)] = 1
makeMapLinear(newgrid, "Ratio of Orbcomm to CSIRO Pings per MMSI on " + str(thedate),
              "Obcomm Pings per mmsi ", 0, 2, colormap=plt.get_cmap("bwr"))
In [92]:
thedates = [20120101, 20120701, 20130101, 20130701, 20140101, 20140701, 20150101, 20150701, 20160101]
def makeComparison(thedate):
    """Map the ratio of ORBCOMM to CSIRO pings-per-MMSI for one day.

    thedate -- YYYYMMDD; reads 'ans<thedate>.csv' (CSIRO per-cell counts)
               and the pickled ORBCOMM rows from ../../data/density/.

    Cells where either source has no data are set to the neutral ratio 1
    before plotting on a diverging linear scale.
    Uses globals num_lats, num_lons, min_lat, min_lon, one_over_cellsize.
    FIXES: removes a verbatim-duplicated newgrid computation, drops the
    no-op `if 1:` guards, and parenthesizes the index math (the original
    `lat - min_lat*one_over_cellsize` relied on one_over_cellsize == 1).
    """
    grid_csiro = np.zeros(shape=(num_lats, num_lons))
    with open("ans" + str(thedate) + ".csv", "rb") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            lat_index = (int(row['LATBIN']) - min_lat) * one_over_cellsize
            lon_index = (int(row['LONBIN']) - min_lon) * one_over_cellsize
            # CSIRO pings per distinct MMSI in the cell.
            grid_csiro[lat_index][lon_index] = float(row['COUNT']) / float(row['MMSIcount'])
    pos = cPickle.load(open('../../data/density/' + str(thedate) + '_csiro_region_pos.p', 'rb'))
    grid = np.zeros(shape=(num_lats, num_lons))
    for row in pos:
        # Row layout: lat_bin, lon_bin, distinct_mmsi, pings.
        lat_index = (int(row[0]) - min_lat) * one_over_cellsize
        lon_index = (int(row[1]) - min_lon) * one_over_cellsize
        # ORBCOMM pings per distinct MMSI in the cell.
        grid[lat_index][lon_index] = float(row[3]) / float(row[2])
    # Ratio grid; cells lacking data in either source become 1 (neutral).
    with np.errstate(divide='ignore', invalid='ignore'):
        newgrid = np.divide(grid, grid_csiro)
    newgrid[(grid == 0) | (grid_csiro == 0)] = 1
    makeMapLinear(newgrid, "Ratio of Orbcomm to CSIRO Pings per MMSI on " + str(thedate),
                  "Obcomm Pings per mmsi ", 0, 2, colormap=plt.get_cmap("bwr"))
In [93]:
# Run the pings-per-MMSI comparison for every date with a CSIRO csv.
# NOTE(review): the two 2012 dates from the earlier list are dropped here --
# presumably no ans2012*.csv files exist; confirm.
thedates = [20130101, 20130701, 20140101, 20140701, 20150101, 20150701, 20160101]
for d in thedates:
    makeComparison(d)
In [94]:
thedates = [20120101, 20120701, 20130101, 20130701, 20140101, 20140701, 20150101, 20150701, 20160101]
def makeComparison2(thedate):
    """Map the ratio of ORBCOMM to CSIRO *distinct-vessel counts* for one day.

    thedate -- YYYYMMDD; reads 'ans<thedate>.csv' and the pickled ORBCOMM
               rows from ../../data/density/.

    Like makeComparison, but the grids hold distinct MMSI counts rather
    than pings per MMSI.  NOTE(review): the figure title still says
    "Pings per MMSI", which is misleading for this variant.
    Uses globals num_lats, num_lons, min_lat, min_lon, one_over_cellsize.
    FIXES: removes a verbatim-duplicated newgrid computation, drops the
    no-op `if 1:` guards, and parenthesizes the index math (the original
    relied on one_over_cellsize == 1).
    """
    grid_csiro = np.zeros(shape=(num_lats, num_lons))
    with open("ans" + str(thedate) + ".csv", "rb") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            lat_index = (int(row['LATBIN']) - min_lat) * one_over_cellsize
            lon_index = (int(row['LONBIN']) - min_lon) * one_over_cellsize
            # Distinct MMSI seen by CSIRO in the cell.
            grid_csiro[lat_index][lon_index] = float(row['MMSIcount'])
    pos = cPickle.load(open('../../data/density/' + str(thedate) + '_csiro_region_pos.p', 'rb'))
    grid = np.zeros(shape=(num_lats, num_lons))
    for row in pos:
        # Row layout: lat_bin, lon_bin, distinct_mmsi, pings.
        lat_index = (int(row[0]) - min_lat) * one_over_cellsize
        lon_index = (int(row[1]) - min_lon) * one_over_cellsize
        # Distinct MMSI seen by ORBCOMM in the cell.
        grid[lat_index][lon_index] = float(row[2])
    # Ratio grid; cells lacking data in either source become 1 (neutral).
    with np.errstate(divide='ignore', invalid='ignore'):
        newgrid = np.divide(grid, grid_csiro)
    newgrid[(grid == 0) | (grid_csiro == 0)] = 1
    makeMapLinear(newgrid, "Ratio of Orbcomm to CSIRO Pings per MMSI on " + str(thedate),
                  "Obcomm Pings per mmsi ", 0, 2, colormap=plt.get_cmap("bwr"))
In [95]:
# Run the distinct-vessel-count comparison for every date with a CSIRO csv
# (again skipping the 2012 dates).
thedates = [20130101, 20130701, 20140101, 20140701, 20150101, 20150701, 20160101]
for d in thedates:
    makeComparison2(d)
In [ ]: