In [2]:
import time
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib import colors,colorbar
%matplotlib inline
import csv
import math
from math import radians, cos, sin, asin, sqrt
from scipy import stats
import math
import cPickle
In [5]:
import bq
client = bq.Client.Get()
In [6]:
def Query(q):
t0 = time.time()
answer = client.ReadTableRows(client.Query(q)['configuration']['query']['destinationTable'])
print 'Query time: ' + str(time.time() - t0) + ' seconds.'
return answer
In [12]:
def query_date(thedate):
q = '''
SELECT
integer(FLOOR(first_lat*10)) lat_bin,
integer(FLOOR(first_lon*10)) lon_bin,
integer(FLOOR(avg_lat*10)) lat_bin_avg,
integer(FLOOR(avg_lon*10)) lon_bin_avg,
satellite_positions sat_positions,
terrestrial_positions terrestrial_positions,
positions_weighted,
avg_speed,
slow_pings,
mmsi
FROM
(SELECT
mmsi,
SUM( CASE WHEN speed = 0 OR (speed<=2 AND type IN (18, 19)) THEN 180
WHEN (speed > 0 AND speed <14 AND type IN (1,2,3)AND turn = 0 )
OR (speed>2 AND type IN (18,19)) THEN 10
when speed>0 and speed<14 and type in (1,2,3) and turn !=0 then 3.3
when speed>=14 and speed<23 and type in (1,2,3) and turn = 0 then 6
when type in (1,2,3) and (speed>=23 or (speed>=14 and turn !=0)) then 2
END) positions_weighted,
first(lat) first_lat,
first(lon) first_lon,
avg(lat) avg_lat,
avg(lon) avg_lon,
max(lat) max_lat,
min(lat) min_lat,
max(lon) max_lon,
min(lon) min_lon,
avg(speed) avg_speed,
sum(if( (speed=0 and type in (1,2,3)) or (speed<2 and type in (18,19)),1,0 )) slow_pings,
sum( if(REGEXP_REPLACE(tagblock_station, 'u', '') IN ('rORBCOMM000',
'rORBCOMM01',
'rORBCOMM008',
'rORBCOMM009',
'rORBCOMM010'),1,0)) terrestrial_positions,
sum( if(REGEXP_REPLACE(tagblock_station, 'u', '') not IN ('rORBCOMM000',
'rORBCOMM01',
'rORBCOMM008',
'rORBCOMM009',
'rORBCOMM010'),1,0)) satellite_positions,
FROM
[pipeline_normalize.'''+thedate+''']
WHERE
type IN (1,2,3,18,19) and lat is not null and lon is not null and speed is not null and turn is not null
group by mmsi
)
where
max_lat - min_lat <5
AND (max_lon - min_lon < 10
OR first_lon > 170
OR first_lon < -170)
AND mmsi IN (select mmsi from
[scratch_david_gapanalysis.good_mmsi_2015_1000pings])
'''
positions = Query(q)
cPickle.dump(positions, open('../../data/density/'+thedate+'_v2_vessels.p', 'wb'))
In [15]:
for i in range(1,32):
thedate = "201503"
if i<10:
thedate += "0"+str(i)
else:
thedate += str(i)
print thedate
query_date(thedate)
In [3]:
import csv
for i in range(1,32):
thedate = "201501"
if i<10:
thedate += "0"+str(i)
else:
thedate += str(i)
print thedate
positions = cPickle.load(open('../../data/density/'+thedate+'_v2_vessels.p', 'rb'))
with open('../../data/density/'+thedate+'_v2_vessels.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerow(["lat_bin","lon_bin","lat_bin_avg","lon_bin_avg","sat_positions",
"terrestrial_positions","positions_weighted",
"avg_speed","slow_pings","mmsi"])
writer.writerows(positions)
In [5]:
for i in range(1,29):
thedate = "201502"
if i<10:
thedate += "0"+str(i)
else:
thedate += str(i)
print thedate
positions = cPickle.load(open('../../data/density/'+thedate+'_v2_vessels.p', 'rb'))
with open('../../data/density/'+thedate+'_v2_vessels.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerow(["lat_bin","lon_bin","lat_bin_avg","lon_bin_avg","sat_positions",
"terrestrial_positions","positions_weighted",
"avg_speed","slow_pings","mmsi"])
writer.writerows(positions)
In [4]:
for i in range(1,31):
thedate = "201503"
if i<10:
thedate += "0"+str(i)
else:
thedate += str(i)
print thedate
positions = cPickle.load(open('../../data/density/'+thedate+'_v2_vessels.p', 'rb'))
with open('../../data/density/'+thedate+'_v2_vessels.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerow(["lat_bin","lon_bin","lat_bin_avg","lon_bin_avg","sat_positions",
"terrestrial_positions","positions_weighted",
"avg_speed","slow_pings","mmsi"])
writer.writerows(positions)
In [6]:
for i in range(31,32):
thedate = "201503"
if i<10:
thedate += "0"+str(i)
else:
thedate += str(i)
print thedate
positions = cPickle.load(open('../../data/density/'+thedate+'_v2_vessels.p', 'rb'))
with open('../../data/density/'+thedate+'_v2_vessels.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerow(["lat_bin","lon_bin","lat_bin_avg","lon_bin_avg","sat_positions",
"terrestrial_positions","positions_weighted",
"avg_speed","slow_pings","mmsi"])
writer.writerows(positions)
In [ ]: