The goal of this post is to visualize flights taken from Google location data using Python
In [1]:
import os
import glob
import json
import time
import fiona
import datetime
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Image
from shapely.prepared import prep
from descartes import PolygonPatch
from mpl_toolkits.basemap import Basemap
from matplotlib.collections import PatchCollection
from shapely.geometry import Point, Polygon, MultiPoint, MultiPolygon
warnings.filterwarnings('ignore')
In [2]:
with open('LocationHistory2018.json', 'r') as fh:
raw = json.loads(fh.read())
# use location_data as an abbreviation for location data
location_data = pd.DataFrame(raw['locations'])
del raw #free up some memory
# convert to typical units
location_data['latitudeE7'] = location_data['latitudeE7']/float(1e7)
location_data['longitudeE7'] = location_data['longitudeE7']/float(1e7)
location_data['timestampMs'] = location_data['timestampMs'].map(lambda x: float(x)/1000) #to seconds
location_data['datetime'] = location_data.timestampMs.map(datetime.datetime.fromtimestamp)
# Rename fields based on the conversions we just did
location_data.rename(columns={'latitudeE7':'latitude',
'longitudeE7':'longitude',
'timestampMs':'timestamp'}, inplace=True)
# Ignore locations with accuracy estimates over 1000m
location_data = location_data[location_data.accuracy < 1000]
location_data.reset_index(drop=True, inplace=True)
In [3]:
location_data.head()
Out[3]:
In [4]:
location_data.dtypes
Out[4]:
In [5]:
location_data.describe()
Out[5]:
In [6]:
print("earliest observed date: {}".format(min(location_data["datetime"]).strftime('%m-%d-%Y')))
print("latest observed date: {}".format(max(location_data["datetime"]).strftime('%m-%d-%Y')))
earliest_obs = min(location_data["datetime"]).strftime('%m-%d-%Y')
latest_obs = max(location_data["datetime"]).strftime('%m-%d-%Y')
Consult this post for more info about degrees and radians in distance calculation.
In [7]:
degrees_to_radians = np.pi/180.0
location_data['phi'] = (90.0 - location_data.latitude) * degrees_to_radians
location_data['theta'] = location_data.longitude * degrees_to_radians
# Compute distance between two GPS points on a unit sphere
location_data['distance'] = np.arccos(
np.sin(location_data.phi)*np.sin(location_data.phi.shift(-1)) * np.cos(location_data.theta - location_data.theta.shift(-1)) +
np.cos(location_data.phi)*np.cos(location_data.phi.shift(-1))) * 6378.100 # radius of earth in km
In [8]:
location_data['speed'] = location_data.distance/(location_data.timestamp - location_data.timestamp.shift(-1))*3600 #km/hr
In [9]:
flight_data = pd.DataFrame(data={'end_lat':location_data.latitude,
'end_lon':location_data.longitude,
'end_datetime':location_data.datetime,
'distance':location_data.distance,
'speed':location_data.speed,
'start_lat':location_data.shift(-1).latitude,
'start_lon':location_data.shift(-1).longitude,
'start_datetime':location_data.shift(-1).datetime,
}).reset_index(drop=True)
In [10]:
def distance_on_unit_sphere(lat1, long1, lat2, long2):
# http://www.johndcook.com/python_longitude_latitude.html
# Convert latitude and longitude to spherical coordinates in radians.
degrees_to_radians = np.pi/180.0
# phi = 90 - latitude
phi1 = (90.0 - lat1)*degrees_to_radians
phi2 = (90.0 - lat2)*degrees_to_radians
# theta = longitude
theta1 = long1*degrees_to_radians
theta2 = long2*degrees_to_radians
cos = (np.sin(phi1)*np.sin(phi2)*np.cos(theta1 - theta2) +
np.cos(phi1)*np.cos(phi2))
arc = np.arccos( cos )
# Remember to multiply arc by the radius of the earth
# in your favorite set of units to get length.
return arc
In [11]:
flights = flight_data[(flight_data.speed > 40) & (flight_data.distance > 80)].reset_index()
# Combine instances of flight that are directly adjacent
# Find the indices of flights that are directly adjacent
_f = flights[flights['index'].diff() == 1]
adjacent_flight_groups = np.split(_f, (_f['index'].diff() > 1).nonzero()[0])
# Now iterate through the groups of adjacent flights and merge their data into
# one flight entry
for flight_group in adjacent_flight_groups:
idx = flight_group.index[0] - 1 #the index of flight termination
flights.loc[idx, ['start_lat', 'start_lon', 'start_datetime']] = [flight_group.iloc[-1].start_lat,
flight_group.iloc[-1].start_lon,
flight_group.iloc[-1].start_datetime]
# Recompute total distance of flight
flights.loc[idx, 'distance'] = distance_on_unit_sphere(flights.loc[idx].start_lat,
flights.loc[idx].start_lon,
flights.loc[idx].end_lat,
flights.loc[idx].end_lon)*6378.1
# Now remove the "flight" entries we don't need anymore.
flights = flights.drop(_f.index).reset_index(drop=True)
# Finally, we can be confident that we've removed instances of flights broken up by
# GPS data points during flight. We can now be more liberal in our constraints for what
# constitutes flight. Let's remove any instances below 200km as a final measure.
flights = flights[flights.distance > 200].reset_index(drop=True)
This algorithm worked 100% of the time for me - no false positives or negatives. But the adjacency-criteria of the algorithm is fairly brittle. The core of it centers around the assumption that inter-flight GPS data will be directly adjacent to one another. That's why the initial screening on line 1 of the previous cell had to be so liberal.
Now, the flights DataFrame contains only instances of true flights which facilitates plotting with Matplotlib's Basemap. If we plot on a flat projection like tmerc, the drawgreatcircle function will produce a true path arc just like we see in the in-flight magazines.
In [12]:
flights = flights.sort_values(by="start_datetime").reset_index()
flights["index"] = flights.index
flights["index"] = flights["index"].apply(lambda x: '{0:0>2}'.format(x))
flights.index = flights["index"]
In [13]:
flights.iloc[0]
Out[13]:
flights2018
within the output
directory to save all .pngs
In [14]:
if not os.path.exists('output/flights2018'):
os.makedirs('output/flights2018')
In [15]:
fig = plt.figure(figsize=(18,12))
# Plotting across the international dateline is tough. One option is to break up flights
# by hemisphere. Otherwise, you'd need to plot using a different projection like 'robin'
# and potentially center on the Int'l Dateline (lon_0=-180)
# Western Hemisphere Flights
# flights = flights[(flights.start_lon < 0) & (flights.end_lon < 0)]
# Eastern Hemisphere Flights
# flights = flights[(flights.start_lon > 0) & (flights.end_lon > 0)]
xbuf = 0.2
ybuf = 0.35
min_lat = np.min([flights.end_lat.min(), flights.start_lat.min()])
min_lon = np.min([flights.end_lon.min(), flights.start_lon.min()])
max_lat = np.max([flights.end_lat.max(), flights.start_lat.max()])
max_lon = np.max([flights.end_lon.max(), flights.start_lon.max()])
width = max_lon - min_lon
height = max_lat - min_lat
m = Basemap(llcrnrlon=min_lon - width* xbuf,
llcrnrlat=min_lat - height*ybuf,
urcrnrlon=max_lon + width* xbuf,
urcrnrlat=max_lat + height*ybuf,
projection='merc',
resolution='l',
lat_0=min_lat + height/2,
lon_0=min_lon + width/2,)
m.drawmapboundary(fill_color='#EBF4FA')
m.drawcoastlines()
m.drawstates()
m.drawcountries()
m.fillcontinents()
for idx, f in flights.iterrows():
m.drawgreatcircle(f.start_lon, f.start_lat, f.end_lon,
f.end_lat, linewidth=3, alpha=1, color='#ffd700' )
m.plot(*m(f.start_lon, f.start_lat), color='g', alpha=0.8, marker='o')
m.plot(*m(f.end_lon, f.end_lat), color='r', alpha=0.5, marker='o' )
plt.savefig('output/flights2018/flights_{}.png'.format(idx),
dpi=150, frameon=False, transparent=False,
bbox_inches='tight', pad_inches=0.2)
m.drawgreatcircle(f.start_lon, f.start_lat, f.end_lon, f.end_lat,
linewidth=3, alpha=0.5, color='#800080' )
m.drawgreatcircle(f.start_lon, f.start_lat, f.end_lon, f.end_lat,
linewidth=3, alpha=0.5, color='b' )
m.plot(*m(f.start_lon, f.start_lat), color='k', alpha=0.8, marker='o')
m.plot(*m(f.end_lon, f.end_lat), color='k', alpha=0.5, marker='o' )
flights2018
directory with the glob
In [16]:
# code to create .gif from:
# http://superfluoussextant.com/making-gifs-with-python.html
gif_name = 'flights2018'
# Get all the pngs in the current directory
file_list = glob.glob('output/flights2018/*.png')
# Sort the images by number
list.sort(file_list, key=lambda x: int(x.split('_')[1].split('.png')[0]))
with open('image_list.txt', 'w') as file:
for item in file_list:
file.write("%s\n" % item)
# On Windows convert is 'magick'
os.system('magick -loop 0 -delay "10" @image_list.txt {}.gif'.format(gif_name))
# On Unix/Mac use convert
#os.system('convert -loop 0 -delay 15 @image_list.txt {}.gif'.format(gif_name))
Out[16]:
In [17]:
Image(url='flights2018.gif')
Out[17]:
In [18]:
fig = plt.figure(figsize=(18,12))
current_date = time.strftime("printed: %a, %d %b %Y", time.localtime())
png_name = 'flights2018'
xbuf = 0.2
ybuf = 0.35
min_lat = np.min([flights.end_lat.min(), flights.start_lat.min()])
min_lon = np.min([flights.end_lon.min(), flights.start_lon.min()])
max_lat = np.max([flights.end_lat.max(), flights.start_lat.max()])
max_lon = np.max([flights.end_lon.max(), flights.start_lon.max()])
width = max_lon - min_lon
height = max_lat - min_lat
m = Basemap(llcrnrlon=min_lon - width* xbuf,
llcrnrlat=min_lat - height*ybuf,
urcrnrlon=max_lon + width* xbuf,
urcrnrlat=max_lat + height*ybuf,
projection='merc',
resolution='l',
lat_0=min_lat + height/2,
lon_0=min_lon + width/2,)
m.drawmapboundary(fill_color='#EBF4FA')
m.drawcoastlines()
m.drawstates()
m.drawcountries()
m.fillcontinents()
for idx, f in flights.iterrows():
m.drawgreatcircle(f.start_lon, f.start_lat, f.end_lon, f.end_lat,
linewidth=3, alpha=0.4, color='#800080')
m.plot(*m(f.start_lon, f.start_lat), color='g', alpha=0.8, marker='o')
m.plot(*m(f.end_lon, f.end_lat), color='r', alpha=0.5, marker='o' )
fig.text(0.125, 0.18,
"Data collected from 2013-2018 on Android \nPlotted using Python, Basemap \n%s" % (current_date),
ha='left', color='#555555', style='italic')
fig.text(0.125, 0.15, "kivanpolimis.com", color='#555555', fontsize=16, ha='left')
plt.savefig('{}.png'.format(png_name),
dpi=150, frameon=False, transparent=False, bbox_inches='tight', pad_inches=0.2)
In [19]:
Image(filename='flights2018.png')
Out[19]:
In [20]:
import sys
import time
print("System and module version information: \n")
print('Python version: \n{} \n'.format(sys.version_info))
print("last updated: {}".format(time.strftime("%a, %d %b %Y %H:%M", time.localtime())))