In [1]:
import sys
import pandas as pd
import xml.etree.ElementTree as ET
from math import radians, cos, sin, asin, sqrt
from pykalman import KalmanFilter

In [2]:
def output_gpx(points, output_filename):
    """
    Output a GPX file with latitude and longitude from the points DataFrame.
    """
    from xml.dom.minidom import getDOMImplementation
    def append_trkpt(pt, trkseg, doc):
        trkpt = doc.createElement('trkpt')
        trkpt.setAttribute('lat', '%.8f' % (pt['lat']))
        trkpt.setAttribute('lon', '%.8f' % (pt['lon']))
        trkseg.appendChild(trkpt)
    
    doc = getDOMImplementation().createDocument(None, 'gpx', None)
    trk = doc.createElement('trk')
    doc.documentElement.appendChild(trk)
    trkseg = doc.createElement('trkseg')
    trk.appendChild(trkseg)
    
    points.apply(append_trkpt, axis=1, trkseg=trkseg, doc=doc)
    
    with open(output_filename, 'w') as fh:
        doc.writexml(fh, indent=' ')
        
def get_data(filename):
    tree = ET.parse(filename)
    ns = tree.getroot().tag.split('}')[0]+'}'
    pt_nodes = tree.findall("./{0}trk/{0}trkseg/{0}trkpt".format(ns))
    pts = [l.attrib for l in pt_nodes]
    df = pd.DataFrame(pts)
    df['lat'] =df['lat'].apply(float)
    df['lon'] =df['lon'].apply(float)
    return df

# Re-Submitting with source url
# https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points
def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    r = r * 1000 # convert to meters
    return c * r

In [3]:
def distance(points_df):
    df2 = points_df.copy()
    df2.columns = ['lat1','lon1']
    points_shift = df2.shift(1)
    points_shift.columns = ['lat2','lon2']
    points_comb = df2.join(points_shift)[1:]
    points_comb['dist'] = points_comb.apply(lambda x: haversine(x.lat1,x.lon1,x.lat2,x.lon2),axis=1)
    return points_comb['dist'].sum()

In [4]:
def smooth(points_df):
    initial_state = points_df.iloc[0]
    observation_stddev = 15 / (10 ** 5)
    transition_stddev = 7 / (10 ** 5)
    observation_covariance = [[observation_stddev ** 2, 0], [0, observation_stddev ** 2]]
    transition_covariance = [[transition_stddev ** 2, 0], [0, transition_stddev ** 2]]


    kf = KalmanFilter(initial_state_mean=initial_state,
                    transition_matrices = [[1, 0], [0, 1]], 
                    observation_matrices = [[1, 0], [0, 1]],
                    observation_covariance = observation_covariance,
                    transition_covariance = transition_covariance)
    kalman_smoothed, _ = kf.smooth(points_df)
    df = pd.DataFrame(columns= ['lat','lon'])
    df['lat'] = pd.to_numeric(kalman_smoothed[:,0])
    df['lon'] = pd.to_numeric(kalman_smoothed[:,1])
    return df

In [6]:
def main():
    points = get_data(sys.argv[1])

    print('Unfiltered distance: %0.2f' % (distance(points),))
    
    smoothed_points = smooth(points)
    print('Filtered distance: %0.2f' % (distance(smoothed_points),))
    output_gpx(smoothed_points, 'out.gpx')

    smoothed_points
if __name__ == '__main__':
    main()


---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-6-6fd33fe8d9a1> in <module>()
     10     smoothed_points
     11 if __name__ == '__main__':
---> 12     main()

<ipython-input-6-6fd33fe8d9a1> in main()
      1 def main():
----> 2     points = get_data(sys.argv[1])
      3 
      4     print('Unfiltered distance: %0.2f' % (distance(points),))
      5 

<ipython-input-2-3e651bbb7eb5> in get_data(filename)
     22 
     23 def get_data(filename):
---> 24     tree = ET.parse(filename)
     25     ns = tree.getroot().tag.split('}')[0]+'}'
     26     pt_nodes = tree.findall("./{0}trk/{0}trkseg/{0}trkpt".format(ns))

/home/john/anaconda3/lib/python3.6/xml/etree/ElementTree.py in parse(source, parser)
   1194     """
   1195     tree = ElementTree()
-> 1196     tree.parse(source, parser)
   1197     return tree
   1198 

/home/john/anaconda3/lib/python3.6/xml/etree/ElementTree.py in parse(self, source, parser)
    584         close_source = False
    585         if not hasattr(source, "read"):
--> 586             source = open(source, "rb")
    587             close_source = True
    588         try:

FileNotFoundError: [Errno 2] No such file or directory: '-f'