In [1]:
import pandas as pd
import geopandas as gpd
import datetime
import numpy as np
from shapely.geometry import Point, LineString
import shapely.wkt
import matplotlib.pyplot as plt
%matplotlib inline

# Background reading on accelerometers / IMU sensors:
# http://www.starlino.com/imu_guide.html

Load original data


In [2]:
# Phone info: the first line of the track file is a semicolon-separated
# description of the recording device (see the printed output below).
# NOTE(review): this cell reads SEQ_0 while the following cells read SEQ_1 --
# confirm that is intended.
# The `with` block guarantees the file handle is closed even if reading fails.
with open('../data/SEQ_0/track.txt', 'r') as f:
    phone = f.readline()
# Single-argument print gives the same output under Python 2 and Python 3.
print(' '.join(['phone info:', phone]))


phone info: motorola Moto G (4);7.0;1.1.6;2.0.10;photo


In [3]:
# Load only two columns of the track file: position 0 (named 'timestamp')
# and position 15 (named 'photo'). The first line of the file is skipped via
# skiprows and the last line via skipfooter (which requires the python engine).
photos = pd.read_csv(
    '../data/SEQ_1/track.txt',
    sep=';',
    header=None,
    usecols=[0, 15],
    skiprows=[0],
    skipfooter=1,
    engine='python',
)
photos.columns = ['timestamp', 'photo']

# Keep only the rows that actually carry a photo value.
hasPhoto = photos['photo'].notnull()
photos = photos[hasPhoto]
photos.head()


Out[3]:
timestamp photo
4491 1.494902e+09 0.0
5630 1.494902e+09 1.0
6245 1.494902e+09 2.0
7128 1.494902e+09 3.0
7737 1.494902e+09 4.0

In [ ]:
# Timestamp of the row labelled 4491 (the first row shown in photos.head()).
photos.loc[4491, 'timestamp']

In [4]:
# Read the original sensor data from the file inside track.txt.gz that OSC
# uses to store sensor readings.

# Names given to the selected columns, in file order.
names = ['timestamp', 'long', 'lat', 'elevation', 'horizontal_accu',
         'GPSspeed', 'accelerationX', 'accelerationY', 'accelerationZ',
         'gravityX', 'gravityY', 'gravityZ']

# Positions of those columns within each semicolon-separated line.
columnPositions = [0, 1, 2, 3, 4, 5, 9, 10, 11, 16, 17, 18]

# The first line of the file is skipped via skiprows and the last line via
# skipfooter (which requires the python engine).
data = pd.read_csv(
    '../data/SEQ_1/track.txt',
    sep=';',
    header=None,
    usecols=columnPositions,
    skiprows=[0],
    skipfooter=1,
    engine='python',
)
data.columns = names

data.head()


Out[4]:
timestamp long lat elevation horizontal_accu GPSspeed accelerationX accelerationY accelerationZ gravityX gravityY gravityZ
0 1.494902e+09 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 1.494902e+09 NaN NaN NaN NaN NaN -0.012438 0.003953 -0.006077 NaN NaN NaN
2 1.494902e+09 NaN NaN NaN NaN NaN NaN NaN NaN -0.148694 0.845168 0.513401
3 1.494902e+09 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 1.494902e+09 NaN NaN NaN NaN NaN -0.012438 0.003953 -0.006077 NaN NaN NaN

In [5]:
# Convert the epoch-seconds 'timestamp' column into local-time datetimes.
#
# A value that cannot be converted is reported and replaced by NaT so that
# `dates` always has exactly one entry per row; skipping the row instead would
# make the assignment below fail with a length mismatch. Only the conversion
# errors fromtimestamp is documented to raise are caught, so unrelated problems
# (e.g. re-running this cell on already-converted data) still surface.
dates = []
for i, epochSeconds in enumerate(data['timestamp']):
    try:
        dates.append(datetime.datetime.fromtimestamp(epochSeconds))
    except (ValueError, OverflowError, OSError):
        # Single-argument print behaves the same on Python 2 and Python 3.
        print(' '.join(['Error with row:', str(i)]))
        dates.append(pd.NaT)
data['timestamp'] = dates
data.head()


Out[5]:
timestamp long lat elevation horizontal_accu GPSspeed accelerationX accelerationY accelerationZ gravityX gravityY gravityZ
0 2017-05-15 22:28:20.497406 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 2017-05-15 22:28:20.497364 NaN NaN NaN NaN NaN -0.012438 0.003953 -0.006077 NaN NaN NaN
2 2017-05-15 22:28:20.497339 NaN NaN NaN NaN NaN NaN NaN NaN -0.148694 0.845168 0.513401
3 2017-05-15 22:28:20.507680 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 2017-05-15 22:28:20.507618 NaN NaN NaN NaN NaN -0.012438 0.003953 -0.006077 NaN NaN NaN

In [6]:
# Drop rows in which every column other than the timestamp is empty,
# then renumber the remaining rows 0..n-1.
hasMeasurement = data.iloc[:, 1:].notnull().any(axis=1)
data = data.loc[hasMeasurement, :]
data.index = range(data.shape[0])

data.head()


Out[6]:
timestamp long lat elevation horizontal_accu GPSspeed accelerationX accelerationY accelerationZ gravityX gravityY gravityZ
0 2017-05-15 22:28:20.497364 NaN NaN NaN NaN NaN -0.012438 0.003953 -0.006077 NaN NaN NaN
1 2017-05-15 22:28:20.497339 NaN NaN NaN NaN NaN NaN NaN NaN -0.148694 0.845168 0.513401
2 2017-05-15 22:28:20.507618 NaN NaN NaN NaN NaN -0.012438 0.003953 -0.006077 NaN NaN NaN
3 2017-05-15 22:28:20.507593 NaN NaN NaN NaN NaN NaN NaN NaN -0.148694 0.845168 0.513401
4 2017-05-15 22:28:20.517672 NaN NaN NaN NaN NaN -0.012438 0.003953 -0.006077 NaN NaN NaN

In [7]:
# Short sample of the data with GPS data at the extremes:
# rows 49 to 451, with row 250 in the middle.
# Choose the long, lat and acceleration columns.
# Compute the Euclidean distance.


#dataClip = data.loc[49:451,['long','lat','accelerationX','accelerationY','accelerationZ']]
#dataClip.dropna(axis=0,how='all',inplace=True)
#dataClip.to_csv('testcase.csv',index_label=False)
#dataClip.head()

In [8]:
# Work on the GPS position and raw acceleration columns only, discarding rows
# where all five of them are empty.
clipColumns = ['long', 'lat', 'accelerationX', 'accelerationY', 'accelerationZ']
dataClip = data[clipColumns].dropna(axis=0, how='all')
dataClip.head()


Out[8]:
long lat accelerationX accelerationY accelerationZ
0 NaN NaN -0.012438 0.003953 -0.006077
2 NaN NaN -0.012438 0.003953 -0.006077
4 NaN NaN -0.012438 0.003953 -0.006077
6 NaN NaN -0.012438 0.003953 -0.006077
8 NaN NaN -0.012438 0.003953 -0.006077

Geography


In [9]:
print('GPS data points for clip data:')

# Rows with a longitude value are the GPS fixes; keep their coordinates and
# remember each fix's row label in 'pointIndex'.
hasGpsFix = dataClip['long'].notnull()
gpsDataPoints = dataClip.loc[hasGpsFix, ['long', 'lat']]
gpsDataPoints['pointIndex'] = gpsDataPoints.index
gpsDataPoints.head()


GPS data points for clip data:
Out[9]:
long lat pointIndex
136 -74.000973 40.695163 136
341 -74.000974 40.695165 341
546 -74.000975 40.695166 546
825 -74.000982 40.695163 825
1216 -74.000986 40.695160 1216

In [10]:
# Build one geometry per pair of consecutive GPS fixes: the centroid of the
# straight segment joining a fix to the next one. The last fix has no
# successor, so `geometry` ends up with one entry fewer than gpsDataPoints
# (and is empty when there are fewer than two fixes).
#
# The centroid is taken directly from the LineString; serialising it to WKT
# and parsing it back is unnecessary. Fixes are paired by position, so no
# per-row label lookups are needed.
coordinates = list(zip(gpsDataPoints['long'], gpsDataPoints['lat']))
geometry = [
    LineString([startPoint, endPoint]).centroid
    for startPoint, endPoint in zip(coordinates[:-1], coordinates[1:])
]

In [11]:
# Coordinate reference system for the long/lat values (EPSG:4326).
crs = {'init': 'epsg:4326'}

# The final GPS fix is left out so the remaining rows line up one-to-one with
# the entries of `geometry`, which is then used as the geometry column.
# Note: running this cell a second time drops another row.
gpsDataPoints = gpd.GeoDataFrame(
    gpsDataPoints.iloc[:-1],
    geometry=geometry,
    crs=crs,
)
gpsDataPoints.head()


Out[11]:
long lat pointIndex geometry
136 -74.000973 40.695163 136 POINT (-74.00097359999999 40.69516439)
341 -74.000974 40.695165 341 POINT (-74.000974575 40.69516552)
546 -74.000975 40.695166 546 POINT (-74.00097883000001 40.695164275)
825 -74.000982 40.695163 825 POINT (-74.00098446999999 40.69516133)
1216 -74.000986 40.695160 1216 POINT (-74.000988475 40.695159305)

In [12]:
# Quick visual check of the geometries stored in gpsDataPoints.
gpsDataPoints.plot()


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5bda93c510>

Total vector from XYZ


In [ ]:
# Bring 'pointIndex' onto the acceleration rows. No join keys are given, so
# the left join uses the columns both frames share. The geometry column is
# left out of the merge.
gpsAttributes = gpsDataPoints.drop(['geometry'], axis=1)
dataClip = dataClip.merge(gpsAttributes, how='left')
dataClip.head()

In [ ]:
# Propagate each GPS fix's pointIndex forward to the rows that follow it, so
# every row is tagged with the most recent fix. Rows before the first fix
# stay NaN.
# .ffill() is the direct equivalent of fillna(method='ffill'), which is
# deprecated in recent pandas releases.
dataClip['pointIndex'] = dataClip['pointIndex'].ffill()
dataClip.head()

In [ ]:
# Previous-row acceleration values: shift every column down by one row, keep
# only the three acceleration columns and rename them with a 'Shift' suffix.
dataClipShifted = dataClip.shift(1).drop(['long', 'lat', 'pointIndex'], axis=1)
dataClipShifted.columns = ['accelerationXShift', 'accelerationYShift', 'accelerationZShift']

# Put current and previous readings side by side, drop the coordinates, and
# keep only rows where every remaining column has a value.
dataClip = (
    pd.concat([dataClip, dataClipShifted], axis=1)
    .drop(['long', 'lat'], axis=1)
    .dropna(axis=0, how='any')
)

dataClipShifted.head()

Calculating vector

$ V = \sqrt{(x_1 - x_{1,lag})^2 + (y_1 - y_{1,lag})^2 + (z_1 - z_{1,lag})^2} $


In [ ]:
# Magnitude of the change in acceleration between consecutive rows.
# NOTE(review): only the X and Z components are used; the Y term
# (accelerationY - accelerationYShift) ** 2 is disabled here although the
# formula in the markdown lists all three axes -- confirm this is intended.
deltaX = dataClip['accelerationX'] - dataClip['accelerationXShift']
deltaZ = dataClip['accelerationZ'] - dataClip['accelerationZShift']
dataClip['V'] = np.sqrt(deltaX ** 2 + deltaZ ** 2)

In [ ]:
# Total V per GPS point: sum the per-row values within each pointIndex group.
perRowMagnitudes = dataClip[['pointIndex', 'V']]
vectorInformation = perRowMagnitudes.groupby(by=['pointIndex']).sum()
vectorInformation.head()

In [ ]:
# Turn the pointIndex group labels back into an ordinary column.
vectorInformation = vectorInformation.reset_index()
# NOTE(review): an earlier note here said to apply the square root to the sum;
# no square root is applied at this step -- confirm whether one is still wanted.
vectorInformation.head()

In [ ]:
# Attach the summed V values to the GPS points. No join keys or join type are
# given, so pandas' defaults apply (inner join on the columns both frames share).
gpsDataPoints = gpsDataPoints.merge(vectorInformation)

In [ ]:
# Plot the geometries again, this time using the 'V' column to drive the colouring.
gpsDataPoints.plot(column='V')

In [ ]:
#centroids

In [ ]:
# Write the result to '../shapes/SEQ_1'. No driver is passed, so geopandas'
# default output format is used.
gpsDataPoints.to_file('../shapes/SEQ_1')

In [ ]:


In [ ]: