In [3]:
import glob
import os
import numpy as np
import pandas as pd
import sklearn.linear_model as lm
import matplotlib.pyplot as plt
import dateutil.parser
from datetime import datetime
# Directory that holds the tab-separated session files (*.tab); the path is
# relative, so it is resolved against the current working directory.
path = "./APE_running_data"
def convert(date):
    """Convert a timestamp string to seconds since the Unix epoch.

    Parameters
    ----------
    date : str
        Timestamp text in any format accepted by ``dateutil.parser.parse``.

    Returns
    -------
    float
        Seconds elapsed since 1970-01-01 00:00:00. A timestamp that carries
        a UTC offset is normalized to UTC first; a naive timestamp is used
        as written.
    """
    dt = dateutil.parser.parse(date)
    offset = dt.utcoffset()  # None when the timestamp is naive
    dt = dt.replace(tzinfo=None)
    if offset is not None:
        # Shift the wall-clock reading to UTC instead of merely discarding the
        # offset, so timestamps written with different offsets stay comparable.
        dt -= offset
    # Built directly: datetime.utcfromtimestamp() is deprecated since Python 3.12.
    epoch = datetime(1970, 1, 1)
    return (dt - epoch).total_seconds()
colnames = np.array(['time', 'elevation', 'distance', 'speed'])
# Each regression window covers 2 * window_size_half rows:
# window_size_half - 1 rows before the current row, the row itself,
# and window_size_half rows after it (clipped at the session boundaries).
window_size_half = 8


def _local_slopes(session, half_window):
    """Estimate the elevation-vs-distance slope around every row of a session.

    For each row a linear regression of ``elevation`` on ``distance`` is
    fitted over the surrounding window of rows (rows containing NaN in any
    column are dropped first) and its coefficient is taken as the slope.

    Parameters
    ----------
    session : pandas.DataFrame
        Must provide ``distance`` and ``elevation`` columns.
    half_window : int
        Half-width of the window, see ``window_size_half``.

    Returns
    -------
    numpy.ndarray
        One slope per row; NaN where the window has no complete row left.
    """
    n_rows = len(session)
    slopes = []  # collected in a list; np.append inside the loop is quadratic
    for j in range(n_rows):
        start = max(j - half_window + 1, 0)
        stop = min(j + half_window + 1, n_rows)
        window = session.iloc[start:stop].dropna()
        if window.empty:
            # Nothing to fit; LinearRegression.fit would raise on zero samples.
            slopes.append(np.nan)
            continue
        regr = lm.LinearRegression()
        # Select the feature as a 2-D array explicitly: indexing a Series with
        # [:, np.newaxis] is no longer supported by pandas.
        regr.fit(window[['distance']].to_numpy(), window['elevation'].to_numpy())
        slopes.append(regr.coef_[0])
    return np.array(slopes, dtype=float)


Datasets_all = []
# Files are addressed through their joined path rather than by changing the
# working directory, so the cell can be re-run and the kernel's cwd is left
# alone. glob returns matches in arbitrary order; sorting keeps the positions
# in Datasets_all stable between runs and machines.
for file in sorted(glob.glob(os.path.join(path, "*.tab"))):
    print("Processing {}".format(os.path.basename(file)))
    raw = np.genfromtxt(file, skip_header=1, delimiter='\t', converters={0: convert})
    # Keep only sessions with more than 300 and fewer than 900 rows. The check
    # runs before the per-row regressions so rejected files cost nothing.
    # A file with zero or one data row comes back as a 1-D array; it is
    # outside the accepted range as well, so it is skipped here too.
    if raw.ndim != 2 or not 300 < len(raw) < 900:
        continue
    raw[:, 0] -= raw[0, 0]  # time relative to the first sample of the session
    dataset = pd.DataFrame(raw, columns=colnames)
    dataset['slope'] = _local_slopes(dataset, window_size_half)
    Datasets_all.append(dataset)
Number of training sessions kept after the length filter (more than 300 and fewer than 900 samples):
In [4]:
# Number of session DataFrames collected in Datasets_all.
len(Datasets_all)
Out[4]:
In [9]:
# Quick look at the container and at the first loaded session.
LENGTH = 10  # number of leading rows to preview
first_session = Datasets_all[0]

print(type(Datasets_all))
print(type(first_session))
print(len(first_session))

# Leading rows, then the speed value of the row right after the preview.
print(first_session.iloc[:LENGTH])
print(first_session['speed'].iloc[LENGTH:LENGTH + 1])

# First row, then the speed value of the second row.
print(first_session.iloc[:1])
print(first_session['speed'].iloc[1:2])
In [5]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# pylab exposes the pyplot plotting functions (together with numpy) in a
# single namespace; it is aliased here as `pl`.
import pylab as pl
In [6]:
# Elevation profile of one session.
race = 6  # position of the session within Datasets_all
ds = Datasets_all[race]

# Explicit figure/axes via pyplot (imported at the top as plt) instead of the
# discouraged pylab namespace; labels let the figure stand on its own.
fig, ax = plt.subplots()
ax.plot(ds['time'], ds['elevation'])
ax.set_xlabel('time since start of session [s]')
ax.set_ylabel('elevation')
ax.set_title('Elevation profile of session {}'.format(race))
plt.show()
Out[6]:
In [ ]: