In [4]:
from sklearn import svm, model_selection
import src.vector_gen.generateCurrentSituationVector as vecX
import src.vector_gen.generate_VectorY as vecY
import src.misc.split_train_valid as split
import src.misc.paths as path
import pandas as pd
import numpy as np
from sklearn.multioutput import MultiOutputRegressor


import sys

# Print NumPy arrays in full instead of summarising them with "...".
# A NaN threshold is rejected by current NumPy releases (ValueError:
# threshold must be numeric and non-NaN); sys.maxsize is the value the
# NumPy documentation recommends for untruncated output.
np.set_printoptions(threshold=sys.maxsize)

# Load the trajectories table and split it into training / validation /
# testing parts using the project's split helper.
TRAJECTORIES_CSV = '../../dataset/training2/trajectories(table_5)_training2.csv'
df = pd.read_csv(TRAJECTORIES_CSV)
training, validation, testing = split.split_dataset(df)

# Build the feature (X) and target (Y) vectors for the training and testing
# parts. NOTE(review): `validation` is not used in the visible part of this
# script -- confirm whether it is consumed elsewhere.
X_train, Y_train = (
    vecX.generate_x_df(training),
    vecY.generate_VectorY_df(training),
)
X_test, Y_test = (
    vecX.generate_x_df(testing),
    vecY.generate_VectorY_df(testing),
)


# Model: wrap a single-output SVR in MultiOutputRegressor so that one
# regressor is fitted per target column of Y_train, then predict on the
# test features.
# MultiOutputRegressor is already imported at the top of the script, so the
# duplicate mid-script import has been dropped.
clf = svm.SVR(C=30, epsilon=0.005)
regr_multi_svr = MultiOutputRegressor(clf)

regr_multi_svr.fit(X_train, Y_train)
Y_pred = regr_multi_svr.predict(X_test)

#print(Y_pred)
#print(len(Y_pred))


# MAPE (presumably mean absolute percentage error -- confirm against
# src.misc.evaluation) of the predictions versus the test targets.

# Imported under its own name rather than aliased to `eval`, so the built-in
# eval() is not shadowed for the remainder of the session.
from src.misc import evaluation

error = evaluation.mape(Y_pred, Y_test)

# Print the raw result of mape(), then its mean over all elements.
print(error)
print(np.mean(np.array(error)))


The length of the original data frame is :  10136
The data set is being split into  10 buckets
The number n of test buckets is 3
Bucket  6 will be used as validation set
The test_idx  0
The test_end  2
The dataset_splitted variable has the type  <class 'list'>
Slicing data set from  0  to  2
The type of validation set is  <class 'pandas.core.frame.DataFrame'>
The length of the validation set is  1013
The type of test set is  <class 'pandas.core.frame.DataFrame'>
The length of the test set is  3042
k is  10
range(0, 10)
0 <class 'int'>
1 <class 'int'>
2 <class 'int'>
3 <class 'int'>
4 <class 'int'>
5 <class 'int'>
6 <class 'int'>
7 <class 'int'>
8 <class 'int'>
9 <class 'int'>
[3, 4, 5, 7, 8, 9]
The length of the training set is  6081