In [1]:
import pandas as pd
import numpy as np

from sklearn import svm

import matplotlib.pyplot as plt
import seaborn

In [2]:
df = pd.read_csv("analysis/13141516.csv")

In [11]:
ga_params = df[['P12','P13','P14','P15']].as_matrix()
wait_avg = df['kpi7_avg'].as_matrix()

In [16]:
print("input shape: ", ga_params.shape)
print("target shape: ", wait_avg.shape)


input shape:  (14187, 4)
target shape:  (14187,)

In [10]:
df


Out[10]:
#name id P12 P13 P14 P15 kpi7_avg kpi7_max Unnamed: 8
0 NaN 1 95 60 30 0 26 31 True
1 NaN 2 95 60 30 1 26 29 True
2 NaN 3 95 60 30 2 26 29 True
3 NaN 4 95 60 30 4 26 31 True
4 NaN 5 95 60 30 5 26 31 True
5 NaN 6 95 60 30 7 26 28 True
6 NaN 7 95 60 30 8 26 30 True
7 NaN 8 95 60 30 10 26 33 True
8 NaN 9 95 60 37 0 25 30 True
9 NaN 10 95 60 37 1 26 33 True
10 NaN 11 95 60 37 2 26 32 True
11 NaN 12 95 60 37 4 25 32 True
12 NaN 13 95 60 37 5 26 32 True
13 NaN 14 95 60 37 7 26 32 True
14 NaN 15 95 60 37 8 26 30 True
15 NaN 16 95 60 37 10 26 29 True
16 NaN 17 95 60 44 0 25 30 True
17 NaN 18 95 60 44 1 25 29 True
18 NaN 19 95 60 44 2 26 30 True
19 NaN 20 95 60 44 4 25 30 True
20 NaN 21 95 60 44 5 25 29 True
21 NaN 22 95 60 44 7 25 29 True
22 NaN 23 95 60 44 8 26 31 True
23 NaN 24 95 60 44 10 25 31 True
24 NaN 25 95 60 51 0 25 30 True
25 NaN 26 95 60 51 1 25 29 True
26 NaN 27 95 60 51 2 25 27 True
27 NaN 28 95 60 51 4 25 32 True
28 NaN 29 95 60 51 5 26 30 True
29 NaN 30 95 60 51 7 25 29 True
... ... ... ... ... ... ... ... ... ...
14157 NaN 14158 300 200 70 0 29 39 True
14158 NaN 14159 300 200 70 5 30 37 True
14159 NaN 14160 300 200 70 10 29 39 True
14160 NaN 14161 300 200 70 15 30 45 True
14161 NaN 14162 300 200 70 20 30 48 True
14162 NaN 14163 300 200 77 0 29 36 True
14163 NaN 14164 300 200 77 5 30 48 True
14164 NaN 14165 300 200 77 10 30 40 True
14165 NaN 14166 300 200 77 15 29 40 True
14166 NaN 14167 300 200 77 20 30 37 True
14167 NaN 14168 300 200 80 0 30 39 True
14168 NaN 14169 300 200 80 5 30 41 True
14169 NaN 14170 300 200 80 10 31 45 True
14170 NaN 14171 300 200 80 15 32 47 True
14171 NaN 14172 300 200 80 20 31 39 True
14172 NaN 14173 300 200 88 0 34 55 True
14173 NaN 14174 300 200 88 5 32 47 True
14174 NaN 14175 300 200 88 10 32 41 True
14175 NaN 14176 300 200 88 15 32 44 True
14176 NaN 14177 300 200 88 20 33 48 True
14177 NaN 14178 300 200 90 0 32 44 True
14178 NaN 14179 300 200 90 5 32 45 True
14179 NaN 14180 300 200 90 10 31 46 True
14180 NaN 14181 300 200 90 15 33 49 True
14181 NaN 14182 300 200 90 20 33 48 True
14182 NaN 14183 300 200 100 0 33 40 True
14183 NaN 14184 300 200 100 5 33 52 True
14184 NaN 14185 300 200 100 10 35 52 True
14185 NaN 14186 300 200 100 15 35 49 True
14186 NaN 14187 300 200 100 20 34 55 True

14187 rows × 9 columns


In [17]:
regr = svm.SVR()

In [18]:
regr.fit(ga_params, wait_avg)


Out[18]:
SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [19]:
regr.score(ga_params, wait_avg)


Out[19]:
0.63811278489392076

In [27]:
plt.scatter(regr.predict(ga_params), wait_avg)
plt.plot(wait_avg, (lambda x: x)(wait_avg),color='orange')
plt.show()



In [23]:
min(wait_avg)


Out[23]:
24

In [25]:
regr_c = svm.SVR(C=1e3)
regr_c.fit(ga_params,wait_avg)



In [26]:
plt.scatter(regr_c.predict(ga_params), wait_avg)
plt.plot(wait_avg, (lambda x: x)(wait_avg), color='orange')
plt.show()



In [30]:
regr_c.score(ga_params, wait_avg)


Out[30]:
0.99836718908328415

In [31]:
from sklearn.cross_validation import cross_val_score

In [33]:
cross_val_score(regr_c, ga_params, wait_avg,cv = 10)


Out[33]:
array([-1.18982294, -0.89056972, -0.45935778, -0.92156493, -0.17180217,
       -0.59552069, -0.15200566, -0.10956671, -0.20840635, -0.24319313])

In [34]:
cross_val_score(regr, ga_params, wait_avg, cv=10)


Out[34]:
array([-0.85854256, -0.63301529, -0.26218471, -0.63608493, -0.07219641,
       -0.87099525, -0.30925181, -0.25584594, -0.39615311, -0.44608986])

In [35]:
from sklearn.model_selection import train_test_split

In [42]:
x_train, x_test, y_train, y_test = train_test_split(ga_params, wait_avg, test_size=0.1, random_state=2)

In [46]:
regr_c_0 = svm.SVR(C=1e2).fit(x_train,y_train)

In [47]:
regr_c_0.score(x_test,y_test)


Out[47]:
0.42243341651070609

In [48]:
plt.scatter(regr_c_0.predict(ga_params), wait_avg)
plt.plot(wait_avg, (lambda x: x)(wait_avg), color='orange')
plt.show()



In [ ]: