In [1]:
import sys
import os
# Make the project-local `lib` package (imported just below) resolvable.
# The directory can be overridden with the BLACKOPS_SCRIPTS_DIR environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the original location is used unchanged.
SCRIPTS_DIR = os.environ.get(
    "BLACKOPS_SCRIPTS_DIR",
    "/Users/ajeetjha/sandbox/sherlock/blackops/scripts",
)
# Guard so that re-running this cell does not keep growing sys.path.
if SCRIPTS_DIR not in sys.path:
    sys.path.append(SCRIPTS_DIR)
import lib.dbUtil as dbUtil
import lib.genericUtil as gUtil
import lib.mongoUtil as mUtil
import pandas as pd
import numpy as np
from datetime import datetime, date, time, timedelta
import pprint

In [2]:
# Read the input CSV through the project helper; `df` is the frame every
# later cell works from.
INPUT_CSV = 'd1_w1_data/first5_day_2017_2_16_d1.csv'
df = gUtil.readCsvToPD(INPUT_CSV)

In [3]:
import numpy as np

# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; `train_test_split` now lives in `sklearn.model_selection`.
# Fall back to the old module only for installations that predate 0.18.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Fixed seed so the 80/20 split (and everything derived from it) is the same
# on every Restart & Run All.
SPLIT_SEED = 42

# Target column vs. everything else in the loaded frame.
label = df['times_loaded']
features = df.drop('times_loaded', axis=1)

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    features, label, test_size=0.2, random_state=SPLIT_SEED
)

In [4]:
# Set the `pid` column of each split aside (it is re-attached to the test
# rows later) and remove it, together with the first column of `df`, from the
# frames handed to the model.
# NOTE(review): df.columns[0] is presumably a leftover row-index column from
# the CSV export -- confirm against the input file.
first_col = df.columns[0]

X_train_pid = X_train["pid"]
X_train = X_train.drop("pid", axis=1).drop(first_col, axis=1)

X_test_pid = X_test["pid"]
X_test = X_test.drop("pid", axis=1).drop(first_col, axis=1)

In [6]:
# Sanity check: show the first five held-out rows after the column drops.
X_test.head(n=5)


Out[6]:
chaal_by_blind avg_win_by_boot avg_loss_by_boot
87674 0.0 0 0
1339 5.0 3 25
4399 11.0 3 8
73208 0.0 0 0
28141 19.0 0 4

In [7]:
# Train a support-vector classifier with scikit-learn's default settings on
# the training split. The bare `fit` call is kept as the last expression so
# the cell still displays the fitted estimator.
from sklearn import svm

clf = svm.SVC()
clf.fit(X=X_train, y=y_train)


Out[7]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [8]:
# Predicted labels for the held-out rows, in the same row order as X_test.
y_pred = clf.predict(X=X_test)

In [13]:
# Assemble the report frame: test features + pid + actual and predicted
# labels.
# Work on a copy: a plain `finalTestData = X_test` only aliases the frame, so
# the columns added below would also land in X_test. That breaks a re-run of
# `clf.predict(X_test)` (unexpected extra columns) and makes the resulting
# column order depend on which cells were executed before.
finalTestData = X_test.copy()
finalTestData["pid"] = X_test_pid              # Series: aligned on the row index
finalTestData["times_loaded_actual"] = y_test  # Series: aligned on the row index
finalTestData["times_loaded_pred"] = y_pred    # array: assigned by position

In [14]:
# Show the first five rows of the assembled report frame.
finalTestData.head(n=5)


Out[14]:
chaal_by_blind avg_win_by_boot avg_loss_by_boot times_loaded_actual times_loaded_pred pid
87674 0.0 0 0 1 0 1007444298225
1339 5.0 3 25 1 1 1000311335714
4399 11.0 3 8 1 1 10052494608375
73208 0.0 0 0 1 0 10040058596129
28141 19.0 0 4 0 0 1007804264917

In [16]:
# Write the report frame to CSV through the project helper.
OUTPUT_CSV = 'd1_w1_data/first5_day_2017_2_16_D1_pred.csv'
gUtil.dumpPDtoCsv(finalTestData, OUTPUT_CSV)

In [ ]: