In [1]:
# --- standard library ---
import os
import sys
from datetime import datetime, date, time, timedelta
import pprint

# --- third party ---
import numpy as np
import pandas as pd

# Directory that is put on sys.path before the project-local `lib.*` imports below.
# It can be overridden with the BLACKOPS_SCRIPTS_DIR environment variable so the
# notebook is not tied to one machine; the default is the path the notebook was
# originally written with.
SCRIPTS_DIR = os.environ.get(
    "BLACKOPS_SCRIPTS_DIR",
    "/Users/ajeetjha/sandbox/sherlock/blackops/scripts",
)
# Guard so that re-running this cell does not keep appending the same entry.
if SCRIPTS_DIR not in sys.path:
    sys.path.append(SCRIPTS_DIR)

# --- project local (must come after the sys.path setup above) ---
import lib.dbUtil as dbUtil
import lib.genericUtil as gUtil
import lib.mongoUtil as mUtil
In [2]:
# Input data set, read through the project helper.
# NOTE(review): the helper presumably returns a pandas DataFrame (later cells
# index it by column name and call .drop / .columns on it) — confirm.
INPUT_CSV = 'd1_w1_data/first5_day_2017_2_16_d1.csv'
df = gUtil.readCsvToPD(INPUT_CSV)
In [3]:
import numpy as np
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` is provided by `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Fixed seed so that Restart Kernel -> Run All reproduces the same split
# (and therefore the same predictions file) every time.
SPLIT_SEED = 42

# Target column vs. every other column of the loaded frame.
label = df['times_loaded']
features = df.drop('times_loaded', axis=1)

# Hold out 20% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    features, label, test_size=0.2, random_state=SPLIT_SEED
)
In [4]:
# The first column of the loaded frame is removed from the model inputs together
# with "pid".
# NOTE(review): df.columns[0] is presumably an index column written by an earlier
# CSV export — confirm.
first_col = df.columns[0]

# Keep the pid values aside before they are dropped; they are re-attached to the
# test rows once predictions are available.
X_train_pid, X_test_pid = X_train["pid"], X_test["pid"]

X_train = X_train.drop("pid", axis=1).drop(first_col, axis=1)
X_test = X_test.drop("pid", axis=1).drop(first_col, axis=1)
In [6]:
# Preview the first rows of the test feature frame.
X_test.head()
Out[6]:
In [7]:
from sklearn import svm

# Support-vector classifier with library-default hyper-parameters, trained on the
# training split. `fit` returns the estimator itself, so the call can be chained.
clf = svm.SVC().fit(X_train, y_train)
clf  # last expression: display the fitted estimator
Out[7]:
In [8]:
# Predict the label for every row of the held-out test features.
y_pred = clf.predict(X_test)
In [13]:
# Assemble the results table: test features + pid + actual label + predicted label.
# Work on a copy: a plain `finalTestData = X_test` is only an alias, so the column
# assignments below would also add pid / label / prediction columns to X_test and
# corrupt the feature matrix for any later (re-)run of the predict cell.
finalTestData = X_test.copy()
finalTestData["pid"] = X_test_pid
finalTestData["times_loaded_actual"] = y_test
finalTestData["times_loaded_pred"] = y_pred
In [14]:
# Preview the first rows of the assembled results table.
finalTestData.head()
Out[14]:
In [16]:
# Destination of the results table, written through the project helper.
OUTPUT_CSV = 'd1_w1_data/first5_day_2017_2_16_D1_pred.csv'
gUtil.dumpPDtoCsv(finalTestData, OUTPUT_CSV)
In [ ]: