notebook.community

Edit and run



In [1]:

    
import sys
import os

# Directory that contains the project's `lib` package. It can be overridden
# with the BLACKOPS_SCRIPTS_DIR environment variable so the notebook is not
# tied to one machine; the default is the original hardcoded location.
SCRIPTS_DIR = os.environ.get(
    "BLACKOPS_SCRIPTS_DIR",
    "/Users/ajeetjha/sandbox/sherlock/blackops/scripts",
)
# Guard the append so re-running this cell does not pile up duplicate entries.
if SCRIPTS_DIR not in sys.path:
    sys.path.append(SCRIPTS_DIR)
import lib.dbUtil as dbUtil
import lib.genericUtil as gUtil
import lib.mongoUtil as mUtil
import pandas as pd
import numpy as np
from datetime import datetime, date, time, timedelta
import pprint



In [2]:

    
# Load the input CSV through the project's helper. Later cells index the result
# by column name and call `.drop(..., axis=1)` on it, i.e. they treat it as a
# pandas DataFrame (the helper's own implementation is not visible here).
df = gUtil.readCsvToPD('d1_w1_data/first5_day_2017_2_16_d1.csv')



In [3]:

    
import numpy as np
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `sklearn.model_selection` is its replacement. The fallback keeps the
# cell working on installations that predate 0.18.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Fixed seed so the train/test partition is the same on every
# Restart Kernel -> Run All; change it to draw a different split.
SPLIT_SEED = 42

# Target column vs. every other column of the loaded frame.
label = df['times_loaded']
features = df.drop('times_loaded', axis=1)

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    features, label, test_size=0.2, random_state=SPLIT_SEED
)



In [4]:

    
# Set the `pid` column aside before removing it, so it can be re-attached to
# the predictions later, then strip `pid` and the first column of `df`
# (presumably a saved index column -- confirm against the CSV) from the features.
X_train_pid = X_train["pid"]
X_train = X_train.drop("pid", axis=1).drop(df.columns[0], axis=1)

# Same treatment for the held-out split.
X_test_pid = X_test["pid"]
X_test = X_test.drop("pid", axis=1).drop(df.columns[0], axis=1)



In [6]:

    
# Preview the first rows of the held-out feature frame after the column drops.
X_test.head()









    Out[6]:






  
    
      
      chaal_by_blind
      avg_win_by_boot
      avg_loss_by_boot
    
  
  
    
      87674
      0.0
      0
      0
    
    
      1339
      5.0
      3
      25
    
    
      4399
      11.0
      3
      8
    
    
      73208
      0.0
      0
      0
    
    
      28141
      19.0
      0
      4



In [7]:

    
from sklearn import svm

# Train a support-vector classifier with scikit-learn's default settings.
# `fit` returns the estimator itself, so binding its result leaves `clf`
# pointing at the fitted model.
clf = svm.SVC().fit(X_train, y_train)
# Bare last expression so the cell still displays the estimator's repr.
clf









    Out[7]:





SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)



In [8]:

    
# Predict a label for every row of the held-out feature frame with the fitted classifier.
y_pred = clf.predict(X_test)



In [13]:

    
# Build the results table on a copy. A plain `finalTestData = X_test` only
# aliases the feature frame, so the columns added below would also appear in
# X_test, and re-running the predict cell would then hand the classifier
# columns it was never trained on.
finalTestData = X_test.copy()
# `pid` and `y_test` are Series and align on the row index; `y_pred` is
# assigned positionally, in the same row order as X_test.
finalTestData["pid"] = X_test_pid
finalTestData["times_loaded_actual"] = y_test
finalTestData["times_loaded_pred"] = y_pred



In [14]:

    
# Preview the assembled frame: features plus pid, actual label and predicted label.
finalTestData.head()









    Out[14]:






  
    
      
      chaal_by_blind
      avg_win_by_boot
      avg_loss_by_boot
      times_loaded_actual
      times_loaded_pred
      pid
    
  
  
    
      87674
      0.0
      0
      0
      1
      0
      1007444298225
    
    
      1339
      5.0
      3
      25
      1
      1
      1000311335714
    
    
      4399
      11.0
      3
      8
      1
      1
      10052494608375
    
    
      73208
      0.0
      0
      0
      1
      0
      10040058596129
    
    
      28141
      19.0
      0
      4
      0
      0
      1007804264917



In [16]:

    
# Hand the results frame to the project's helper together with the output path
# (presumably it writes the frame as CSV to that path -- helper not visible here).
gUtil.dumpPDtoCsv(finalTestData,'d1_w1_data/first5_day_2017_2_16_D1_pred.csv')



In [ ]:

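# Pasted copy of the finalTestData.head() output (tab-separated), kept for reference.
# Commented out because it is data, not Python, and would fail on Run All.
# 	chaal_by_blind	avg_win_by_boot	avg_loss_by_boot	times_loaded_actual	times_loaded_pred	pid
# 87674	0.0	0	0	1	0	1007444298225
# 1339	5.0	3	25	1	1	1000311335714
# 4399	11.0	3	8	1	1	10052494608375
# 73208	0.0	0	0	1	0	10040058596129
# 28141	19.0	0	4	0	0	1007804264917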