In [56]:
%matplotlib inline
from __future__ import absolute_import
from __future__ import print_function
In [63]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn import cross_validation
from sklearn.svm import SVC
In [2]:
# Load the raw Springleaf training set from its relative path into a DataFrame.
train_csv_path = "../data/Springleaf/train.csv"
df = pd.read_csv(train_csv_path)
In [76]:
# Keep at most the first 5000 rows of each class (target == 1 and target == 0),
# in file order, so later cells work on a much smaller sample.
is_positive = df.target == 1
is_negative = df.target == 0
df_one = df[is_positive].iloc[:5000]
df_zero = df[is_negative].iloc[:5000]
In [77]:
# Stack the two class subsamples and keep five feature columns plus the label.
# pd.concat is used instead of DataFrame.append: append was deprecated in
# pandas 1.4 and removed in 2.0, while concat yields the same frame (rows of
# df_one followed by rows of df_zero, original index preserved) on all versions.
df_reduced = pd.concat([df_one, df_zero])
df_tmp = df_reduced[
    ["VAR_0002", "VAR_0003", "VAR_0004", "VAR_0006", "VAR_0007", "target"]
].dropna()  # discard any row with a missing value in the selected columns
# Plain NumPy array in the column order selected above: features first, "target" last.
df_train = df_tmp.values
In [79]:
# Total number of elements in the training array (rows x columns), not the row count.
np.size(df_train)
Out[79]:
In [80]:
# Shuffle the rows, split them into features / target, and cross-validate a
# random forest with 5 folds.
SEED = 42  # fixed seed so a fresh "Restart & Run All" reproduces the same scores
np.random.seed(SEED)
np.random.shuffle(df_train)  # in-place row shuffle (rows were stacked class by class)

# df_train holds the five VAR_* feature columns followed by "target" as the last
# column. The previous `[:, :4]` slice kept only four features and silently
# dropped VAR_0007; slicing relative to the last column uses all of them.
X = df_train[:, :-1]
Y = df_train[:, -1]

model = RandomForestClassifier(n_estimators=100, random_state=SEED)
scores = cross_validation.cross_val_score(model, X, Y, cv=5)
In [84]:
# Show the per-fold cross-validation scores, then their average.
for value in (scores, scores.mean()):
    print(value)
In [82]:
# Cross-validate a default-parameter SVC on the same X / Y with 5 folds;
# the per-fold scores are the cell's displayed output.
model_svm = SVC()
svm_scores = cross_validation.cross_val_score(model_svm, X, Y, cv=5)
svm_scores
Out[82]:
In [ ]: