In [13]:
%install_ext https://raw.githubusercontent.com/rasbt/python_reference/master/ipython_magic/watermark.py
In [14]:
# Load the watermark extension so that the %watermark magic becomes available.
%load_ext watermark
In [15]:
# Invoke the watermark magic with no options.
%watermark
In [1]:
import numpy as np
import pandas as pd
In [2]:
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
In [16]:
# Parse training.csv (relative to the working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='training.csv')
In [17]:
# Stack three copies of `data` on top of each other to get a larger frame.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0;
# `pd.concat` yields the same rows in the same order, and (like the original
# chained appends) keeps each copy's original index labels.
data_ = pd.concat([data, data, data])
In [19]:
# Report how many rows the enlarged frame holds.
print("Number of lines: {}".format(len(data_)))
In [20]:
%%time
data_.to_csv('big_data.csv', index=False)
In [21]:
# Long listing of big_data.csv with a human-readable file size.
!ls -lh big_data.csv
In [8]:
%%time
data_ = pd.read_csv('big_data.csv')
In [9]:
# NOTE(review): these arrays are taken from `data`, not from the `data_` frame
# that was just reloaded from big_data.csv — confirm that this is intended.
# NOTE(review): `w` is not used by any cell visible in this section.
# X: every column except the id, weight and label columns, as a NumPy array.
X = data.drop(columns=['EventId', 'Weight', 'Label']).values
# y: the label column; w: the weight column.
y, w = data['Label'].values, data['Weight'].values
In [ ]:
%%time
randomforests = RandomForestClassifier(n_estimators=20, n_jobs=1).fit(X, y)
In [11]:
# Number of parallel jobs passed as n_jobs to the classifier fitted below.
num_jobs = 4
In [ ]:
%%time
randomforests = RandomForestClassifier(n_estimators=20, n_jobs=num_jobs).fit(X, y)