In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.grid_search import GridSearchCV

In [2]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

train = train.dropna()
test = test.dropna()

train['month'] = pd.DatetimeIndex(train['datetime']).month
train['week'] = pd.DatetimeIndex(train['datetime']).week
train['hour'] = pd.DatetimeIndex(train['datetime']).hour

test['month'] = pd.DatetimeIndex(test['datetime']).month
test['week'] = pd.DatetimeIndex(test['datetime']).week
test['hour'] = pd.DatetimeIndex(test['datetime']).hour

trainX = train[['season', 'holiday', 'workingday', 'weather', 'temp', 'atemp', 'humidity', 'windspeed', 'month', 'week', 'hour']].values.astype("float32")
trainY = train['count'].values.astype("int")

testX = test[['season', 'holiday', 'workingday', 'weather', 'temp', 'atemp', 'humidity', 'windspeed', 'month', 'week', 'hour']].values.astype("float32")

In [5]:
parameters = {'min_samples_split' : [20], 'n_estimators' : [100, 200]}
clf = GridSearchCV(RandomForestClassifier(), parameters, n_jobs=1, verbose=1)

clf.fit(trainX, trainY)

print("Best score: %0.3f" % clf.best_score_)
print("Best parameters set:")
best_parameters = clf.best_estimator_.get_params()
for param_name in sorted(parameters.keys()):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))
    
# Get best model
best_model = clf.best_estimator_

# Fit model with best parameters optimized for quadratic_weighted_kappa
best_model.fit(trainX, trainY)
preds = best_model.predict(testX)

# Create your submission file
submission = pd.DataFrame({"datetime": test['datetime'], "count": preds})
submission.to_csv("submission.csv", index=False)


[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    5.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   51.6s finished
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best score: 0.015
Best parameters set:
	min_samples_split: 20
	n_estimators: 100

In [6]:
# LB score : 0.72923
submission


Out[6]:
count datetime
0 5 2011-01-20 00:00:00
1 5 2011-01-20 01:00:00
2 2 2011-01-20 02:00:00
3 2 2011-01-20 03:00:00
4 2 2011-01-20 04:00:00
5 2 2011-01-20 05:00:00
6 88 2011-01-20 06:00:00
7 99 2011-01-20 07:00:00
8 124 2011-01-20 08:00:00
9 97 2011-01-20 09:00:00
10 127 2011-01-20 10:00:00
11 91 2011-01-20 11:00:00
12 84 2011-01-20 12:00:00
13 91 2011-01-20 13:00:00
14 112 2011-01-20 14:00:00
15 129 2011-01-20 15:00:00
16 83 2011-01-20 16:00:00
17 177 2011-01-20 17:00:00
18 103 2011-01-20 18:00:00
19 193 2011-01-20 19:00:00
20 90 2011-01-20 20:00:00
21 90 2011-01-20 21:00:00
22 52 2011-01-20 22:00:00
23 52 2011-01-20 23:00:00
24 12 2011-01-21 00:00:00
25 7 2011-01-21 01:00:00
26 2 2011-01-21 02:00:00
27 4 2011-01-21 03:00:00
28 2 2011-01-21 04:00:00
29 2 2011-01-21 05:00:00
... ... ...
6463 106 2012-12-30 18:00:00
6464 14 2012-12-30 19:00:00
6465 106 2012-12-30 20:00:00
6466 8 2012-12-30 21:00:00
6467 107 2012-12-30 22:00:00
6468 8 2012-12-30 23:00:00
6469 5 2012-12-31 00:00:00
6470 5 2012-12-31 01:00:00
6471 1 2012-12-31 02:00:00
6472 1 2012-12-31 03:00:00
6473 1 2012-12-31 04:00:00
6474 1 2012-12-31 05:00:00
6475 64 2012-12-31 06:00:00
6476 72 2012-12-31 07:00:00
6477 235 2012-12-31 08:00:00
6478 51 2012-12-31 09:00:00
6479 52 2012-12-31 10:00:00
6480 64 2012-12-31 11:00:00
6481 52 2012-12-31 12:00:00
6482 86 2012-12-31 13:00:00
6483 91 2012-12-31 14:00:00
6484 86 2012-12-31 15:00:00
6485 90 2012-12-31 16:00:00
6486 86 2012-12-31 17:00:00
6487 124 2012-12-31 18:00:00
6488 98 2012-12-31 19:00:00
6489 71 2012-12-31 20:00:00
6490 71 2012-12-31 21:00:00
6491 97 2012-12-31 22:00:00
6492 53 2012-12-31 23:00:00

6493 rows × 2 columns