In [13]:
import pandas as pd
import numpy as np
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import make_scorer
In [4]:
# Read the training set; the path is relative to the notebook's working directory.
train = pd.read_csv(filepath_or_buffer='train.csv')

# Preview the first five rows as a quick sanity check of the loaded columns.
train.head(n=5)
Out[4]:
In [7]:
# Extra-trees picks split thresholds at random, so pin the seed: without it the
# fitted model (and every score computed from it) changes on each
# Restart & Run All.
model = ExtraTreesRegressor(random_state=42)
In [8]:
# Feature matrix: the eight predictor columns, as a plain NumPy array.
X = train[[
    'season',
    'holiday',
    'workingday',
    'weather',
    'temp',
    'atemp',
    'humidity',
    'windspeed',
]].values

# Target vector: the value in the 'count' column for each row.
y = train['count'].values

# Fit on the full training frame; as the last expression, the fitted
# estimator is also displayed as the cell output.
model.fit(X, y)
Out[8]:
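The evaluation metric is the Root Mean Squared Logarithmic Error (RMSLE):

$$\sqrt{\frac{1}{n} \sum_{i=1}^{n} \left( \log(p_i + 1) - \log(a_i + 1) \right)^2}$$

where
- $n$ is the number of hours in the test set
- $p_i$ is your predicted count
- $a_i$ is the actual count
- $\log(x)$ is the natural logarithm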
In [10]:
def rmsle(y_true, y_pred):
    """Root Mean Squared Logarithmic Error between actual and predicted values.

    Computes ``sqrt(mean((log(y_pred + 1) - log(y_true + 1)) ** 2))``.

    Parameters
    ----------
    y_true : array-like
        Actual values. Each value must be greater than -1 for the logarithm
        to be defined.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``. Same domain
        restriction as ``y_true``.

    Returns
    -------
    numpy.float64
        The RMSLE; 0.0 means the predictions match the actual values exactly.
    """
    # Coerce to float arrays so plain lists/tuples work too; with a list,
    # `y_pred + 1` would raise TypeError instead of adding element-wise.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # log1p(x) is log(x + 1) but stays accurate when x is very close to zero,
    # where `x + 1` would round to exactly 1.0.
    diff = np.log1p(y_pred) - np.log1p(y_true)
    mean_error = np.square(diff).mean()
    return np.sqrt(mean_error)
# Wrap rmsle as a scikit-learn scorer. RMSLE is a loss (lower is better), so
# greater_is_better=False tells scikit-learn to sign-flip it when ranking models.
scorer = make_scorer(
    rmsle,
    greater_is_better=False,
)
In [14]:
# Predict on the same rows the model was fitted on, so the value below is the
# in-sample (training) error, not an estimate of the error on unseen data.
y_pred = model.predict(X)

# Score the predictions against the actual counts; shown as the cell output.
rmsle(y_true=y, y_pred=y_pred)
Out[14]:
In [ ]: