In [6]:
import numpy as np
from sklearn import linear_model
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import cross_val_score
#####################################################################
# Dataset: read the training file and group its rows by key
#####################################################################
# Group the third tab-separated field of every row under a key built from the
# first two fields; `samples` is then the list of those groups (one list of
# descriptor strings per distinct key).
# NOTE(review): 'rb' combined with str.split assumes a Python 2 kernel (the
# notebook uses print statements) -- confirm before porting to Python 3.
character = {}
# The context manager closes the file even if a malformed row raises below.
with open('../../0.part.tokens.LTR_FT', 'rb') as train_file:
    for line in train_file:
        # Drop the line terminator so it is not stored inside the descriptor.
        line = line.rstrip('\r\n')
        if not line:
            # Tolerate blank lines (e.g. a trailing empty line at end of file).
            continue
        terms = line.split('\t')
        key = terms[0] + ' ' + terms[1]
        character.setdefault(key, []).append(terms[2])
# Materialise as a list: the groups are later addressed by integer position.
samples = list(character.values())
In [14]:
def sample2Xy(samples, indexes):
    """Flatten the selected sample groups into a feature matrix and a target vector.

    Every descriptor is a string of whitespace-separated numbers: the first
    number is the target, the remaining ones are the features.

    Parameters
    ----------
    samples : sequence of sequences of str
        Groups of descriptor strings, addressable by integer position.
    indexes : iterable of int
        Positions in ``samples`` of the groups to include, in output order.

    Returns
    -------
    X : numpy.ndarray of float32
        One row of features per descriptor.
    y : numpy.ndarray of float32
        One target per descriptor, aligned with the rows of ``X``.

    Raises
    ------
    ValueError
        If a descriptor is empty or contains a token ``float`` cannot parse.
    """
    X = []
    y = []
    for index in indexes:
        for des in samples[index]:
            # split() without an argument collapses runs of whitespace and
            # ignores leading/trailing whitespace (including the newline),
            # whereas split(' ') would yield empty tokens that float() rejects.
            terms = des.split()
            if not terms:
                raise ValueError(
                    'empty descriptor in sample group %r' % (index,))
            y.append(float(terms[0]))
            X.append([float(value) for value in terms[1:]])
    X = np.array(X, dtype=np.float32)
    y = np.array(y, dtype=np.float32)
    return X, y
# 5-fold cross-validation comparing a linear regression with a gradient
# boosting regressor. The split is made over `samples` (the groups), so all
# descriptors of one group land on the same side of a split.
#
# One seed drives both the fold shuffling and the boosted model; without a
# seed on KFold the folds -- and therefore every number printed below --
# would change on each run.
RANDOM_STATE = 1
kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

# Per-fold scores, one entry per fold.
mse_linear_model = []
mae_linear_model = []
mse_tree_model = []
mae_tree_model = []

for train, test in kf.split(samples):
    X_train, y_train = sample2Xy(samples, train)
    X_test, y_test = sample2Xy(samples, test)

    # Linear Regression
    clf = linear_model.LinearRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse_linear_model.append(mean_squared_error(y_test, y_pred))
    mae_linear_model.append(mean_absolute_error(y_test, y_pred))

    # Gradient Boosting Regressor
    # (fitted last, so `clf` / `y_pred` hold this model's state after the loop)
    clf = GradientBoostingRegressor(learning_rate=0.05,
                                    random_state=RANDOM_STATE)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse_tree_model.append(mean_squared_error(y_test, y_pred))
    mae_tree_model.append(mean_absolute_error(y_test, y_pred))

# Report the mean of each metric across folds. Both metrics are non-negative,
# so no abs() is needed. A single parenthesised argument prints identically
# under the Python 2 print statement and the print function.
print('Linear Regression MSE ' + str(np.mean(mse_linear_model)))
print('Linear Regression MAE ' + str(np.mean(mae_linear_model)))
print('Gradient Boosting Regressor MSE ' + str(np.mean(mse_tree_model)))
print('Gradient Boosting Regressor MAE ' + str(np.mean(mae_tree_model)))
In [65]:
# Show ten consecutive true targets and the matching predictions, starting at
# offset `i`, using whatever `y_test` / `y_pred` currently hold.
i = 495
_window = slice(i, i + 10)
print(y_test[_window])
print(y_pred[_window])