In [103]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
In [104]:
def rmse(act, pred):
    """Return the root-mean-square error between actual and predicted values.

    Both inputs are converted to float NumPy arrays before subtracting, so
    the values are paired strictly by position.  This matters for pandas
    inputs: subtracting two Series directly aligns them on index labels,
    which yields NaN (or silently mismatched pairs) when the indexes differ.
    The conversion also lets plain Python sequences be passed.

    Parameters
    ----------
    act : array-like
        Observed target values.
    pred : array-like
        Predicted values; must be broadcastable against ``act``.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((act - pred) ** 2))``.  A NaN in either input propagates
        to the result rather than being skipped.
    """
    actual = np.asarray(act, dtype=float)
    predicted = np.asarray(pred, dtype=float)
    squared_error = (actual - predicted) ** 2
    return np.sqrt(np.mean(squared_error))
In [105]:
# --- Training side -------------------------------------------------------
# Base table plus the outputs of two other result files, appended as the
# extra columns '13' and '14'.
train = pd.read_csv('./regression_train.csv')
lrtrain = pd.read_csv('./lr_trainresult.csv')
caitrain = pd.read_csv('./tttttttttrain.csv')
train['13'] = lrtrain.values
train['14'] = caitrain.values

# --- Test side -----------------------------------------------------------
# Same construction as above, using the matching test-set files.
test = pd.read_csv('./regression_test.csv')
lrtest = pd.read_csv('./lr_testresult.csv')
caitest = pd.read_csv('./tttttttttt.csv')
test['13'] = lrtest.values
test['14'] = caitest.values
In [106]:
# Preview the first five rows of the augmented training frame.
train.head(n=5)
Out[106]:
In [107]:
# NOTE: the triple-quoted block below is a bare string literal, not executed
# code.  It holds a disabled experiment that would drop column '12' from both
# `train` and `test` in place and then preview `train`.  Because the string is
# the only expression in this cell, the cell simply evaluates to that string.
'''
train.drop('12', axis=1, inplace=True)
test.drop('12', axis=1, inplace=True)
train.head()
'''
Out[107]:
In [108]:
# Column 0 of each frame is used as the target; every remaining column
# (including the appended '13' and '14') is used as a feature.
model = LinearRegression()
model.fit(train.iloc[:, 1:], train.iloc[:, 0])

# Predict on the held-out frame.
result = model.predict(test.iloc[:, 1:])
# Flooring the predictions is left disabled:
#result = np.floor(result)

# Report the test-set RMSE, then the raw predictions.
print(rmse(test.iloc[:, 0], result))
print(result)
In [109]:
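# Scores noted from earlier runs (presumably the RMSE printed by the cell
# above; which configuration produced each value is not recorded):
# 3.47570697131
# 3.46963542896
# 3.33577057825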
In [110]:
# Score the 'cai' column of this file on its own against the test target
# (column 0 of `test`).
cai = pd.read_csv('./tttttttttt.csv')
rmse(test.iloc[:, 0], cai.loc[:, 'cai'].values)
Out[110]:
In [ ]: