In [103]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

In [104]:
def rmse(act, pred):
    """Return the root-mean-squared error between actual and predicted values.

    Parameters
    ----------
    act : array-like
        Observed (ground-truth) values.
    pred : array-like
        Predicted values, expected to have the same shape as ``act``.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((act - pred) ** 2))``.
    """
    # Coerce to float ndarrays so that plain lists are accepted and pandas
    # objects are compared element-by-element in positional order rather than
    # being aligned on their index labels (which silently changes the result
    # when the two indices differ).
    act = np.asarray(act, dtype=float)
    pred = np.asarray(pred, dtype=float)
    return np.sqrt(((act - pred) ** 2).mean())

In [105]:
# Base train/test tables.
train, test = (
    pd.read_csv(path)
    for path in ('./regression_train.csv', './regression_test.csv')
)
# Extra per-row values that get appended as columns '13' and '14' below.
lrtrain, lrtest = (
    pd.read_csv(path)
    for path in ('./lr_trainresult.csv', './lr_testresult.csv')
)
caitrain, caitest = (
    pd.read_csv(path)
    for path in ('./tttttttttrain.csv', './tttttttttt.csv')
)

# Attach each extra table to train and test under the same column label.
for column, train_extra, test_extra in (
    ('13', lrtrain, lrtest),
    ('14', caitrain, caitest),
):
    train[column] = train_extra.values
    test[column] = test_extra.values

In [106]:
# Preview the first rows of `train`, including the newly attached '13' and '14' columns.
train.head()


Out[106]:
y 1 2 3 4 5 6 7 8 9 10 11 12 13 14
0 90 6 2 1036 103 114 1.00 1.00 172076 355965 2.0 6527 1851864 90 91.673476
1 88 1 0 2165 205 101 0.40 1.20 43107 44139 3.0 130 1131931 91 90.754319
2 85 62 77 3806 258 166 1.40 1.40 492142 268706 5.2 256 1314590 83 82.319224
3 81 5 0 4721 256 177 0.99 2.58 524787 174964 1.0 233 972606 83 82.775383
4 79 42 55 3949 249 244 2.60 4.60 197289 529200 3.4 331 1013805 73 79.247167

In [107]:
# Disabled experiment: dropping column '12' from both frames.
# The statements are wrapped in a bare string literal so they never execute;
# the cell merely evaluates to (and echoes) the string itself.
'''
train.drop('12', axis=1, inplace=True)
test.drop('12', axis=1, inplace=True)
train.head()
'''


Out[107]:
"\ntrain.drop('12', axis=1, inplace=True)\ntest.drop('12', axis=1, inplace=True)\ntrain.head()\n"

In [108]:
# Column 0 is the target (`y` in the preview of `train`); every other column is a feature.
features_train, target_train = train.iloc[:, 1:], train.iloc[:, 0]
features_test, target_test = test.iloc[:, 1:], test.iloc[:, 0]

model = LinearRegression()
model.fit(features_train, target_train)
result = model.predict(features_test)
# Disabled variant: round predictions down before scoring.
# result = np.floor(result)
print(rmse(target_test, result))
print(result)


3.33577057825
[ 84.69009271  80.42153114  81.27666934 ...,  93.25374827  95.46488147
  80.46041455]

In [109]:
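# RMSE values noted across runs (presumably earlier configurations;
# the last one matches the RMSE printed by the model cell):
# 3.47570697131
# 3.46963542896
# 3.33577057825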

In [110]:
# Score the 'cai' column of the file on its own against the test target,
# for comparison with the combined linear model's RMSE.
cai = pd.read_csv('./tttttttttt.csv')
cai_values = cai['cai'].values
rmse(test.iloc[:, 0], cai_values)


Out[110]:
3.3445622605544982

In [ ]: