In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
def rmse(act, pred):
    """Return the root-mean-squared error between actual and predicted values.

    Both inputs are converted to float NumPy arrays before subtraction, so
    the comparison is strictly positional (element i against element i):

    * pandas objects are not aligned on their index labels, which would
      otherwise introduce NaNs that ``Series.mean`` silently skips;
    * unsigned-integer inputs cannot wrap around on subtraction;
    * plain Python sequences (lists, tuples) are accepted.

    Parameters
    ----------
    act : array-like
        Observed / ground-truth values.
    pred : array-like
        Predicted values; must be broadcastable against ``act``.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((act - pred) ** 2))``. NaNs in either input propagate
        to the result rather than being dropped.
    """
    act_arr = np.asarray(act, dtype=float)
    pred_arr = np.asarray(pred, dtype=float)
    squared_error = (act_arr - pred_arr) ** 2
    return np.sqrt(squared_error.mean())
In [ ]:
# Load the training and test sets. Both frames are needed by the cells
# below, so `train` must be read here for the notebook to run from a
# fresh kernel.
# NOTE(review): these are user-specific home-directory paths; consider a
# configurable data directory so the notebook runs on other machines.
train = pd.read_csv('~/tensorflow_python3/July/day3/train.csv')
test = pd.read_csv('~/tensorflow_python3/July/day3/test.csv')
# Preview the first rows of the training data.
train.head()
In [4]:
# Plotting the data showed that one sample (index label 17) is anomalous,
# so it is removed.
# A non-inplace drop with errors="ignore" keeps this cell idempotent:
# re-running it after the row is already gone no longer raises KeyError.
train = train.drop(index=17, errors="ignore")
# Some extra engineered features were also tried, but they made the
# results worse, so they are not included.
In [3]:
# Split each frame by position: column 1 is used as the regression target
# and columns 2 onward as the features.
X_train = train.iloc[:, 2:]
y_train = train.iloc[:, 1]
X_test = test.iloc[:, 2:]
y_test = test.iloc[:, 1]

# Fit ordinary least squares on the training split, then predict the
# held-out test split.
model = LinearRegression()
model.fit(X_train, y_train)
result = model.predict(X_test)
print('result: ', result)

# Report the test-set error using the notebook's rmse helper.
test_rmse = rmse(y_test, result)
print('rmse: %f' % test_rmse)
In [ ]: