In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

def rmse(act, pred):
    return np.sqrt(((act - pred) ** 2).mean())

In [ ]:
# train = pd.read_csv('~/tensorflow_python3/July/day3/train.csv')
test = pd.read_csv('~/tensorflow_python3/July/day3/test.csv')
train.head()

In [4]:
# 画图发现有个一数据比较异常,去掉
train.drop(17, inplace=True)

# 其实还构造了一些特征,但是发现效果反而更差


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-295fa43f6472> in <module>()
      1 # 画图发现有个一数据比较异常,去掉
----> 2 train.drop(17, inplace=True)
      3 
      4 # 其实还构造了一些特征,但是发现效果反而更差

NameError: name 'train' is not defined

In [3]:
model = LinearRegression().fit(train.iloc[:, 2:], train.iloc[:, 1])
result = model.predict(test.iloc[:, 2:])
print('result: ', result)
print('rmse: %f' % rmse(test.iloc[:, 1], result))


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-b290a45ac428> in <module>()
----> 1 model = LinearRegression().fit(train.iloc[:, 2:], train.iloc[:, 1])
      2 result = model.predict(test.iloc[:, 2:])
      3 print('result: ', result)
      4 print('rmse: %f' % rmse(test.iloc[:, 1], result))

NameError: name 'train' is not defined

In [ ]: