In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import linear_model
In [2]:
# Load the training and test splits from CSV files in the working directory.
# Both paths are relative, so the notebook must be run from the folder that
# contains 'train.csv' and 'test.csv'.
df_train = pd.read_csv(filepath_or_buffer='train.csv')
df_test = pd.read_csv(filepath_or_buffer='test.csv')
In [3]:
# Preview the first five rows of the raw training frame.
df_train.head(n=5)
Out[3]:
In [4]:
# Narrow the training frame to the columns used from here on; every other
# column is discarded. 'SalePrice' is the value the model is fitted against.
df_train = df_train.loc[
    :,
    [
        'LotFrontage',
        'LotArea',
        'SaleCondition',
        'SalePrice',
    ],
]
In [5]:
# Preview the first five rows after the column selection.
df_train.head(n=5)
Out[5]:
In [16]:
# Apply the matching column selection to the test frame. 'SalePrice' is not
# selected here, unlike for the training frame.
df_test = df_test.loc[
    :,
    [
        'LotFrontage',
        'LotArea',
        'SaleCondition',
    ],
]
# Preview the first five rows of the trimmed test frame.
df_test.head(n=5)
Out[16]:
In [17]:
# Create an unfitted scikit-learn LinearRegression estimator with its default
# settings; it is trained in a later cell via `.fit(...)`.
linear_reg = linear_model.LinearRegression()
In [26]:
# Fit a single-feature model: 'LotArea' is the only predictor, 'SalePrice' the
# target. Both are passed as one-column DataFrames (double brackets), so the
# inputs are two-dimensional.
# Per the scikit-learn API, `fit` returns the estimator itself, so `val` is
# the same fitted object as `linear_reg`.
val = linear_reg.fit(
    X=df_train[['LotArea']],
    y=df_train[['SalePrice']],
)
In [27]:
# Visualise the fit: scatter the training observations and overlay the
# regression line predicted by the fitted model `val`.

# Sort the x values so the line is drawn once, left to right, instead of
# zig-zagging back and forth over itself in row order.
lot_area_sorted = df_train[['LotArea']].sort_values('LotArea')

plt.scatter(df_train[['LotArea']], df_train[['SalePrice']], label='Training data')
# Use an explicit contrasting colour: by default the line would share the
# scatter's first-cycle colour and be hard to distinguish from the points.
plt.plot(lot_area_sorted, val.predict(lot_area_sorted), color='red', label='Linear fit')

# Label the figure so it can be understood without reading the code.
plt.xlabel('LotArea')
plt.ylabel('SalePrice')
plt.title('SalePrice vs. LotArea with linear regression fit')
plt.legend()
plt.show()
In [ ]: