In [1]:
import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt

In [2]:
# Read the comma-delimited dataset from disk via np.genfromtxt.
# The relative path is resolved against the kernel's working directory.
data = np.genfromtxt(
    'challenge_dataset.txt',
    delimiter=',',
)

In [3]:
# Column 0 is used as the input variable and column 1 as the target.
x_values, y_values = data[:, 0], data[:, 1]

In [4]:
# Scatter the raw (x, y) pairs to inspect the relationship before fitting.
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
plt.show()



In [5]:
# Turn both 1-D arrays into column vectors of shape (n_samples, 1), the 2-D
# layout passed to the estimator's fit/predict calls below.
# -1 lets NumPy infer the number of rows from the array length, so this cell
# no longer breaks when the dataset has a row count other than 97.
x_values = np.reshape(x_values, (-1, 1))
y_values = np.reshape(y_values, (-1, 1))

In [6]:
# Instantiate scikit-learn's LinearRegression estimator with its default
# settings; it is not fitted until .fit() is called.
model = linear_model.LinearRegression()

In [7]:
# Fit the regression on the column-vector inputs and targets.
# fit() returns the estimator itself, so `out` refers to the fitted model.
out = model.fit(X=x_values, y=y_values)

In [8]:
# Overlay the model's predictions (drawn as a line) on the observed points.
# Note: predictions are made on the same data the model was trained on.
y_predicted = out.predict(x_values)
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, y_predicted)
plt.show()



In [ ]: