In [ ]:
# Put your code to import the libraries here.
Next, you'll want to import the CSV file using pandas. Read the file "../data/data2.csv" into a DataFrame and assign it to a variable called oilData.
In [ ]:
# Put your code to load the data from our csv file here.
In [ ]:
# Data cleaning -- we haven't covered this yet, so feel free to skip over it;
# a future session comes back to it.
# Turn the Value column into numbers (entries that can't be parsed become NaN).
oilData['Value'] = pd.to_numeric(oilData['Value'], errors='coerce')
# Turn the YYYYMM column into datetimes (entries that don't match year+month become NaT).
oilData['YYYYMM'] = pd.to_datetime(oilData['YYYYMM'], format='%Y%m', errors='coerce')
# Keep only the rows with no missing values: most of what follows REALLY
# doesn't cope with missing data.
oilData2 = oilData.dropna()
# Show a summary of every column.
oilData2.describe(include='all')
In [ ]:
# pandas can draw a line chart straight from the data frame, with no reshaping needed:
# YYYYMM goes on the horizontal axis and Value on the vertical axis.
# We'll learn more about this later.
oilData2.plot.line(x='YYYYMM', y='Value')
plt.show()
In [ ]:
# Put your code to define X and y here.
In [ ]:
# You'll run into a little bit of trouble when trying to use the datetimes in X directly.
# This cell converts them into plain numbers so we can run the linear regression:
# every entry in the first column of X is replaced by its offset from the first entry,
# so the first row of X2 is 0.
# NOTE: the unit of the offset depends on how X stores its datetimes
# (presumably nanoseconds for a datetime64[ns] array -- TODO confirm).
# This isn't strictly the correct thing to do, but it's quick and easy.
# We'll learn the "right" way to do this in another session.
X2 = np.zeros(X.shape)  # float array with the same shape as X
for i in range(X.shape[0]):
    # The loop body must be indented, otherwise Python raises an IndentationError.
    X2[i, 0] = X[i, 0] - X[0, 0]
In [ ]:
# Put your linear regression training and plotting code here.
# Make sure that you change X to X2 everywhere it exists!
In [ ]: