In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
In [2]:
# Two-letter codes, one per input CSV. Besides the state abbreviations the
# list also carries "DC" and "US" (presumably the national aggregate -- verify
# against the data files).
statelist = (
    "AK AL AR AZ CA CO CT DC DE FL GA HI IA "
    "ID IL IN KS KY LA MA MD ME MI MN MO MS "
    "MT NC ND NE NH NJ NM NV NY OH OK OR PA "
    "RI SC SD TN TX US UT VA VT WA WI WV WY"
).split()
print(len(statelist))
In [10]:
# Build "<code>.csv" for every entry of statelist, read each file and print
# its row count.
# NOTE(review): the loop-body indentation was lost in the export; this assumes
# all four statements belong to the loop. Either way `df` is rebound on every
# read, so after this cell it holds only the file of the last statelist entry.
fn_ext = '.csv'
filename = []
for state in statelist:
    fn = state + fn_ext
    filename.append(fn)
    df = pd.read_csv(fn)
    print(len(df))
In [11]:
# Turn the price series into overlapping 3-value windows (model inputs) and
# the value following each window (model target).
OP_array = df["Inflation Adjusted Price"].values
OP = np.reshape(OP_array, (-1, 1))

# Row i of OP_newx holds the prices at positions i, i+1 and i+2.
result = [list(OP_array[i:i + 3]) for i in range(len(OP_array) - 2)]
OP_newx = np.array(result)

# The target of row i is the price at position i+3. The final window has no
# observed target, so OP_newx is one row longer than OP_newy.
OP_newy = OP_array[3:].reshape(-1, 1)

# Hold out the last 5 targets. The 6 held-out windows are the 5 that pair with
# those targets plus the final, target-less window; that is why the two slices
# use different offsets while the training halves still line up row for row.
OP_newx_train, OP_newx_test = OP_newx[:-6], OP_newx[-6:]
OP_newy_train, OP_newy_test = OP_newy[:-5], OP_newy[-5:]
# Fit a Lasso model with the library's default settings on the training
# windows, then predict for both the training and the held-out windows.
regr = linear_model.Lasso().fit(OP_newx_train, OP_newy_train)
OP_lassoy_train, OP_lassoy_test = (
    regr.predict(windows) for windows in (OP_newx_train, OP_newx_test)
)
# Extend the year axis by the three forecast years.
# NOTE(review): the hard-coded years assume the last year in df is 2016 --
# confirm against the data.
year_all = np.append(df.Year.values, [2017, 2018, 2019])

# Model output for every window, training rows first. The last entry comes
# from the final window (the one without an observed target), i.e. it is the
# first out-of-sample value.
y_lasso = np.append(OP_lassoy_train, OP_lassoy_test)
OP_17_y = y_lasso[-1]
print(OP_17_y)

# Roll the window forward one step: the last two observed prices followed by
# the value just predicted.
OP_18_x = np.append(OP_array[-2:], OP_17_y)
print(OP_18_x)
# predict() requires a 2-D (n_samples, n_features) array; a bare 1-D window is
# rejected by current scikit-learn, so the window is passed as a single row.
OP_18_y = regr.predict(OP_18_x.reshape(1, -1))
print(OP_18_y)
OP_18 = OP_18_y.item(0)

# Roll forward once more, now using two predicted values.
OP_19_x = np.append(OP_18_x[-2:], OP_18)
print(OP_19_x)
OP_19_y = regr.predict(OP_19_x.reshape(1, -1))
print(OP_19_y)
OP_19 = OP_19_y.item(0)

# Full predicted series: one value per entry of year_all[3:].
y_lasso_all = np.append(y_lasso, [OP_18, OP_19])
# Plot the observed prices, the model's predicted series, and highlight the
# last two predicted points in red.
fig, ax = plt.subplots()
# Series.reshape is not available in current pandas; plot from the underlying
# arrays instead (scatter/plot accept 1-D inputs directly).
ax.scatter(df["Year"].values, df["Inflation Adjusted Price"].values, label="observed")
ax.scatter(year_all[-2:], y_lasso_all[-2:], color='red', label="last two predictions")
# The predicted series starts at the fourth year because the first prediction
# needs a full 3-value window.
ax.plot(year_all[3:], y_lasso_all, label="Lasso prediction")
ax.set_xlabel("Year")
ax.set_ylabel("Inflation Adjusted Price")
ax.legend()
plt.show()
In [ ]: