In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
In [16]:
# Load the annual climate table; fields are separated by runs of whitespace.
# sep=r"\s+" is the documented replacement for the deprecated delim_whitespace=True.
df = pd.read_csv("climate_annual.txt", sep=r"\s+")

# Index by state name so one state's rows can be selected by label.
df2 = df.set_index("State")

# Keep only California rows and the columns used downstream. List-based selection
# returns every row carrying the label and, unlike a label slice, does not require
# the (non-unique) State index to be sorted.
df3 = df2.loc[["California"], ["Year", "PCP", "ZNDX"]]
In [17]:
# Z-index series for the selected state as a plain NumPy array.
print(df3.ZNDX.values)
ZNDX_array = df3.ZNDX.values
print(ZNDX_array.reshape(-1,1).shape)

# Lag features: row k holds three consecutive values [z[k], z[k+1], z[k+2]].
result = [
    [ZNDX_array[start + lag] for lag in range(3)]
    for start in range(len(ZNDX_array) - 2)
]
ZNDX_newx = np.array(result)

# Targets: the value that follows each window (index k+3), as a column vector.
ZNDX_newy = df3.ZNDX.values[3:].reshape(-1,1)

# The feature matrix has one more row than the target vector (the final window has
# no observed successor), hence the 6-row / 5-row split below.
ZNDX_newx_train, ZNDX_newx_test = ZNDX_newx[:-6], ZNDX_newx[-6:]
ZNDX_newy_train, ZNDX_newy_test = ZNDX_newy[:-5], ZNDX_newy[-5:]

# Show the held-out windows and targets, then their shapes.
print(ZNDX_newx_test, ZNDX_newy_test, sep="\n")
print(ZNDX_newx_test.shape, ZNDX_newy_test.shape, sep="\n")
In [18]:
# Lasso regression: predict each year's Z-index from the three preceding years.
regr = linear_model.Lasso()
regr.fit(ZNDX_newx_train, ZNDX_newy_train)
ZNDX_lassoy_train = regr.predict(ZNDX_newx_train)
ZNDX_lassoy_test = regr.predict(ZNDX_newx_test)
print(ZNDX_lassoy_train.shape)
print(ZNDX_lassoy_test.shape)

# Observed years followed by the three forecast years.
# NOTE(review): the literals assume the data ends in 2016 — confirm against the file.
year_all = np.append(df3.Year.values, [2017, 2018, 2019])
y_lasso = np.append(ZNDX_lassoy_train, ZNDX_lassoy_test)

# The last feature window has no observed target, so its prediction is the
# first out-of-sample forecast (2017).
ZNDX_17_y = y_lasso[-1]
print(ZNDX_17_y)

# Roll forward: each new window is the last two values of the previous window
# plus the newest prediction. predict() needs a 2-D (n_samples, n_features)
# input, so the single window is reshaped to one row.
ZNDX_18_x = np.append(ZNDX_array[-2:], ZNDX_17_y)
print(ZNDX_18_x)
ZNDX_18_y = regr.predict(ZNDX_18_x.reshape(1, -1))
print(ZNDX_18_y)
ZNDX_18 = ZNDX_18_y.item(0)

ZNDX_19_x = np.append(ZNDX_18_x[-2:], ZNDX_18)
print(ZNDX_19_x)
ZNDX_19_y = regr.predict(ZNDX_19_x.reshape(1, -1))
print(ZNDX_19_y)
ZNDX_19 = ZNDX_19_y.item(0)

# All predictions in chronological order: fitted + test + 2017, then 2018 and 2019.
y_lasso_all = np.append(y_lasso, [ZNDX_18, ZNDX_19])
print(year_all.shape[0])
print(y_lasso.shape[0])
# Series has no reshape(); go through the underlying array.
print(df3.Year.values[3:].reshape(-1,1).shape)

plt.figure()
# Observed Z-index for every year in the data.
plt.scatter(df3.Year.values, df3.ZNDX.values)
# Highlight the two forecasts that were built on earlier forecasts.
plt.scatter(year_all[-2:], y_lasso_all[-2:], color='red')
# Window k predicts the value at index k+3, so the prediction curve starts at the
# fourth year and runs through the last forecast year.
plt.plot(year_all[3:], y_lasso_all)
plt.show()
In [19]:
# Side-by-side comparison of Lasso predictions and observed values.
fig = plt.figure(figsize=(10,4.5))
fig.suptitle('Lasso Regression', fontsize=14, fontweight='bold')

# Year values as a plain array so the slices below are purely positional.
years = df3.Year.values

# Left panel: training period. The first target is the fourth year, and the last
# five targets are held out for testing.
ax_train = fig.add_subplot(121)
ax_train.plot(years[3:-5], regr.predict(ZNDX_newx_train), label='Predict', c='r')
ax_train.plot(years[3:-5], ZNDX_newy_train, label='Actual')
ax_train.legend(loc=0, fontsize=10)
ax_train.set_title('Training Data')
ax_train.set_xlabel('Year')
ax_train.set_ylabel('Z Index')

# Right panel: held-out period. The test feature matrix has one extra trailing row
# with no observed target, so its prediction is dropped to match the five actuals.
ax_test = fig.add_subplot(122)
ax_test.scatter(years[-5:], regr.predict(ZNDX_newx_test)[:-1], marker='x', c='r', label='Predict')
ax_test.scatter(years[-5:], ZNDX_newy_test, marker='*', label='Actual')
ax_test.legend(loc=2, fontsize=10)
ax_test.set_title('Testing Data')
ax_test.set_xlabel('Year')
ax_test.set_ylabel('Z Index')

fig.tight_layout(pad=4, w_pad=4)
plt.show()
In [ ]: