In [126]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
In [127]:
# Load the whitespace-delimited annual climate table.
# sep=r"\s+" is the documented replacement for the deprecated
# delim_whitespace=True flag and parses the file the same way.
df = pd.read_csv("climate_annual.txt", sep=r"\s+")
df.shape
Out[127]:
In [128]:
# Preview the first five rows of the raw table.
df.head(n=5)
Out[128]:
In [129]:
# Use the "State" column as the row index (returns a new frame; df is untouched).
df2 = df.set_index(keys="State")
df2.head(n=5)
Out[129]:
In [139]:
# Keep only the California rows and the three columns used downstream.
# A boolean mask replaces the label slice "California":"California": slicing
# on a duplicated label raises KeyError unless its rows sit contiguously in
# the index, whereas the mask selects the same rows regardless of ordering.
is_california = df2.index == "California"
df3 = df2.loc[is_california, ["Year", "PCP", "ZNDX"]]
df3.head()
Out[139]:
In [131]:
# Side-by-side time series of the PCP and ZNDX columns of df3.
fig, (ax_pcp, ax_zndx) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: precipitation, drawn in red.
ax_pcp.plot(df3['Year'], df3['PCP'], label='PCP', c='r')
ax_pcp.legend(loc=0, fontsize=10)
ax_pcp.set_title('Precipitation Index')
ax_pcp.set_xlabel('Year')
ax_pcp.set_ylabel('PCP')

# Right panel: Z index, default line colour.
ax_zndx.plot(df3['Year'], df3['ZNDX'], label='ZNDX')
ax_zndx.legend(loc=0, fontsize=10)
ax_zndx.set_title('Z Index')
ax_zndx.set_xlabel('Year')
ax_zndx.set_ylabel('ZNDX')

plt.show()
In [140]:
# Build a lag-3 supervised dataset from the PCP series: every feature row
# holds three consecutive values and its target is the value that follows.
PCP_array = df3.PCP.values
print(PCP_array)
print(PCP_array.reshape(-1, 1).shape)

# One window per starting position that still has two successors.
result = [
    [PCP_array[start], PCP_array[start + 1], PCP_array[start + 2]]
    for start in range(len(PCP_array) - 2)
]
PCP_newx = np.array(result)
PCP_newy = PCP_array[3:].reshape(-1, 1)

# PCP_newx has one more row than PCP_newy: the final window ends at the last
# observation, so no target exists for it. Holding out 6 windows against
# 5 targets keeps the training rows paired one-to-one and leaves that
# unlabeled window as the last test row.
PCP_newx_train, PCP_newx_test = PCP_newx[:-6], PCP_newx[-6:]
PCP_newy_train, PCP_newy_test = PCP_newy[:-5], PCP_newy[-5:]

print(PCP_newx_test, PCP_newy_test, sep="\n")
print(PCP_newx_test.shape, PCP_newy_test.shape, sep="\n")
In [143]:
# Lasso regression on the lag-3 windows. Each prediction is the PCP value for
# the observation immediately after its three-value input window.
regr = linear_model.Lasso()
regr.fit(PCP_newx_train, PCP_newy_train)
PCP_lassoy_train = regr.predict(PCP_newx_train)
PCP_lassoy_test = regr.predict(PCP_newx_test)

# Year axis extended by the three forecast years.
# NOTE(review): the hard-coded years presume the data ends in 2016 -- confirm.
year_all = np.append(df3.Year.values, [2017, 2018, 2019])

# Train + test predictions in chronological order. The last entry comes from
# the window that has no observed target, i.e. the first out-of-sample step.
y_lasso = np.append(PCP_lassoy_train, PCP_lassoy_test)
PCP_17_y = y_lasso[-1]
print(PCP_17_y)

# Roll the forecast forward one step at a time, feeding each prediction back
# in as the newest lag. predict() requires a 2-D (n_samples, n_features)
# array, so every single window is reshaped into one row before the call.
PCP_18_x = np.append(PCP_array[-2:], PCP_17_y)
print(PCP_18_x)
PCP_18_y = regr.predict(PCP_18_x.reshape(1, -1))
print(PCP_18_y)
PCP_18 = PCP_18_y.item(0)

PCP_19_x = np.append(PCP_18_x[-2:], PCP_18)
print(PCP_19_x)
PCP_19_y = regr.predict(PCP_19_x.reshape(1, -1))
print(PCP_19_y)
PCP_19 = PCP_19_y.item(0)

y_lasso_all = np.append(y_lasso, [PCP_18, PCP_19])

plt.figure()
# Observations. Series has no .reshape in current pandas, so plot the
# underlying arrays directly.
plt.scatter(df3.Year.values, df3.PCP.values)
# The two recursively forecast points, highlighted in red.
plt.scatter(year_all[-2:], y_lasso_all[-2:], color='red')
# y_lasso_all[0] is the prediction for observation index 3 (the first one
# with three predecessors), so the matching years are year_all[3:].
plt.plot(year_all[3:], y_lasso_all)
plt.xlabel('Year')
plt.ylabel('PCP')
plt.show()
In [142]:
# Compare Lasso predictions against observed PCP on the train and test splits.
fig, (ax_train, ax_test) = plt.subplots(1, 2, figsize=(10, 4.5))
fig.suptitle('Lasso Regression', fontsize=14, fontweight='bold')

# Training split: targets start at the 4th observation and stop before the
# five held-out ones, hence the [3:-5] year slice.
train_years = df3.Year[3:-5]
ax_train.plot(train_years, regr.predict(PCP_newx_train), label='Predict', c='r')
ax_train.plot(train_years, PCP_newy_train, label='Actual')
ax_train.legend(loc=0, fontsize=10)
ax_train.set_title('Training Data')
ax_train.set_xlabel('Year')
ax_train.set_ylabel('PCP')

# Test split: the last prediction is dropped so the remaining five values
# line up with the five held-out targets.
test_years = df3.Year[-5:]
ax_test.scatter(test_years, regr.predict(PCP_newx_test)[:-1], marker='x', c='r', label='Predict')
ax_test.scatter(test_years, PCP_newy_test, marker='*', label='Actual')
ax_test.legend(loc=2, fontsize=10)
ax_test.set_title('Testing Data')
ax_test.set_xlabel('Year')
ax_test.set_ylabel('PCP')

fig.tight_layout(pad=4, w_pad=4)
plt.show()
In [ ]: