In [1]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.model_selection import train_test_split

In [2]:
# read data
# Load the table from CA.csv and split its Year column into the last ten
# rows (year1) and everything before them (year2).
data = pd.read_csv("CA.csv")
print(data)
# Slice the rows of the Year column. The previous form
# `data[['Year'][-10:]]` sliced the one-element list ['Year'] instead, so
# year1 was the entire column and year2 (`data[[]]`) was an empty frame.
# Double brackets keep both results as single-column DataFrames, as before.
year1 = data[['Year']].iloc[-10:]
year2 = data[['Year']].iloc[:-10]


    Unnamed: 0    Year    HYTCP    WYTCP   SOEGP    NUETP           GDP  \
0            0  1960.0  17445.0      0.0     0.0      0.0  5.143560e+04   
1            1  1961.0  15368.0      5.0     0.0      0.0  5.689940e+04   
2            2  1962.0  22835.0      7.0     0.0      0.0  6.236320e+04   
3            3  1963.0  25334.0    193.0     0.0      0.0  6.780900e+04   
4            4  1964.0  22098.0    367.0     0.0      0.0  7.351300e+04   
5            5  1965.0  30523.0    270.0     0.0      0.0  7.825300e+04   
6            6  1966.0  26236.0    163.0     0.0      0.0  8.462700e+04   
7            7  1967.0  35277.0    563.0     0.0      0.0  8.957100e+04   
8            8  1968.0  27175.0   1505.0     0.0      0.0  9.872100e+04   
9            9  1969.0  40418.0   2458.0     0.0      0.0  1.066290e+05   
10          10  1970.0  38082.0   3132.0     0.0      0.0  1.119850e+05   
11          11  1971.0  39018.0   3519.0     0.0      0.0  1.203720e+05   
12          12  1972.0  31755.0   3175.0     0.0      0.0  1.331430e+05   
13          13  1973.0  38754.0   2631.0     0.0      0.0  1.472310e+05   
14          14  1974.0  46422.0   3698.0     0.0      0.0  1.618150e+05   
15          15  1975.0  40103.0   6071.0     0.0      0.0  1.783940e+05   
16          16  1976.0  23193.0   4807.0     0.0      0.0  1.973470e+05   
17          17  1977.0  14251.0   8115.0     0.0      0.0  2.295920e+05   
18          18  1978.0  37206.0   7659.0     0.0      0.0  2.628030e+05   
19          19  1979.0  33920.0   8762.0     0.0      0.0  2.934910e+05   
20          20  1980.0  40780.0   4920.0     0.0      0.0  3.279580e+05   
21          21  1981.0  29764.0   3206.0     0.0      0.0  3.688080e+05   
22          22  1982.0  50226.0   3735.0     0.0      0.0  3.937880e+05   
23          23  1983.0  56885.0   5613.0     0.0      1.0  4.261430e+05   
24          24  1984.0  43159.0  14144.0     5.0      4.0  4.821660e+05   
25          25  1985.0  31717.0  19729.0    11.0      3.0  5.239060e+05   
26          26  1986.0  41459.0  26215.0    14.0      3.0  5.630820e+05   
27          27  1987.0  24564.0  30387.0    10.0      4.0  6.153590e+05   
28          28  1988.0  23474.0  30863.0     9.0      1.0  6.715750e+05   
29          29  1989.0  30801.0  32519.0   250.0   2079.0  7.229790e+05   
..         ...     ...      ...      ...     ...      ...           ...   
31          31  1991.0  21957.0  31542.0   471.0   2915.0  7.900460e+05   
32          32  1992.0  20167.0  35244.0   399.0   2864.0  8.073580e+05   
33          33  1993.0  40493.0  31581.0   462.0   2984.0  8.264470e+05   
34          34  1994.0  23013.0  33752.0   486.0   3387.0  8.613600e+05   
35          35  1995.0  48033.0  30246.0   497.0   3087.0  9.115770e+05   
36          36  1996.0  44751.0  34097.0   521.0   3079.0  9.641860e+05   
37          37  1997.0  41055.0  30512.0   511.0   3137.0  1.037091e+06   
38          38  1998.0  49548.0  34594.0   502.0   2758.0  1.151119e+06   
39          39  1999.0  40737.0  33372.0   495.0   3230.0  1.248006e+06   
40          40  2000.0  38334.0  35176.0   493.0   3518.0  1.362995e+06   
41          41  2001.0  25542.0  33220.0   542.0   3500.0  1.376163e+06   
42          42  2002.0  31141.0  34352.0   554.0   3803.0  1.431267e+06   
43          43  2003.0  36371.0  35594.0   534.0   3895.0  1.523473e+06   
44          44  2004.0  34141.0  30268.0   571.0   4306.0  1.645084e+06   
45          45  2005.0  39632.0  36155.0   537.0   4262.0  1.766693e+06   
46          46  2006.0  48047.0  31959.0   495.0   4883.0  1.879520e+06   
47          47  2007.0  27328.0  35792.0   557.0   5585.0  1.956523e+06   
48          48  2008.0  24128.0  32482.0   670.0   5385.0  1.983926e+06   
49          49  2009.0  27888.0  31764.0   647.0   5840.0  1.912115e+06   
50          50  2010.0  33431.0  32201.0   765.0   6079.0  1.965886e+06   
51          51  2011.0  42557.0  36663.0   861.0   7752.0  2.036297e+06   
52          52  2012.0  26837.0  18507.0  1328.0   9754.0  2.131199e+06   
53          53  2013.0  23755.0  17912.0  3727.0  12822.0  2.223958e+06   
54          54  2014.0  16531.0  16986.0  9834.0  12992.0  2.350807e+06   
55          55  2015.0      NaN      NaN     NaN      NaN  1.972377e+06   
56          56  2016.0      NaN      NaN     NaN      NaN  2.031505e+06   
57          57  2017.0      NaN      NaN     NaN      NaN  2.073136e+06   
58          58  2018.0      NaN      NaN     NaN      NaN  2.115467e+06   
59          59  2019.0      NaN      NaN     NaN      NaN  2.157770e+06   
60          60  2020.0      NaN      NaN     NaN      NaN  2.200074e+06   

          CLPRB         EMFDB        ENPRP          NGMPB         PAPRB  \
0      0.000000      0.000000     0.000000  589695.145060  1.771042e+06   
1      0.000000      0.000000     0.000000  633797.940590  1.737732e+06   
2      0.000000      0.000000     0.000000  642889.456260  1.720222e+06   
3      0.000000      0.000000     0.000000  736625.842790  1.745266e+06   
4      0.000000      0.000000     0.000000  756639.938890  1.740052e+06   
5      0.000000      0.000000     0.000000  752461.647380  1.835282e+06   
6      0.000000      0.000000     0.000000  785759.223820  2.002711e+06   
7      0.000000      0.000000     0.000000  776043.300260  2.083470e+06   
8      0.000000      0.000000     0.000000  814570.862530  2.177877e+06   
9      0.000000      0.000000     0.000000  772179.491550  2.176688e+06   
10     0.000000      0.000000     0.000000  739623.684340  2.158708e+06   
11     0.000000      0.000000     0.000000  700449.587690  2.079207e+06   
12     0.000000      0.000000     0.000000  561777.951740  2.012728e+06   
13     0.000000      0.000000     0.000000  507133.068650  1.949235e+06   
14     0.000000      0.000000     0.000000  419143.681910  1.873417e+06   
15     0.000000      0.000000     0.000000  365212.133350  1.868754e+06   
16     0.000000      0.000000     0.000000  400198.670770  1.890922e+06   
17     0.000000      0.000000     0.000000  353544.214460  2.027732e+06   
18     0.000000      0.000000     0.000000  352083.582160  2.013650e+06   
19     0.000000      0.000000     0.000000  282281.173410  2.043154e+06   
20     0.000000      0.000000     0.000000  342208.328920  2.070153e+06   
21     0.000000      0.000000     0.000000  414770.715990  2.232756e+06   
22     0.000000      0.000000     0.000000  432487.213510  2.329118e+06   
23     0.000000      0.000000     0.000000  463384.589870  2.347190e+06   
24     0.000000      0.000000     0.000000  526963.163720  2.389716e+06   
25   852.000000    578.033290    91.176730  546052.751980  2.458487e+06   
26     0.000000    610.944910    96.713330  511410.722250  2.358657e+06   
27   552.000000    667.850100   106.099830  468677.761570  2.295060e+06   
28   648.000000    669.909760   106.810560  444232.977320  2.238875e+06   
29   492.000000    631.336660   101.022230  404557.660430  2.112650e+06   
..          ...           ...          ...            ...           ...   
31   684.000000    618.236050    99.628730  414017.142190  2.035893e+06   
32  1236.000000    651.364350   105.350780  402191.760080  2.018632e+06   
33     0.000000    686.467180   111.416690  352455.970670  1.993628e+06   
34     0.000000    753.990230   122.825320  340053.118510  1.992700e+06   
35     0.000000    724.953310   118.511420  308223.983960  2.033979e+06   
36     0.000000    297.700520    48.838590  320465.715730  2.011602e+06   
37     0.000000    526.367570    86.658560  313734.335920  1.967981e+06   
38     0.000000    620.437570   102.509640  349864.265080  1.913188e+06   
39     0.000000    575.600310    95.240350  410698.817710  1.813770e+06   
40     0.000000    691.267920   114.527590  390975.455400  1.775519e+06   
41     0.000000    758.043380   125.754130  408479.078050  1.692243e+06   
42     0.000000   1032.953090   171.582900  394507.109550  1.669199e+06   
43     0.000000   1213.083940   202.393840  373313.527210  1.624000e+06   
44     0.000000   1102.974250   184.838880  356231.719530  1.550108e+06   
45     0.000000   2155.600700   362.789470  353819.535400  1.489718e+06   
46     0.000000   5534.308260   935.595310  351100.499310  1.447460e+06   
47     0.000000  12532.827110  2128.240820  339526.550440  1.399992e+06   
48     0.000000  13309.875940  2270.016060  331492.568970  1.384408e+06   
49     0.000000   6861.251470  1177.514910  309834.883070  1.328165e+06   
50    76.270489   6625.601172  1137.323720  270557.609868  1.470517e+06   
51   114.654193   6462.703992  1109.617441  244453.061373  1.536844e+06   
52   134.308225   6359.118192  1091.691303  225563.599155  1.564214e+06   
53   145.090308   6304.281471  1081.915282  210625.372980  1.571620e+06   
54   151.436417   6288.444013  1078.659354  197851.727459  1.568798e+06   
55   155.318094   6304.293457  1081.108508  186263.343209  1.560733e+06   
56   158.214000   6346.142410  1088.447732  175324.552241  1.549981e+06   
57   160.617019   6409.115963  1099.047002  164741.230111  1.537854e+06   
58   162.527153   6489.151694  1112.906320  154352.895510  1.525021e+06   
59   164.437286   6582.999665  1128.395660  144070.818157  1.511826e+06   
60   166.347419   6687.409935  1146.330036  133847.346607  1.498447e+06   

          PCP       ZNDX  Nominal Price  Inflation Adjusted Price  
0   21.770000  -4.360000       2.910000                 23.720000  
1   16.530000  -6.810000       2.850000                 22.960000  
2   22.100000  -0.680000       2.850000                 22.690000  
3   24.740000   9.900000       2.910000                 22.900000  
4   22.270000   2.690000       3.000000                 23.300000  
5   21.980000   4.630000       3.010000                 23.000000  
6   18.720000  -9.290000       3.100000                 23.010000  
7   23.750000   7.100000       3.120000                 22.530000  
8   20.370000  -4.320000       3.180000                 21.990000  
9   30.600000   9.410000       3.320000                 21.810000  
10  27.810000   4.120000       3.390000                 21.040000  
11  18.240000   2.830000       3.600000                 21.420000  
12  17.210000  -6.130000       3.600000                 20.740000  
13  28.930000   4.810000       4.750000                 25.560000  
14  21.350000  -1.410000       9.350000                 45.600000  
15  20.940000   0.790000      12.210000                 54.610000  
16  11.750000 -18.290000      13.100000                 55.460000  
17  17.530000 -17.110000      14.400000                 57.200000  
18  28.860000   9.300000      14.950000                 55.240000  
19  24.630000   2.510000      25.100000                 82.510000  
20  24.120000   2.600000      37.420000                109.510000  
21  26.240000   0.960000      35.750000                 94.830000  
22  31.990000  20.000000      31.830000                 79.500000  
23  42.460000  30.010000      29.080000                 70.340000  
24  18.320000  -3.280000      28.750000                 66.670000  
25  14.700000 -11.920000      26.920000                 60.270000  
26  24.250000  -0.720000      14.440000                 31.720000  
27  19.710000 -11.870000      17.750000                 37.620000  
28  17.100000  -5.790000      14.870000                 30.330000  
29  14.950000  -9.010000      18.330000                 35.600000  
..        ...        ...            ...                       ...  
31  19.790000  -5.360000      20.200000                 35.730000  
32  23.230000  -7.910000      19.250000                 33.040000  
33  26.700000   7.270000      16.750000                 27.940000  
34  17.540000  -8.860000      15.660000                 25.440000  
35  35.200000  20.590000      16.750000                 26.480000  
36  32.510000  12.480000      20.460000                 31.400000  
37  20.600000  -6.670000      18.640000                 27.980000  
38  36.410000  29.060000      11.910000                 17.600000  
39  17.630000  -5.380000      16.560000                 23.890000  
40  21.430000  -4.560000      27.390000                 38.290000  
41  23.020000  -7.580000      23.000000                 31.300000  
42  18.680000 -10.150000      22.810000                 30.520000  
43  21.630000  -0.900000      27.690000                 36.260000  
44  21.760000  -6.820000      37.660000                 47.980000  
45  30.100000  16.070000      50.040000                 61.650000  
46  23.610000  -0.670000      58.300000                 69.640000  
47  13.840000 -18.640000      64.200000                 74.440000  
48  17.850000 -14.240000      91.480000                102.000000  
49  17.050000 -10.550000      53.480000                 59.930000  
50  31.180000  13.930000      71.210000                 78.650000  
51  18.790000   2.910000      87.040000                 93.210000  
52  23.450000  -3.950000      86.460000                 90.720000  
53   7.930000 -22.270000      91.170000                 94.250000  
54  19.900000 -20.260000      85.600000                 87.050000  
55  15.050000 -16.660000      41.850000                 42.530000  
56  26.040000   1.290000      46.123463                 45.343343  
57  21.873908   0.618620      50.270318                 47.865574  
58  20.711858  -2.385896      53.639684                 49.564252  
59  20.381871  -5.337564      56.231562                 51.262929  
60  20.405636  -7.963284      58.823440                 52.961607  

[61 rows x 16 columns]

In [10]:
# Using time series prediction of future feature data
# Build a supervised data set for one-step-ahead forecasting of ENPRP:
# every sample is a window of k consecutive yearly values and its target is
# the value of the year that follows the window.
# k is the number of past years used for prediction
k = 3
# Only the first 50 rows (1960-2009) are treated as history.
n_history = 50
n_samples = n_history - k
enprp = data['ENPRP'].values

# The window width follows k (it used to be hard-coded to 3, which raised a
# shape error for any other k).
all_x = np.zeros((n_samples, k))
all_y = np.zeros((n_samples, 1))

for i in range(n_samples):
    all_x[i, :] = enprp[i:i + k]
    all_y[i, :] = enprp[i + k]
# split data for train and test
# A fixed random_state makes the split (and every later plot) reproducible
# under Restart & Run All.
train_x, test_x, train_y, test_y = train_test_split(
    all_x, all_y, test_size=0.2, random_state=0)

In [11]:
# Show how close time series predicts the data compared to test data
# Fit a linear regression on the training windows, then draw its predictions
# for the held-out windows next to the true targets.
regr = linear_model.LinearRegression().fit(train_x, train_y)

test_pred = regr.predict(test_x)
for series in (test_pred, test_y):
    plt.plot(series)
plt.legend(['Prediction', 'ENPRP Real'], bbox_to_anchor=(1.45, 1))
plt.show()



In [12]:
# use time series to predict next five years
# Roll the fitted model forward one year at a time: each prediction is
# appended to the input window and the oldest value is dropped.
n_future = 5

# Row index just past the training history (all_y holds rows k..hist_end-1).
hist_end = all_y.shape[0] + k
first_pred_year = 1960 + hist_end

# Seed the window with the last k *historical* values so the forecast
# continues where the plotted history ends. The previous seed,
# data['ENPRP'][-k:], took the final rows of the file (2018-2020) while the
# result was plotted as 2010-2014.
# astype(float) returns a copy, so the DataFrame is never modified.
future_x = data['ENPRP'].values[hist_end - k:hist_end].astype(float).reshape(1, -1)

pred_y = np.zeros((n_future, 1))
for i in range(n_future):
    # predict() returns a one-element array here; ravel() makes this
    # independent of whether the model was fit with 1-D or 2-D targets.
    next_y = regr.predict(future_x).ravel()[0]
    pred_y[i, 0] = next_y
    # Shift the window left by one and append the new prediction; written in
    # terms of the window width so it works for any k, not just k == 3.
    future_x[0, :-1] = future_x[0, 1:].copy()
    future_x[0, -1] = next_y


plt.figure()
plt.plot(np.arange(1960 + k, first_pred_year), all_y)
plt.plot(np.arange(first_pred_year, first_pred_year + n_future), pred_y)
plt.legend(['History', 'ENPRP Predict'], bbox_to_anchor=(1.45, 1))
plt.show()



In [ ]: