In [1]:
# import data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.model_selection import train_test_split
In [2]:
# read data
# Load the table from CA.csv (expected in the working directory) and echo it.
data = pd.read_csv("CA.csv")
print(data)
# NOTE: the previous form `data[['Year'][-10:]]` sliced the one-element list
# ['Year'] rather than the rows, so year1 was the whole Year column and year2
# was `data[[]]` (a frame with no columns). Slice the rows explicitly instead.
# Both results stay single-column DataFrames, as before.
year1 = data[['Year']].iloc[-10:]   # last 10 rows of Year
#print(year1)
year2 = data[['Year']].iloc[:-10]   # every row except the last 10
Unnamed: 0 Year HYTCP WYTCP SOEGP NUETP GDP \
0 0 1960.0 17445.0 0.0 0.0 0.0 5.143560e+04
1 1 1961.0 15368.0 5.0 0.0 0.0 5.689940e+04
2 2 1962.0 22835.0 7.0 0.0 0.0 6.236320e+04
3 3 1963.0 25334.0 193.0 0.0 0.0 6.780900e+04
4 4 1964.0 22098.0 367.0 0.0 0.0 7.351300e+04
5 5 1965.0 30523.0 270.0 0.0 0.0 7.825300e+04
6 6 1966.0 26236.0 163.0 0.0 0.0 8.462700e+04
7 7 1967.0 35277.0 563.0 0.0 0.0 8.957100e+04
8 8 1968.0 27175.0 1505.0 0.0 0.0 9.872100e+04
9 9 1969.0 40418.0 2458.0 0.0 0.0 1.066290e+05
10 10 1970.0 38082.0 3132.0 0.0 0.0 1.119850e+05
11 11 1971.0 39018.0 3519.0 0.0 0.0 1.203720e+05
12 12 1972.0 31755.0 3175.0 0.0 0.0 1.331430e+05
13 13 1973.0 38754.0 2631.0 0.0 0.0 1.472310e+05
14 14 1974.0 46422.0 3698.0 0.0 0.0 1.618150e+05
15 15 1975.0 40103.0 6071.0 0.0 0.0 1.783940e+05
16 16 1976.0 23193.0 4807.0 0.0 0.0 1.973470e+05
17 17 1977.0 14251.0 8115.0 0.0 0.0 2.295920e+05
18 18 1978.0 37206.0 7659.0 0.0 0.0 2.628030e+05
19 19 1979.0 33920.0 8762.0 0.0 0.0 2.934910e+05
20 20 1980.0 40780.0 4920.0 0.0 0.0 3.279580e+05
21 21 1981.0 29764.0 3206.0 0.0 0.0 3.688080e+05
22 22 1982.0 50226.0 3735.0 0.0 0.0 3.937880e+05
23 23 1983.0 56885.0 5613.0 0.0 1.0 4.261430e+05
24 24 1984.0 43159.0 14144.0 5.0 4.0 4.821660e+05
25 25 1985.0 31717.0 19729.0 11.0 3.0 5.239060e+05
26 26 1986.0 41459.0 26215.0 14.0 3.0 5.630820e+05
27 27 1987.0 24564.0 30387.0 10.0 4.0 6.153590e+05
28 28 1988.0 23474.0 30863.0 9.0 1.0 6.715750e+05
29 29 1989.0 30801.0 32519.0 250.0 2079.0 7.229790e+05
.. ... ... ... ... ... ... ...
31 31 1991.0 21957.0 31542.0 471.0 2915.0 7.900460e+05
32 32 1992.0 20167.0 35244.0 399.0 2864.0 8.073580e+05
33 33 1993.0 40493.0 31581.0 462.0 2984.0 8.264470e+05
34 34 1994.0 23013.0 33752.0 486.0 3387.0 8.613600e+05
35 35 1995.0 48033.0 30246.0 497.0 3087.0 9.115770e+05
36 36 1996.0 44751.0 34097.0 521.0 3079.0 9.641860e+05
37 37 1997.0 41055.0 30512.0 511.0 3137.0 1.037091e+06
38 38 1998.0 49548.0 34594.0 502.0 2758.0 1.151119e+06
39 39 1999.0 40737.0 33372.0 495.0 3230.0 1.248006e+06
40 40 2000.0 38334.0 35176.0 493.0 3518.0 1.362995e+06
41 41 2001.0 25542.0 33220.0 542.0 3500.0 1.376163e+06
42 42 2002.0 31141.0 34352.0 554.0 3803.0 1.431267e+06
43 43 2003.0 36371.0 35594.0 534.0 3895.0 1.523473e+06
44 44 2004.0 34141.0 30268.0 571.0 4306.0 1.645084e+06
45 45 2005.0 39632.0 36155.0 537.0 4262.0 1.766693e+06
46 46 2006.0 48047.0 31959.0 495.0 4883.0 1.879520e+06
47 47 2007.0 27328.0 35792.0 557.0 5585.0 1.956523e+06
48 48 2008.0 24128.0 32482.0 670.0 5385.0 1.983926e+06
49 49 2009.0 27888.0 31764.0 647.0 5840.0 1.912115e+06
50 50 2010.0 33431.0 32201.0 765.0 6079.0 1.965886e+06
51 51 2011.0 42557.0 36663.0 861.0 7752.0 2.036297e+06
52 52 2012.0 26837.0 18507.0 1328.0 9754.0 2.131199e+06
53 53 2013.0 23755.0 17912.0 3727.0 12822.0 2.223958e+06
54 54 2014.0 16531.0 16986.0 9834.0 12992.0 2.350807e+06
55 55 2015.0 NaN NaN NaN NaN 1.972377e+06
56 56 2016.0 NaN NaN NaN NaN 2.031505e+06
57 57 2017.0 NaN NaN NaN NaN 2.073136e+06
58 58 2018.0 NaN NaN NaN NaN 2.115467e+06
59 59 2019.0 NaN NaN NaN NaN 2.157770e+06
60 60 2020.0 NaN NaN NaN NaN 2.200074e+06
CLPRB EMFDB ENPRP NGMPB PAPRB \
0 0.000000 0.000000 0.000000 589695.145060 1.771042e+06
1 0.000000 0.000000 0.000000 633797.940590 1.737732e+06
2 0.000000 0.000000 0.000000 642889.456260 1.720222e+06
3 0.000000 0.000000 0.000000 736625.842790 1.745266e+06
4 0.000000 0.000000 0.000000 756639.938890 1.740052e+06
5 0.000000 0.000000 0.000000 752461.647380 1.835282e+06
6 0.000000 0.000000 0.000000 785759.223820 2.002711e+06
7 0.000000 0.000000 0.000000 776043.300260 2.083470e+06
8 0.000000 0.000000 0.000000 814570.862530 2.177877e+06
9 0.000000 0.000000 0.000000 772179.491550 2.176688e+06
10 0.000000 0.000000 0.000000 739623.684340 2.158708e+06
11 0.000000 0.000000 0.000000 700449.587690 2.079207e+06
12 0.000000 0.000000 0.000000 561777.951740 2.012728e+06
13 0.000000 0.000000 0.000000 507133.068650 1.949235e+06
14 0.000000 0.000000 0.000000 419143.681910 1.873417e+06
15 0.000000 0.000000 0.000000 365212.133350 1.868754e+06
16 0.000000 0.000000 0.000000 400198.670770 1.890922e+06
17 0.000000 0.000000 0.000000 353544.214460 2.027732e+06
18 0.000000 0.000000 0.000000 352083.582160 2.013650e+06
19 0.000000 0.000000 0.000000 282281.173410 2.043154e+06
20 0.000000 0.000000 0.000000 342208.328920 2.070153e+06
21 0.000000 0.000000 0.000000 414770.715990 2.232756e+06
22 0.000000 0.000000 0.000000 432487.213510 2.329118e+06
23 0.000000 0.000000 0.000000 463384.589870 2.347190e+06
24 0.000000 0.000000 0.000000 526963.163720 2.389716e+06
25 852.000000 578.033290 91.176730 546052.751980 2.458487e+06
26 0.000000 610.944910 96.713330 511410.722250 2.358657e+06
27 552.000000 667.850100 106.099830 468677.761570 2.295060e+06
28 648.000000 669.909760 106.810560 444232.977320 2.238875e+06
29 492.000000 631.336660 101.022230 404557.660430 2.112650e+06
.. ... ... ... ... ...
31 684.000000 618.236050 99.628730 414017.142190 2.035893e+06
32 1236.000000 651.364350 105.350780 402191.760080 2.018632e+06
33 0.000000 686.467180 111.416690 352455.970670 1.993628e+06
34 0.000000 753.990230 122.825320 340053.118510 1.992700e+06
35 0.000000 724.953310 118.511420 308223.983960 2.033979e+06
36 0.000000 297.700520 48.838590 320465.715730 2.011602e+06
37 0.000000 526.367570 86.658560 313734.335920 1.967981e+06
38 0.000000 620.437570 102.509640 349864.265080 1.913188e+06
39 0.000000 575.600310 95.240350 410698.817710 1.813770e+06
40 0.000000 691.267920 114.527590 390975.455400 1.775519e+06
41 0.000000 758.043380 125.754130 408479.078050 1.692243e+06
42 0.000000 1032.953090 171.582900 394507.109550 1.669199e+06
43 0.000000 1213.083940 202.393840 373313.527210 1.624000e+06
44 0.000000 1102.974250 184.838880 356231.719530 1.550108e+06
45 0.000000 2155.600700 362.789470 353819.535400 1.489718e+06
46 0.000000 5534.308260 935.595310 351100.499310 1.447460e+06
47 0.000000 12532.827110 2128.240820 339526.550440 1.399992e+06
48 0.000000 13309.875940 2270.016060 331492.568970 1.384408e+06
49 0.000000 6861.251470 1177.514910 309834.883070 1.328165e+06
50 76.270489 6625.601172 1137.323720 270557.609868 1.470517e+06
51 114.654193 6462.703992 1109.617441 244453.061373 1.536844e+06
52 134.308225 6359.118192 1091.691303 225563.599155 1.564214e+06
53 145.090308 6304.281471 1081.915282 210625.372980 1.571620e+06
54 151.436417 6288.444013 1078.659354 197851.727459 1.568798e+06
55 155.318094 6304.293457 1081.108508 186263.343209 1.560733e+06
56 158.214000 6346.142410 1088.447732 175324.552241 1.549981e+06
57 160.617019 6409.115963 1099.047002 164741.230111 1.537854e+06
58 162.527153 6489.151694 1112.906320 154352.895510 1.525021e+06
59 164.437286 6582.999665 1128.395660 144070.818157 1.511826e+06
60 166.347419 6687.409935 1146.330036 133847.346607 1.498447e+06
PCP ZNDX Nominal Price Inflation Adjusted Price
0 21.770000 -4.360000 2.910000 23.720000
1 16.530000 -6.810000 2.850000 22.960000
2 22.100000 -0.680000 2.850000 22.690000
3 24.740000 9.900000 2.910000 22.900000
4 22.270000 2.690000 3.000000 23.300000
5 21.980000 4.630000 3.010000 23.000000
6 18.720000 -9.290000 3.100000 23.010000
7 23.750000 7.100000 3.120000 22.530000
8 20.370000 -4.320000 3.180000 21.990000
9 30.600000 9.410000 3.320000 21.810000
10 27.810000 4.120000 3.390000 21.040000
11 18.240000 2.830000 3.600000 21.420000
12 17.210000 -6.130000 3.600000 20.740000
13 28.930000 4.810000 4.750000 25.560000
14 21.350000 -1.410000 9.350000 45.600000
15 20.940000 0.790000 12.210000 54.610000
16 11.750000 -18.290000 13.100000 55.460000
17 17.530000 -17.110000 14.400000 57.200000
18 28.860000 9.300000 14.950000 55.240000
19 24.630000 2.510000 25.100000 82.510000
20 24.120000 2.600000 37.420000 109.510000
21 26.240000 0.960000 35.750000 94.830000
22 31.990000 20.000000 31.830000 79.500000
23 42.460000 30.010000 29.080000 70.340000
24 18.320000 -3.280000 28.750000 66.670000
25 14.700000 -11.920000 26.920000 60.270000
26 24.250000 -0.720000 14.440000 31.720000
27 19.710000 -11.870000 17.750000 37.620000
28 17.100000 -5.790000 14.870000 30.330000
29 14.950000 -9.010000 18.330000 35.600000
.. ... ... ... ...
31 19.790000 -5.360000 20.200000 35.730000
32 23.230000 -7.910000 19.250000 33.040000
33 26.700000 7.270000 16.750000 27.940000
34 17.540000 -8.860000 15.660000 25.440000
35 35.200000 20.590000 16.750000 26.480000
36 32.510000 12.480000 20.460000 31.400000
37 20.600000 -6.670000 18.640000 27.980000
38 36.410000 29.060000 11.910000 17.600000
39 17.630000 -5.380000 16.560000 23.890000
40 21.430000 -4.560000 27.390000 38.290000
41 23.020000 -7.580000 23.000000 31.300000
42 18.680000 -10.150000 22.810000 30.520000
43 21.630000 -0.900000 27.690000 36.260000
44 21.760000 -6.820000 37.660000 47.980000
45 30.100000 16.070000 50.040000 61.650000
46 23.610000 -0.670000 58.300000 69.640000
47 13.840000 -18.640000 64.200000 74.440000
48 17.850000 -14.240000 91.480000 102.000000
49 17.050000 -10.550000 53.480000 59.930000
50 31.180000 13.930000 71.210000 78.650000
51 18.790000 2.910000 87.040000 93.210000
52 23.450000 -3.950000 86.460000 90.720000
53 7.930000 -22.270000 91.170000 94.250000
54 19.900000 -20.260000 85.600000 87.050000
55 15.050000 -16.660000 41.850000 42.530000
56 26.040000 1.290000 46.123463 45.343343
57 21.873908 0.618620 50.270318 47.865574
58 20.711858 -2.385896 53.639684 49.564252
59 20.381871 -5.337564 56.231562 51.262929
60 20.405636 -7.963284 58.823440 52.961607
[61 rows x 16 columns]
In [10]:
# Using time series prediction of future feature data
# Build a supervised data set from the ENPRP column: each sample is k
# consecutive values and its target is the value that follows them.
# k is the number of past years used for prediction
k=3
# Only the first 50 rows of the table are used as history.
# The feature matrix must be k columns wide; it was previously hard-coded to 3,
# which broke the row assignment below for any other k.
all_x = np.zeros((50 - k, k))
all_y = np.zeros((50 - k, 1))
enprp_values = data['ENPRP'].values
for i in range(0, 50 - k):
    all_x[i, :] = enprp_values[i:i+k]
    all_y[i, 0] = enprp_values[i+k]
# split data for train and test
# A fixed random_state keeps the shuffle (and every downstream fit/plot)
# identical across kernel restarts.
train_x, test_x, train_y, test_y = train_test_split(
    all_x, all_y, test_size=0.2, random_state=0
)
In [11]:
# Show how close time series predicts the data compared to test data
# Fit an ordinary least-squares model on the training windows, then draw the
# model output for the held-out windows next to their true targets.
regr = linear_model.LinearRegression().fit(train_x, train_y)
for curve in (regr.predict(test_x), test_y):
    # First curve is the prediction, second is the real value (legend order).
    plt.plot(curve)
plt.legend(['Prediction', 'ENPRP Real'], bbox_to_anchor=(1.45, 1))
plt.show()
In [12]:
# use time series to predict next five years
# Recursive forecast: predict one step, slide the k-wide window left by one,
# append the prediction as the newest value, and repeat.
#
# The model was fitted on the first 50 rows and the forecast is plotted on
# 2010-2014, so the seed window must be the last k values of that history
# (rows 50-k .. 49). It was previously taken from the last k rows of the whole
# file, which belong to later years than the plotted range.
future_x = data['ENPRP'].values[50 - k:50].astype(float).reshape(1, -1)
pred_y = np.zeros((5, 1))
for i in range(5):
    future_y = regr.predict(future_x)
    # Shift by one position for any k (the slices were hard-coded for k == 3).
    # The copy avoids relying on overlapping-slice assignment semantics.
    future_x[:, :-1] = future_x[:, 1:].copy()
    future_x[0, -1] = future_y[0, 0]
    pred_y[i, 0] = future_y[0, 0]
plt.figure()
# History targets are rows k..49 of the table, plotted against 1960+k .. 2009.
plt.plot(np.arange(1960 + k, 2010), all_y)
plt.plot(np.arange(2010, 2015), pred_y)
plt.legend(['History', 'ENPRP Predict'], bbox_to_anchor=(1.45, 1))
plt.show()
In [ ]:
Content source: uwkejia/Clean-Energy-Outlook
Similar notebooks: