In [1]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.model_selection import train_test_split

In [2]:
# read data
data = pd.read_csv("CA.csv")
year1 = data[['Year']][:44]
#print(year1.shape)
year2 = data[['Year']][-11:]

# predict solar for future
year3 = year2 = data[['Year']][-6:]
year3 = year3.set_index([[0, 1, 2, 3, 4, 5]])

statelist=["AK","AL","AR","AZ","CA","CO","CT","DE","FL","GA","IA","ID","IL","IN","KS","KY","LA","MA","MD","ME","MI","MN","MO","MS","MT","NC","ND","NE","NH","NJ","NM","NV","NY","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VA","VT","WA","WI","WV","WY"]
print(len(statelist))

future = year3
# do ridge regression on train data

for i in range(49):
    data = pd.read_csv('%s.csv' % (statelist[i]))
    
    year1 = data[['Year']][:44]
    #print(year1.shape)
    year2 = data[['Year']][-11:]
    # Split data for train and test
    #print(i)
    all_x = data[['GDP','CLPRB','EMFDB','ENPRP','NGMPB','PAPRB','PCP','ZNDX','Nominal Price', 'Inflation Adjusted Price']][0:55]
    all_y = data[['SOEGP']][0:55]
    train_x, test_x, train_y, test_y = train_test_split(all_x, all_y, test_size=0.2)
    regr2 = linear_model.Ridge(alpha = 0.75)
    regr2.fit(train_x, train_y)
    # predict SOEGP for future
    #year3 = data[['Year']][-6:]
    #year3 = year3.set_index([[0, 1, 2, 3, 4, 5]])
    #print(year3)
    future_x = data[['GDP','CLPRB','EMFDB','ENPRP','NGMPB','PAPRB','PCP','ZNDX','Nominal Price', 'Inflation Adjusted Price']][-6:]
    pred = pd.DataFrame(regr2.predict(future_x).clip(min=0))
    pred.columns = [statelist[i]]
    #print(pred)
    future = pd.concat([future, pred], axis=1)
    #print(future)
print(future)

# output to csv
future.to_csv('SolarPreds.csv', encoding='utf-8', index=False)


49
     Year   AK   AL   AR            AZ           CA           CO        CT  \
0  2015.0  0.0  0.0  0.0   3931.712427  1329.570911   327.586983  0.177262   
1  2016.0  0.0  0.0  0.0   5397.210633  1404.752885   424.662343  0.471935   
2  2017.0  0.0  0.0  0.0   7210.633513  1305.139609   540.233690  0.579335   
3  2018.0  0.0  0.0  0.0   9452.221571  1778.132417   679.758123  0.587407   
4  2019.0  0.0  0.0  0.0  12192.074248  2177.124532   847.198335  0.945495   
5  2020.0  0.0  0.0  0.0  15556.356376  2635.171669  1044.231619  1.204404   

         DE         FL ...    SD          TN          TX        UT   VA  \
0  1.385994   0.000000 ...   0.0   38.402701  169.015649  0.562682  0.0   
1  4.791977  12.521007 ...   0.0   53.266835  166.496799  0.557768  0.0   
2  6.661490  22.433161 ...   0.0   72.328805  158.932933  0.622683  0.0   
3  6.319022  29.912991 ...   0.0   96.917034  163.132028  0.707842  0.0   
4  8.092167  36.838367 ...   0.0  127.606017  154.912569  0.765259  0.0   
5  9.417947  44.177388 ...   0.0  165.982623  156.864242  0.840815  0.0   

         VT        WA        WI   WV   WY  
0  0.467498  0.597658  1.017109  0.0  0.0  
1  0.676860  0.678020  1.356138  0.0  0.0  
2  0.267833  0.793141  1.787602  0.0  0.0  
3  0.732461  0.893012  2.335792  0.0  0.0  
4  0.604715  0.990719  3.033618  0.0  0.0  
5  0.537138  1.111321  3.908971  0.0  0.0  

[6 rows x 50 columns]

In [ ]: