In [2]:
import pandas as pd
import copy
from datetime import datetime,timedelta
import numpy as np
import json
import pickle
In [3]:
import yaml
with open('../config.yaml') as f:
cf = yaml.safe_load(f)
In [4]:
def get_gen_df_on_date(day,month,year):
url = 'http://www.cndc.org.ni/consultas/reportesDiarios/postDespacho/consultaPostdespacho.php?fecha='+day+'/'+month+'/'+year
response = urllib2.urlopen(url)
data = response.read()
soup = BeautifulSoup(data)
header = soup.findAll('tr')[1].findAll(class_='trEncabezado3')
columns = ['HORA']
for column in header:
columns.append(column.get_text().encode('ascii','ignore'))
columns.append('Demanda')
columns.append('Date')
rows = soup.findAll(class_='trRegistro')
df_list = []
for row in rows:
row_text = []
row_vals = row.findAll('td')
for val in row_vals:
row_text.append(val.get_text().encode('ascii','ignore'))
row_text.append(day+'/'+month+'/'+year)
df_list.append(row_text)
df = pd.DataFrame(df_list,columns=columns)
return df
In [5]:
#Uncomment the code below to scrape demand/generation data.
'''
years = [2012,2013,2014]
months = [1,2,3,4,5,6,7,8,9,10,11,12]
dfs = []
print datetime.now()
for year in years:
for month in months:
num_days = monthrange(year, month)[1]
for day in range(1,num_days+1):
dfs.append(get_gen_df_on_date(str(day),str(month),str(year)))
df = pd.concat(dfs)
dfs = []
df.to_csv(str(month)+'_'+str(year)+'.csv')
print datetime.now()
'''
pass
In [6]:
years = [2012,2013,2014]
months = [1,2,3,4,5,6,7,8,9,10,11,12]
dfs = []
for year in years:
for month in months:
dfs.append(pd.read_csv('../data/generation/'+str(month)+'_'+str(year)+'.csv'))
df_orig = pd.concat(dfs)
df_orig = df_orig.reset_index(drop=True)
Missing Data
There are 52 missing data points from different generation facilities within the 2012-2014 demand/generation dataset. The way this occurs is that the html table that got scraped from the website shifted all values when one cell was missing data. Therefore, all the values had to be shifted back.
In [7]:
df_gen = copy.deepcopy(df_orig)
datetime_array = []
date_array = []
month_array = []
weekday_array = []
day_array = []
demand_array = []
tcpi_array = []
date_split = ''
for i,val in enumerate(df_gen.iterrows()):
row = val[1]
if(row['Demanda']=='4/2/2013'):
row['Date'] = '4/2/2013'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['SND-L9090'] = None
if(row['Demanda']=='28/4/2013'):
row['Date'] = '28/4/2013'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['SND-L9090'] = None
if(row['Demanda']=='25/7/2012'):
row['Date'] = '25/7/2012'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = row['MTR']
row['MTR'] = row['GSR']
row['GSR'] = row['PNI2']
row['PNI2'] = row['PNI1']
row['PNI1'] = row['PCHN']
row['PCHN'] = row['EEC20']
row['EEC20'] = row['EEC']
row['EEC'] = row['CEN']
row['CEN'] = row['PBP']
row['PBP'] = None
if(row['Demanda']=='27/12/2012'):
row['Date'] = '27/12/2012'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['SND-L9090'] = None
if(row['Demanda']=='3/6/2014'):
row['Date'] = '3/6/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = None
if(row['Demanda']=='4/6/2014'):
row['Date'] = '4/6/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = row['MTR']
row['MTR'] = row['GSR']
row['GSR'] = row['PNI2']
row['PNI2'] = row['PNI1']
row['PNI1'] = None
if(row['Demanda']=='5/6/2014'):
row['Date'] = '5/6/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = row['MTR']
row['MTR'] = row['GSR']
row['GSR'] = row['PNI2']
row['PNI2'] = row['PNI1']
row['PNI1'] = row['PCHN']
row['PCHN'] = row['EEC20']
row['EEC20'] = row['EEC']
row['EEC'] = row['CEN']
row['CEN'] = row['PBP']
row['PBP'] = row['AMY2']
row['AMY2'] = row['AMY1']
row['AMY1'] = row['PHC2']
row['PHC2'] = row['PHC1']
row['PHC1'] = row['PCG9']
row['PCG9'] = row['PCG8']
row['PCG8'] = row['PCG7']
row['PCG7'] = row['PCG6']
row['PCG6'] = row['PCG5']
row['PCG5'] = None
if(row['TCPI-L9150']=='9/6/2014'):
row['Date'] = '9/6/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = None
row['PLB1'] = None
if(row['Demanda']=='10/6/2014'):
row['Date'] = '10/6/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = None
if(row['Demanda']=='12/6/2014'):
row['Date'] = '12/6/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = None
if(row['Demanda']=='15/6/2014'):
row['Date'] = '15/6/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = row['MTR']
row['MTR'] = row['GSR']
row['GSR'] = row['PNI2']
row['PNI2'] = row['PNI1']
row['PNI1'] = None
if(row['Demanda']=='24/6/2014'):
row['Date'] = '24/6/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = None
if(row['Demanda']=='27/6/2014'):
row['Date'] = '27/6/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = row['MTR']
row['MTR'] = row['GSR']
row['GSR'] = row['PNI2']
row['PNI2'] = row['PNI1']
row['PNI1'] = row['PCHN']
row['PCHN'] = row['EEC20']
row['EEC20'] = row['EEC']
row['EEC'] = row['CEN']
row['CEN'] = row['PBP']
row['PBP'] = row['AMY2']
row['AMY2'] = row['AMY1']
row['AMY1'] = row['PHC2']
row['PHC2'] = row['PHC1']
row['PHC1'] = row['PCG9']
row['PCG9'] = row['PCG8']
row['PCG8'] = row['PCG7']
row['PCG7'] = row['PCG6']
row['PCG6'] = row['PCG5']
row['PCG5'] = None
if(row['Demanda']=='1/7/2014'):
row['Date'] = '1/7/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = row['MTR']
row['MTR'] = row['GSR']
row['GSR'] = row['PNI2']
row['PNI2'] = row['PNI1']
row['PNI1'] = None
if(row['Demanda']=='7/7/2014'):
row['Date'] = '7/7/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = row['MTR']
row['MTR'] = row['GSR']
row['GSR'] = row['PNI2']
row['PNI2'] = row['PNI1']
row['PNI1'] = row['PCHN']
row['PCHN'] = row['EEC20']
row['EEC20'] = row['EEC']
row['EEC'] = row['CEN']
row['CEN'] = row['PBP']
row['PBP'] = row['AMY2']
row['AMY2'] = row['AMY1']
row['AMY1'] = row['PHC2']
row['PHC2'] = None
if(row['Demanda']=='11/7/2014'):
row['Date'] = '11/7/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = None
if(row['Demanda']=='12/7/2014'):
row['Date'] = '12/7/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = row['MTR']
row['MTR'] = row['GSR']
row['GSR'] = row['PNI2']
row['PNI2'] = row['PNI1']
row['PNI1'] = None
if(row['Demanda']=='19/7/2014'):
row['Date'] = '19/7/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = row['MTR']
row['MTR'] = row['GSR']
row['GSR'] = row['PNI2']
row['PNI2'] = row['PNI1']
row['PNI1'] = row['PCHN']
row['PCHN'] = row['EEC20']
row['EEC20'] = row['EEC']
row['EEC'] = row['CEN']
row['CEN'] = row['PBP']
row['PBP'] = row['AMY2']
row['AMY2'] = row['AMY1']
row['AMY1'] = row['PHC2']
row['PHC2'] = row['PHC1']
row['PHC1'] = row['PCG9']
row['PCG9'] = row['PCG8']
row['PCG8'] = row['PCG7']
row['PCG7'] = row['PCG6']
row['PCG6'] = row['PCG5']
row['PCG5'] = row['PCG4']
row['PCG4'] = row['PCG3']
row['PCG3'] = row['PCG2']
row['PCG2'] = row['PCG1']
row['PCG1'] = None
if(row['Demanda']=='20/7/2014'):
row['Date'] = '20/7/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = row['MTR']
row['MTR'] = row['GSR']
row['GSR'] = row['PNI2']
row['PNI2'] = row['PNI1']
row['PNI1'] = row['PCHN']
row['PCHN'] = row['EEC20']
row['EEC20'] = row['EEC']
row['EEC'] = row['CEN']
row['CEN'] = row['PBP']
row['PBP'] = row['AMY2']
row['AMY2'] = row['AMY1']
row['AMY1'] = row['PHC2']
row['PHC2'] = None
if(row['TCPI-L9150']=='21/7/2014'):
row['Date'] = '21/7/2014'
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = row['MTR']
row['MTR'] = row['GSR']
row['GSR'] = row['PNI2']
row['PNI2'] = row['PNI1']
row['PNI1'] = row['PCHN']
row['PCHN'] = row['EEC20']
row['EEC20'] = row['EEC']
row['EEC'] = row['CEN']
row['CEN'] = row['PBP']
row['PBP'] = row['AMY2']
row['AMY2'] = row['AMY1']
row['AMY1'] = row['PHC2']
row['PHC2'] = row['PHC1']
row['PHC1'] = row['PCG9']
row['PCG9'] = row['PCG8']
row['PCG8'] = row['PCG7']
row['PCG7'] = row['PCG6']
row['PCG6'] = row['PCG5']
row['PCG5'] = None
row['Demanda'] = row['TCPI-L9150']
row['TCPI-L9150'] = row['AMY-L9030']
row['AMY-L9030'] = row['SND-L9090']
row['LNI-L9040'] = row['AMY-L9030']
row['TPC'] = row['LNI-L9040']
row['LNI-9040'] = row['TPC']
row['TPC'] = row['PMT3']
row['PMT3'] = row['PMT2']
row['PMT2'] = row['PMT1']
row['PMT1'] = row['PMG5']
row['PMG5'] = row['PMG4']
row['PMG4'] = row['PMG3']
row['PMG3'] = row['PLB2']
row['PLB2'] = row['PLB1']
row['PLB1'] = row['PEN3']
row['PEN3'] = row['PEN1 y 2']
row['PEN1 y 2'] = row['PCF2']
row['PCF2'] = row['PCF1']
row['PCF1'] = row['PCA2']
row['PCA2'] = row['PCA1']
row['PCA1'] = row['NSL']
row['NSL'] = row['MTR']
row['MTR'] = row['GSR']
row['GSR'] = row['PNI2']
row['PNI2'] = None
try:
date_split = row['Date'].split('/')
except:
print row
hour = row['HORA']
date = datetime(int(float(date_split[2])),int(float(date_split[1])),int(float(date_split[0])))
date_time = datetime(int(float(date_split[2])),int(float(date_split[1])),int(float(date_split[0])),int(float(hour)))
month = int(float(date_split[0]))
weekday = int(float(date_time.weekday()))
day = int(float(date_time.day))
datetime_array.append(date_time)
date_array.append(date)
month_array.append(month)
weekday_array.append(weekday)
day_array.append(day)
demand_array.append(row['Demanda'])
tcpi_array.append(row['TCPI-L9150'])
df_gen['TCPI-L9150'] = tcpi_array
df_gen['TCPI-L9150'] = df_gen['TCPI-L9150'].astype(np.float64)
df_gen['Demanda'] = demand_array
df_gen['Demanda'] = df_gen['Demanda'].astype(np.float64)
df_gen['datetime']=datetime_array
df_gen['date']=date_array
df_gen['month']=month_array
df_gen['weekday']=weekday_array
df_gen['day']=day_array
df_gen = df_gen.set_index('datetime').drop('Date',1).drop('Unnamed: 0',1)
df_gen['HORA'] = df_gen[['HORA']].astype(int)
In [8]:
#Uncomment the code below to use the api.
'''
api_key = cf['api_key']
years = ['2009','2010']
months= ['01','02','03','04','05','06','07','08','09','10','11','12']
for year in years:
for month in months:
url = 'http://api.worldweatheronline.com/premium/v1/past-weather.ashx?q=Managua&format=json&date='+year+'-'+month+'-01&enddate=2015-02-01&includelocation=yes&key='+api_key
response = urllib.urlopen(url);
print url
weather_data = json.loads(response.read())
with open('../weather/managua_'+year+'_'+month+'.json', 'w') as outfile:
json.dump(weather_data, outfile)
'''
pass
In [9]:
years = ['2011','2012','2013','2014']
months= ['01','02','03','04','05','06','07','08','09','10','11','12']
managua_weather = []
for year in years:
for month in months:
with open('../../weather/managua_'+year+'_'+month+'.json', 'r') as infile:
managua = json.load(infile)
for weather in managua['data']['weather']:
managua_weather.append(weather)
In [10]:
weather_single_list = []
for day in managua_weather:
date_array = day['date'].split('-')
date = datetime(int(float(date_array[0])),int(float(date_array[1])),int(float(date_array[2])))
for hour_data in day['hourly']:
hour = int(float(hour_data['time'])/100)
hour_data['datetime']=date.replace(hour=hour)
weather_single_list.append(hour_data)
df_temp = pd.DataFrame(weather_single_list, columns=hour_data.keys())
df_temp = df_temp.set_index('datetime')
for column in df_temp:
if column not in ['weatherIconUrl','winddir16Point','weatherDesc']:
df_temp[column] = df_temp[column].astype(float)
df_temp_resampled = df_temp.resample('1H').apply(pd.Series.interpolate)
df_temp_resampled['precipMM'] = df_temp.resample('1H').fillna(0)
df_temp = df_temp_resampled
df_temp = df_temp.drop('time',1)
In [11]:
def dow_date_finder(which_weekday_in_month=0,day=3,month=4,year=2015):
dt = datetime(year,month,1)
dow_lst = []
while dt.weekday() != day:
dt = dt + timedelta(days=1)
while dt.month == month:
dow_lst.append(dt)
dt = dt + timedelta(days=7)
return dow_lst[which_weekday_in_month] # may raise an exception if slicing is wrong
holiday_dates = []
for year in years:
holiday_dates.append(datetime(int(float(year)),1,2))#New Years Day
holiday_dates.append(datetime(int(float(year)),2,1))#Air Force Day
holiday_dates.append(dow_date_finder(0,3,4,int(float(year))))#Holy Thursday
holiday_dates.append(datetime(int(float(year)),5,1))#Labour Day
holiday_dates.append(datetime(int(float(year)),5,27))#Army Day
holiday_dates.append(datetime(int(float(year)),7,19))#Liberation Day
holiday_dates.append(datetime(int(float(year)),7,25))#Fiesta de Santiago
holiday_dates.append(datetime(int(float(year)),7,26))#Fiesta de Santa Ana
holiday_dates.append(datetime(int(float(year)),8,1))#Fiesta de Santa Domingo
holiday_dates.append(datetime(int(float(year)),9,14))#Battle of San Jacinto
holiday_dates.append(datetime(int(float(year)),9,15))#Independence Day
holiday_dates.append(datetime(int(float(year)),10,12))#Indigenous Resistance Day
#holiday_dates.append(datetime(int(float(year)),12,7))#La Griteria Immaculate (Leon only)
holiday_dates.append(datetime(int(float(year)),12,8))#La Griteria Immaculate
holiday_dates.append(datetime(int(float(year)),12,25))#Christmas
holiday_dates.append(datetime(int(float(year)),12,31))#New Years Eve
holiday_dates = [date for date in holiday_dates if df_gen.date[0]<date and df_gen.date[-1]>date]
In [12]:
df_gen['holiday'] = 0
for holiday in holiday_dates:
df_gen.loc[df_gen['date']==holiday,'holiday'] = 1
df_gen['weekend'] = 0
df_gen.loc[df_gen['weekday']>4,'weekend'] = 1
df_gen['payday'] = 0
df_gen.loc[df_gen['day']==15,'payday'] = 1
df_gen['timeinweek']=24*df_gen['weekday']+df_gen['HORA']
df_gen['wd_peak'] = 1
df_gen.loc[df_gen['HORA']<9,'wd_peak'] = 0
df_gen.loc[df_gen['HORA']>21,'wd_peak'] = 0
df_gen.loc[df_gen['weekend']==1,'wd_peak'] = 0
df_gen['we_peak'] = 1
df_gen.loc[df_gen['HORA']<18,'we_peak'] = 0
df_gen.loc[df_gen['HORA']>21,'we_peak'] = 0
df_gen.loc[df_gen['weekend']==0,'we_peak'] = 0
df_gen['HOURS_PSR'] = df_gen['HORA'] - 6
df_gen.loc[df_gen['HOURS_PSR']<0,'HOURS_PSR'] = df_gen.loc[df_gen['HOURS_PSR']<0,'HOURS_PSR'] + 24
df_gen['HOURS_PSS'] = df_gen['HORA'] - 18
df_gen.loc[df_gen['HOURS_PSS']<0,'HOURS_PSS'] = df_gen.loc[df_gen['HOURS_PSS']<0,'HOURS_PSS'] + 24
In [16]:
#changed EOLO to EOL, EEC50 to EEC, and TPT to TPC
#changed PMGA to PMG, PCH1 to PCHN
df_gen['Albanisa_Bunker'] = df_gen[['PCG1','PCG2','PCG3','PCG4','PCG5','PCG6','PCG7','PCG8','PCG9','PHC1','PHC2']].sum(axis=1)
df_gen['AEI_Wind'] = df_gen[['AMY1','AMY2']].sum(axis=1)
df_gen['Gruo_Terra_Wind'] = df_gen[['PBP']].sum(axis=1)
df_gen['Other_Wind'] = df_gen[['ABR','EOL']].sum(axis=1)
df_gen['AEI_Bunker'] = df_gen[['CEN','EEC20','EEC','TPC']].sum(axis=1)
df_gen['Enel_Hydro'] = df_gen[['PCA1','PCA2','PCF1','PCF2']].sum(axis=1)
df_gen['Enel_Bunker'] = df_gen[['PLB1','PLB2','PMG3','PMG4','PMG5']].sum(axis=1)
df_gen['Other_Bunker'] = df_gen[['PCHN','PNI1','PNI2','GSR']].sum(axis=1)
df_gen['Hidropanasma_Hydro'] = df_gen[['HPA1','HPA2']].sum(axis=1)
bunker_fuel = ['PCG1','PCG2','PCG3','PCG4','PCG5','PCG6','PCG7','PCG8','PCG9','PHC1','PHC2','CEN','EEC20','EEC','TPC','PLB1','PLB2','PMG3','PMG4','PMG5','PCHN','PNI1','PNI2','GSR']
interconnect = ['LNI-L9040','SND-L9090','AMY-L9030','TCPI-L9150']
hydro = ['PCA1','PCA2','PCF1','PCF2','HPA1','HPA2']
geothermal = ['PEN1 y 2','PEN3','PEN4','PMT1','PMT2']
biomass = ['MTR','NSL']
wind = ['AMY1','AMY2','ABR','EOL','PBP']
df_gen['bunker'] = df_gen[bunker_fuel].sum(axis=1)
df_gen['interconnect'] = df_gen[interconnect].sum(axis=1)
df_gen['hydro'] = df_gen[hydro].sum(axis=1)
df_gen['geothermal'] = df_gen[geothermal].sum(axis=1)
df_gen['biomass'] = df_gen[biomass].sum(axis=1)
df_gen['wind'] = df_gen[wind].sum(axis=1)
df_big_gen = pd.DataFrame([df_gen['wind'],df_gen['biomass'],df_gen['interconnect'],df_gen['geothermal'],df_gen['bunker'],df_gen['hydro']]).T
In [10]:
demand_keys = ['Demanda','HORA','date','month','day','weekday','holiday','payday','wd_peak','we_peak','HOURS_PSR','HOURS_PSS','weekend','timeinweek']
df_demand = pd.merge(df_temp,df_gen[demand_keys],how='inner',left_index=True,right_index=True)
df_all = pd.merge(df_temp,df_gen,how='inner',left_index=True,right_index=True)
In [18]:
with open('../data/df_demand.pkl','w') as f:
pickle.dump(df_demand,f)
with open('../data/df_all.pkl','w') as f:
pickle.dump(df_all,f)
In [20]:
df_all.to_csv('steve.xls')
In [9]:
In [ ]: