In [2]:
import pandas as pd
import copy
from datetime import datetime,timedelta
import numpy as np
import json
import pickle

In [3]:
import yaml
with open('../config.yaml') as f: 
    cf = yaml.safe_load(f)

In [4]:
def get_gen_df_on_date(day,month,year):
    url = 'http://www.cndc.org.ni/consultas/reportesDiarios/postDespacho/consultaPostdespacho.php?fecha='+day+'/'+month+'/'+year
    response = urllib2.urlopen(url)
    data = response.read()
    soup = BeautifulSoup(data)
    
    
    header = soup.findAll('tr')[1].findAll(class_='trEncabezado3')
    columns = ['HORA']
    for column in header:
        columns.append(column.get_text().encode('ascii','ignore'))
    columns.append('Demanda')
    columns.append('Date')

    rows = soup.findAll(class_='trRegistro')
    df_list = []
    for row in rows:
        row_text = []
        row_vals = row.findAll('td')
        for val in row_vals:
            row_text.append(val.get_text().encode('ascii','ignore'))
        row_text.append(day+'/'+month+'/'+year)
        df_list.append(row_text)
    df = pd.DataFrame(df_list,columns=columns)
    return df

In [5]:
#Uncomment the code below to scrape demand/generation data.
'''
years = [2012,2013,2014]
months = [1,2,3,4,5,6,7,8,9,10,11,12]
dfs = []
print datetime.now()
for year in years:
    for month in months:
        num_days = monthrange(year, month)[1]
        for day in range(1,num_days+1):
            dfs.append(get_gen_df_on_date(str(day),str(month),str(year)))
        df = pd.concat(dfs)
        dfs = []
        df.to_csv(str(month)+'_'+str(year)+'.csv')
        print datetime.now()
'''
pass

In [6]:
years = [2012,2013,2014]
months = [1,2,3,4,5,6,7,8,9,10,11,12]
dfs = []
for year in years:
    for month in months:  
        dfs.append(pd.read_csv('../data/generation/'+str(month)+'_'+str(year)+'.csv'))
df_orig = pd.concat(dfs)
df_orig = df_orig.reset_index(drop=True)

Missing Data

There are 52 missing data points from different generation facilities within the 2012-2014 demand/generation dataset. The way this occurs is that the html table that got scraped from the website shifted all values when one cell was missing data. Therefore, all the values had to be shifted back.

  • SND-L9090 from December 27th, 2012 from 0:00 - 10:00
  • PBP from July 25th,2012 from 0:00 - 13:00
  • SND-L9090 from February 4th, 2013 at 23:00
  • SND-L9090 from April 28th, 2013 from 16:00 - 23:00
  • PMG5 from June 3rd, 2014 at 7:00
  • PNI1 from June 4th, 2014 at 22:00
  • PCG5 from June 5th, 2014 at 6:00
  • PLB1,PLB2 from June 9th, 2014 at 7:00
  • NSL from June 10th, 2014 at 19:00
  • PMT2 from June 12th, 2014 at 17:00
  • PNI1 from June 15th, 2014 at 7:00
  • PMG5 from June 24th, 2014 at 15:00
  • PCG5 from June 27th, 2014 at 0:00
  • PNI1 from July 1st, 2014 at 22:00
  • PHC2 from July 7th, 2014 at 15:00
  • PLB2 from July 11th, 2014 at 12:00
  • PNI1 from July 12th, 2014 at 14:00
  • PCG1 from July 19th, 2014 at 7:00
  • PHC2 from July 20th, 2014 at 23:00
  • PCG5, PNI2 from July 21st, 2014 at 23:00

In [7]:
df_gen = copy.deepcopy(df_orig)
datetime_array = []
date_array = []
month_array = []
weekday_array = []
day_array = []
demand_array = []
tcpi_array = []
date_split = ''
for i,val in enumerate(df_gen.iterrows()):
    row = val[1]
    if(row['Demanda']=='4/2/2013'):
        row['Date'] = '4/2/2013'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['SND-L9090'] = None
    if(row['Demanda']=='28/4/2013'):
        row['Date'] = '28/4/2013'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['SND-L9090'] = None
    if(row['Demanda']=='25/7/2012'):
        row['Date'] = '25/7/2012'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = row['MTR']
        row['MTR'] = row['GSR']
        row['GSR'] = row['PNI2']
        row['PNI2'] = row['PNI1']
        row['PNI1'] = row['PCHN']
        row['PCHN'] = row['EEC20']
        row['EEC20'] = row['EEC']
        row['EEC'] = row['CEN']
        row['CEN'] = row['PBP']
        row['PBP'] = None
    if(row['Demanda']=='27/12/2012'):
        row['Date'] = '27/12/2012'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['SND-L9090'] = None
    if(row['Demanda']=='3/6/2014'):
        row['Date'] = '3/6/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = None
    if(row['Demanda']=='4/6/2014'):
        row['Date'] = '4/6/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = row['MTR']
        row['MTR'] = row['GSR']
        row['GSR'] = row['PNI2']
        row['PNI2'] = row['PNI1']
        row['PNI1'] = None
    if(row['Demanda']=='5/6/2014'):
        row['Date'] = '5/6/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = row['MTR']
        row['MTR'] = row['GSR']
        row['GSR'] = row['PNI2']
        row['PNI2'] = row['PNI1']
        row['PNI1'] = row['PCHN']
        row['PCHN'] = row['EEC20']
        row['EEC20'] = row['EEC']
        row['EEC'] = row['CEN']
        row['CEN'] = row['PBP']
        row['PBP'] = row['AMY2']
        row['AMY2'] = row['AMY1']
        row['AMY1'] = row['PHC2']
        row['PHC2'] = row['PHC1']
        row['PHC1'] = row['PCG9']
        row['PCG9'] = row['PCG8']
        row['PCG8'] = row['PCG7']
        row['PCG7'] = row['PCG6']
        row['PCG6'] = row['PCG5']
        row['PCG5'] = None
    if(row['TCPI-L9150']=='9/6/2014'):
        row['Date'] = '9/6/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = None
        row['PLB1'] = None
    if(row['Demanda']=='10/6/2014'):
        row['Date'] = '10/6/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = None
    if(row['Demanda']=='12/6/2014'):
        row['Date'] = '12/6/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = None
    if(row['Demanda']=='15/6/2014'):
        row['Date'] = '15/6/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = row['MTR']
        row['MTR'] = row['GSR']
        row['GSR'] = row['PNI2']
        row['PNI2'] = row['PNI1']
        row['PNI1'] = None
    if(row['Demanda']=='24/6/2014'):
        row['Date'] = '24/6/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = None
    if(row['Demanda']=='27/6/2014'):
        row['Date'] = '27/6/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = row['MTR']
        row['MTR'] = row['GSR']
        row['GSR'] = row['PNI2']
        row['PNI2'] = row['PNI1']
        row['PNI1'] = row['PCHN']
        row['PCHN'] = row['EEC20']
        row['EEC20'] = row['EEC']
        row['EEC'] = row['CEN']
        row['CEN'] = row['PBP']
        row['PBP'] = row['AMY2']
        row['AMY2'] = row['AMY1']
        row['AMY1'] = row['PHC2']
        row['PHC2'] = row['PHC1']
        row['PHC1'] = row['PCG9']
        row['PCG9'] = row['PCG8']
        row['PCG8'] = row['PCG7']
        row['PCG7'] = row['PCG6']
        row['PCG6'] = row['PCG5']
        row['PCG5'] = None
    if(row['Demanda']=='1/7/2014'):
        row['Date'] = '1/7/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = row['MTR']
        row['MTR'] = row['GSR']
        row['GSR'] = row['PNI2']
        row['PNI2'] = row['PNI1']
        row['PNI1'] = None
    if(row['Demanda']=='7/7/2014'):
        row['Date'] = '7/7/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = row['MTR']
        row['MTR'] = row['GSR']
        row['GSR'] = row['PNI2']
        row['PNI2'] = row['PNI1']
        row['PNI1'] = row['PCHN']
        row['PCHN'] = row['EEC20']
        row['EEC20'] = row['EEC']
        row['EEC'] = row['CEN']
        row['CEN'] = row['PBP']
        row['PBP'] = row['AMY2']
        row['AMY2'] = row['AMY1']
        row['AMY1'] = row['PHC2']
        row['PHC2'] = None
    if(row['Demanda']=='11/7/2014'):
        row['Date'] = '11/7/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = None
    if(row['Demanda']=='12/7/2014'):
        row['Date'] = '12/7/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = row['MTR']
        row['MTR'] = row['GSR']
        row['GSR'] = row['PNI2']
        row['PNI2'] = row['PNI1']
        row['PNI1'] = None
    if(row['Demanda']=='19/7/2014'):
        row['Date'] = '19/7/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = row['MTR']
        row['MTR'] = row['GSR']
        row['GSR'] = row['PNI2']
        row['PNI2'] = row['PNI1']
        row['PNI1'] = row['PCHN']
        row['PCHN'] = row['EEC20']
        row['EEC20'] = row['EEC']
        row['EEC'] = row['CEN']
        row['CEN'] = row['PBP']
        row['PBP'] = row['AMY2']
        row['AMY2'] = row['AMY1']
        row['AMY1'] = row['PHC2']
        row['PHC2'] = row['PHC1']
        row['PHC1'] = row['PCG9']
        row['PCG9'] = row['PCG8']
        row['PCG8'] = row['PCG7']
        row['PCG7'] = row['PCG6']
        row['PCG6'] = row['PCG5']
        row['PCG5'] = row['PCG4']
        row['PCG4'] = row['PCG3']
        row['PCG3'] = row['PCG2']
        row['PCG2'] = row['PCG1']
        row['PCG1'] = None
    if(row['Demanda']=='20/7/2014'):
        row['Date'] = '20/7/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = row['MTR']
        row['MTR'] = row['GSR']
        row['GSR'] = row['PNI2']
        row['PNI2'] = row['PNI1']
        row['PNI1'] = row['PCHN']
        row['PCHN'] = row['EEC20']
        row['EEC20'] = row['EEC']
        row['EEC'] = row['CEN']
        row['CEN'] = row['PBP']
        row['PBP'] = row['AMY2']
        row['AMY2'] = row['AMY1']
        row['AMY1'] = row['PHC2']
        row['PHC2'] = None
    if(row['TCPI-L9150']=='21/7/2014'):
        row['Date'] = '21/7/2014'
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = row['MTR']
        row['MTR'] = row['GSR']
        row['GSR'] = row['PNI2']
        row['PNI2'] = row['PNI1']
        row['PNI1'] = row['PCHN']
        row['PCHN'] = row['EEC20']
        row['EEC20'] = row['EEC']
        row['EEC'] = row['CEN']
        row['CEN'] = row['PBP']
        row['PBP'] = row['AMY2']
        row['AMY2'] = row['AMY1']
        row['AMY1'] = row['PHC2']
        row['PHC2'] = row['PHC1']
        row['PHC1'] = row['PCG9']
        row['PCG9'] = row['PCG8']
        row['PCG8'] = row['PCG7']
        row['PCG7'] = row['PCG6']
        row['PCG6'] = row['PCG5']
        row['PCG5'] = None
        row['Demanda'] = row['TCPI-L9150']
        row['TCPI-L9150'] = row['AMY-L9030']
        row['AMY-L9030'] = row['SND-L9090']
        row['LNI-L9040'] = row['AMY-L9030']
        row['TPC'] = row['LNI-L9040']
        row['LNI-9040'] = row['TPC']
        row['TPC'] = row['PMT3']
        row['PMT3'] = row['PMT2']
        row['PMT2'] = row['PMT1']
        row['PMT1'] = row['PMG5']
        row['PMG5'] = row['PMG4']
        row['PMG4'] = row['PMG3']
        row['PMG3'] = row['PLB2']
        row['PLB2'] = row['PLB1']
        row['PLB1'] = row['PEN3']
        row['PEN3'] = row['PEN1 y 2']
        row['PEN1 y 2'] = row['PCF2']
        row['PCF2'] = row['PCF1']
        row['PCF1'] = row['PCA2']
        row['PCA2'] = row['PCA1']
        row['PCA1'] = row['NSL']
        row['NSL'] = row['MTR']
        row['MTR'] = row['GSR']
        row['GSR'] = row['PNI2']
        row['PNI2'] = None
    try:
        date_split = row['Date'].split('/')
    except:
        print row
    hour = row['HORA']
    date = datetime(int(float(date_split[2])),int(float(date_split[1])),int(float(date_split[0])))
    date_time = datetime(int(float(date_split[2])),int(float(date_split[1])),int(float(date_split[0])),int(float(hour)))
    month = int(float(date_split[0]))  
    weekday = int(float(date_time.weekday()))
    day = int(float(date_time.day))
    datetime_array.append(date_time)
    date_array.append(date)
    month_array.append(month)
    weekday_array.append(weekday)
    day_array.append(day)
    demand_array.append(row['Demanda'])
    tcpi_array.append(row['TCPI-L9150'])
df_gen['TCPI-L9150'] = tcpi_array
df_gen['TCPI-L9150'] = df_gen['TCPI-L9150'].astype(np.float64)
df_gen['Demanda'] = demand_array
df_gen['Demanda'] = df_gen['Demanda'].astype(np.float64)

df_gen['datetime']=datetime_array
df_gen['date']=date_array
df_gen['month']=month_array
df_gen['weekday']=weekday_array
df_gen['day']=day_array
df_gen = df_gen.set_index('datetime').drop('Date',1).drop('Unnamed: 0',1)
df_gen['HORA'] = df_gen[['HORA']].astype(int)

In [8]:
#Uncomment the code below to use the api.
'''
api_key = cf['api_key']
years = ['2009','2010']
months= ['01','02','03','04','05','06','07','08','09','10','11','12']
for year in years:
    for month in months:
        url = 'http://api.worldweatheronline.com/premium/v1/past-weather.ashx?q=Managua&format=json&date='+year+'-'+month+'-01&enddate=2015-02-01&includelocation=yes&key='+api_key
        response = urllib.urlopen(url);
        print url
        weather_data = json.loads(response.read())
        with open('../weather/managua_'+year+'_'+month+'.json', 'w') as outfile:
            json.dump(weather_data, outfile)
'''
pass

In [9]:
years = ['2011','2012','2013','2014']
months= ['01','02','03','04','05','06','07','08','09','10','11','12']
managua_weather = []
for year in years:
    for month in months:
        with open('../../weather/managua_'+year+'_'+month+'.json', 'r') as infile:
            managua = json.load(infile)
        for weather in managua['data']['weather']:
            managua_weather.append(weather)

In [10]:
weather_single_list = []
for day in managua_weather:
    date_array = day['date'].split('-')
    date = datetime(int(float(date_array[0])),int(float(date_array[1])),int(float(date_array[2])))
    for hour_data in day['hourly']:
        hour = int(float(hour_data['time'])/100)
        hour_data['datetime']=date.replace(hour=hour)
        weather_single_list.append(hour_data)

df_temp = pd.DataFrame(weather_single_list, columns=hour_data.keys())
df_temp = df_temp.set_index('datetime')
for column in df_temp:
    if column not in ['weatherIconUrl','winddir16Point','weatherDesc']:
        df_temp[column] = df_temp[column].astype(float)
df_temp_resampled = df_temp.resample('1H').apply(pd.Series.interpolate)
df_temp_resampled['precipMM'] = df_temp.resample('1H').fillna(0)
df_temp = df_temp_resampled
df_temp = df_temp.drop('time',1)

In [11]:
def dow_date_finder(which_weekday_in_month=0,day=3,month=4,year=2015):
    dt = datetime(year,month,1)
    dow_lst = []
    while dt.weekday() != day:
        dt = dt + timedelta(days=1)
    while dt.month == month:
        dow_lst.append(dt)
        dt = dt + timedelta(days=7)
    return dow_lst[which_weekday_in_month]  # may raise an exception if slicing is wrong

holiday_dates = []
for year in years:
    holiday_dates.append(datetime(int(float(year)),1,2))#New Years Day
    holiday_dates.append(datetime(int(float(year)),2,1))#Air Force Day
    holiday_dates.append(dow_date_finder(0,3,4,int(float(year))))#Holy Thursday
    holiday_dates.append(datetime(int(float(year)),5,1))#Labour Day
    holiday_dates.append(datetime(int(float(year)),5,27))#Army Day
    holiday_dates.append(datetime(int(float(year)),7,19))#Liberation Day
    holiday_dates.append(datetime(int(float(year)),7,25))#Fiesta de Santiago
    holiday_dates.append(datetime(int(float(year)),7,26))#Fiesta de Santa Ana
    holiday_dates.append(datetime(int(float(year)),8,1))#Fiesta de Santa Domingo
    holiday_dates.append(datetime(int(float(year)),9,14))#Battle of San Jacinto
    holiday_dates.append(datetime(int(float(year)),9,15))#Independence Day
    holiday_dates.append(datetime(int(float(year)),10,12))#Indigenous Resistance Day
    #holiday_dates.append(datetime(int(float(year)),12,7))#La Griteria Immaculate (Leon only)
    holiday_dates.append(datetime(int(float(year)),12,8))#La Griteria Immaculate
    holiday_dates.append(datetime(int(float(year)),12,25))#Christmas
    holiday_dates.append(datetime(int(float(year)),12,31))#New Years Eve
holiday_dates = [date for date in holiday_dates if df_gen.date[0]<date and df_gen.date[-1]>date]

In [12]:
df_gen['holiday'] = 0
for holiday in holiday_dates:
    df_gen.loc[df_gen['date']==holiday,'holiday'] = 1

df_gen['weekend'] = 0
df_gen.loc[df_gen['weekday']>4,'weekend'] = 1

df_gen['payday'] = 0
df_gen.loc[df_gen['day']==15,'payday'] = 1

df_gen['timeinweek']=24*df_gen['weekday']+df_gen['HORA']

df_gen['wd_peak'] = 1
df_gen.loc[df_gen['HORA']<9,'wd_peak'] = 0
df_gen.loc[df_gen['HORA']>21,'wd_peak'] = 0
df_gen.loc[df_gen['weekend']==1,'wd_peak'] = 0

df_gen['we_peak'] = 1
df_gen.loc[df_gen['HORA']<18,'we_peak'] = 0
df_gen.loc[df_gen['HORA']>21,'we_peak'] = 0
df_gen.loc[df_gen['weekend']==0,'we_peak'] = 0


df_gen['HOURS_PSR'] = df_gen['HORA'] - 6
df_gen.loc[df_gen['HOURS_PSR']<0,'HOURS_PSR'] = df_gen.loc[df_gen['HOURS_PSR']<0,'HOURS_PSR'] + 24

df_gen['HOURS_PSS'] = df_gen['HORA'] - 18
df_gen.loc[df_gen['HOURS_PSS']<0,'HOURS_PSS'] = df_gen.loc[df_gen['HOURS_PSS']<0,'HOURS_PSS'] + 24

In [16]:
#changed EOLO to EOL, EEC50 to EEC, and TPT to TPC
#changed PMGA to PMG, PCH1 to PCHN
df_gen['Albanisa_Bunker'] = df_gen[['PCG1','PCG2','PCG3','PCG4','PCG5','PCG6','PCG7','PCG8','PCG9','PHC1','PHC2']].sum(axis=1)
df_gen['AEI_Wind'] = df_gen[['AMY1','AMY2']].sum(axis=1)
df_gen['Gruo_Terra_Wind'] = df_gen[['PBP']].sum(axis=1)
df_gen['Other_Wind'] = df_gen[['ABR','EOL']].sum(axis=1)
df_gen['AEI_Bunker'] = df_gen[['CEN','EEC20','EEC','TPC']].sum(axis=1)
df_gen['Enel_Hydro'] = df_gen[['PCA1','PCA2','PCF1','PCF2']].sum(axis=1)
df_gen['Enel_Bunker'] = df_gen[['PLB1','PLB2','PMG3','PMG4','PMG5']].sum(axis=1)
df_gen['Other_Bunker'] = df_gen[['PCHN','PNI1','PNI2','GSR']].sum(axis=1)
df_gen['Hidropanasma_Hydro'] = df_gen[['HPA1','HPA2']].sum(axis=1)

bunker_fuel = ['PCG1','PCG2','PCG3','PCG4','PCG5','PCG6','PCG7','PCG8','PCG9','PHC1','PHC2','CEN','EEC20','EEC','TPC','PLB1','PLB2','PMG3','PMG4','PMG5','PCHN','PNI1','PNI2','GSR']
interconnect = ['LNI-L9040','SND-L9090','AMY-L9030','TCPI-L9150']
hydro = ['PCA1','PCA2','PCF1','PCF2','HPA1','HPA2']
geothermal = ['PEN1 y 2','PEN3','PEN4','PMT1','PMT2']
biomass = ['MTR','NSL'] 
wind = ['AMY1','AMY2','ABR','EOL','PBP']
df_gen['bunker'] = df_gen[bunker_fuel].sum(axis=1)
df_gen['interconnect'] = df_gen[interconnect].sum(axis=1)
df_gen['hydro'] = df_gen[hydro].sum(axis=1)
df_gen['geothermal'] = df_gen[geothermal].sum(axis=1)
df_gen['biomass'] = df_gen[biomass].sum(axis=1)
df_gen['wind'] = df_gen[wind].sum(axis=1)
df_big_gen = pd.DataFrame([df_gen['wind'],df_gen['biomass'],df_gen['interconnect'],df_gen['geothermal'],df_gen['bunker'],df_gen['hydro']]).T

In [10]:
demand_keys = ['Demanda','HORA','date','month','day','weekday','holiday','payday','wd_peak','we_peak','HOURS_PSR','HOURS_PSS','weekend','timeinweek']
df_demand = pd.merge(df_temp,df_gen[demand_keys],how='inner',left_index=True,right_index=True)
df_all = pd.merge(df_temp,df_gen,how='inner',left_index=True,right_index=True)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-b00303109daa> in <module>()
      1 demand_keys = ['Demanda','HORA','date','month','day','weekday','holiday','payday','wd_peak','we_peak','HOURS_PSR','HOURS_PSS','weekend','timeinweek']
----> 2 df_demand = pd.merge(df_temp,df_gen[demand_keys],how='inner',left_index=True,right_index=True)
      3 df_all = pd.merge(df_temp,df_gen,how='inner',left_index=True,right_index=True)

NameError: name 'df_temp' is not defined

In [18]:
with open('../data/df_demand.pkl','w') as f:
    pickle.dump(df_demand,f)
with open('../data/df_all.pkl','w') as f:
    pickle.dump(df_all,f)

In [20]:
df_all.to_csv('steve.xls')

In [9]:



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-7fa3eeb5d3a3> in <module>()
----> 1 df_demand.keys()

NameError: name 'df_demand' is not defined

In [ ]: