Combine the load and weather CSVs into a single CSV per region that can be used in testing


In [6]:
import datetime
import os
from datetime import datetime as dt, timedelta

import mpld3
import numpy as np
import pandas as pd

# Notebook display setup: turn on mpld3's notebook integration for
# matplotlib figures.
mpld3.enable_notebook()
# IPython magic (not plain Python): show matplotlib figures inline in the
# cell output.
# NOTE(review): this runs after mpld3.enable_notebook(); confirm which
# renderer actually takes effect for figures in this notebook.
%matplotlib inline

In [13]:
def create_test_csv(region, start_year=2002, end_year=2019, noise_std=2.5):
    """Merge one region's load and weather CSVs into ``test/<region>.csv``.

    Reads ``load/<region>.csv`` and ``weather/<region>.csv``, stacks their
    columns end to end (in the column order of the load file) into two long
    series, cleans them, stamps every row with an hourly timestamp, adds
    Gaussian noise to the temperatures and writes the result with the
    columns ``load``, ``tempc`` and ``dates``.

    The input layout is presumably one column per year with 8760 hourly
    rows each -- TODO confirm against the data files.

    Args:
        region: Base name (without ``.csv``) of the files to read and write.
        start_year: First calendar year used for the generated timestamps.
        end_year: Year at which the timestamps stop (exclusive).
        noise_std: Standard deviation of the zero-mean normal noise added
            to ``tempc``. Pass 0 to leave the temperatures untouched.

    Raises:
        KeyError: If the weather file lacks a column that the load file has.
        ValueError: If the flattened data does not contain exactly
            ``(end_year - start_year) * 8760`` rows in both series.

    Note:
        Every year is given 8760 timestamps starting on 1 January, so in a
        leap year the generated timestamps stop at 30 December 23:00.
    """
    hours_per_year = 8760

    load = pd.read_csv('load/{}.csv'.format(region))
    weather = pd.read_csv('weather/{}.csv'.format(region))

    # Flatten column by column; the weather file is read using the load
    # file's column names so that both series stay aligned.
    columns = list(load.columns)
    load_values = [value for column in columns for value in load[column]]
    temp_values = [value for column in columns for value in weather[column]]

    years = range(start_year, end_year)
    expected_rows = len(years) * hours_per_year
    if not (len(load_values) == len(temp_values) == expected_rows):
        raise ValueError(
            'region {!r}: expected {} rows ({} years x {} hours) but found '
            '{} load values and {} temperature values'.format(
                region, expected_rows, len(years), hours_per_year,
                len(load_values), len(temp_values)))

    large_df = pd.DataFrame({'load': load_values, 'tempc': temp_values})

    # Fix outliers: -9999 temperature readings (presumably a missing-data
    # sentinel) become NaN, then gaps are filled with the previous value.
    # The results are assigned back rather than using inplace=True on a
    # column selection, which does not update the frame under pandas
    # Copy-on-Write. A gap at the very start has no previous value and
    # therefore stays NaN.
    large_df['tempc'] = large_df['tempc'].replace([-9999], np.nan).ffill()
    large_df['load'] = large_df['load'].ffill()

    large_df['dates'] = [
        dt(year, 1, 1) + timedelta(hours=hour)
        for year in years
        for hour in range(hours_per_year)
    ]

    # Perturb the temperatures with zero-mean Gaussian noise.
    noise = np.random.normal(0, noise_std, large_df.shape[0])
    large_df['tempc'] = large_df['tempc'] + noise

    # Make sure the output directory exists before writing.
    os.makedirs('test', exist_ok=True)
    large_df.to_csv('test/{}.csv'.format(region), index=False)

In [16]:
# Build a test CSV for every region that has a weather file.
for filename in os.listdir('weather'):
    # Split off the real extension instead of blindly dropping the last
    # four characters, and test the hidden-file prefix on the actual
    # filename so short hidden names (e.g. ".foo") cannot slip through
    # as an empty region name.
    region, extension = os.path.splitext(filename)
    if filename.startswith('.') or extension.lower() != '.csv':
        continue
    create_test_csv(region)