In [6]:
import datetime
import os
from datetime import datetime as dt, timedelta

import mpld3
import numpy as np
import pandas as pd
# Turn on mpld3's notebook integration for matplotlib figures.
# NOTE(review): no plotting happens in the cells visible here — confirm this
# setup is still needed by later cells.
mpld3.enable_notebook()
# IPython magic: select matplotlib's inline backend for this kernel.
%matplotlib inline
In [13]:
def create_test_csv(region, noise_std=2.5, seed=None,
                    years=range(2002, 2019), hours_per_year=8760):
    """Build the noisy test CSV for one region and write it to ``test/<region>.csv``.

    Reads ``load/<region>.csv`` and ``weather/<region>.csv``, stacks their
    columns end to end (following the column order of the load file) into a
    ``load`` and a ``tempc`` column, cleans both, attaches hourly timestamps,
    adds Gaussian noise to the temperatures and saves the frame without its
    index.

    Parameters
    ----------
    region : str
        Base name (no extension) of the files under ``load/`` and ``weather/``.
    noise_std : float, default 2.5
        Standard deviation of the zero-mean Gaussian noise added to ``tempc``.
    seed : int or None, default None
        When given, the noise is drawn from a dedicated generator seeded with
        this value so the output is reproducible. When ``None`` the global
        NumPy random state is used, as before.
    years : iterable of int, default ``range(2002, 2019)``
        Years for which timestamps are generated, each starting at 1 January
        00:00.
    hours_per_year : int, default 8760
        Number of consecutive hourly timestamps generated per year.

    Raises
    ------
    KeyError
        If the weather file lacks a column that is present in the load file.
    ValueError
        If the number of stacked rows differs from
        ``len(years) * hours_per_year``.
    """
    load = pd.read_csv('load/{}.csv'.format(region))
    weather = pd.read_csv('weather/{}.csv'.format(region))

    # Stack the columns end to end. The weather frame is indexed with the load
    # frame's column names so both series follow exactly the same order.
    load_values = [value for column in load.columns for value in load[column]]
    temp_values = [value for column in load.columns for value in weather[column]]
    large_df = pd.DataFrame({'load': load_values, 'tempc': temp_values})

    # -9999 marks a missing temperature reading: turn it into NaN, then carry
    # the last valid observation forward. Results are assigned back rather than
    # using inplace=True on a column selection, which does not update the
    # parent frame when pandas copy-on-write is active.
    large_df['tempc'] = large_df['tempc'].replace([-9999], np.nan).ffill()
    large_df['load'] = large_df['load'].ffill()

    years = list(years)
    expected_rows = len(years) * hours_per_year
    if len(large_df) != expected_rows:
        raise ValueError(
            'region {!r}: {} data rows but {} timestamps '
            '({} years x {} hours)'.format(
                region, len(large_df), expected_rows, len(years), hours_per_year))

    # NOTE: every year restarts at 1 January and gets the same number of hours,
    # so with the default of 8760 (365 * 24) the final day of a leap year has
    # no timestamp.
    large_df['dates'] = [
        dt(year, 1, 1) + timedelta(hours=hour)
        for year in years
        for hour in range(hours_per_year)
    ]

    # Perturb the temperatures with zero-mean Gaussian noise.
    if seed is None:
        noise = np.random.normal(0, noise_std, len(large_df))
    else:
        noise = np.random.RandomState(seed).normal(0, noise_std, len(large_df))
    large_df['tempc'] = large_df['tempc'] + noise

    large_df.to_csv('test/{}.csv'.format(region), index=False)
In [16]:
# Build a test CSV for every region that has a file in the weather directory.
# Hidden entries (names starting with '.') and anything that is not a .csv
# file are skipped; sorting makes the processing order reproducible, since
# os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir('weather')):
    region, extension = os.path.splitext(filename)
    if filename.startswith('.') or extension.lower() != '.csv':
        continue
    create_test_csv(region)