In [24]:
from os.path import isdir, join, dirname, abspath
from os import listdir, getcwd
# Root folder of building 1 in the local copy of the data set.
homedir = '/Users/GJWood/nilm_gjw_data/building1'

# Sorted names of the immediate sub-directories of homedir (plain files are
# skipped). A missing homedir gives an empty list plus a message instead of an
# OSError from listdir.
if isdir(homedir):
    dir_list = sorted(i for i in listdir(homedir) if isdir(join(homedir, i)))
else:
    print('Data directory not found: %s' % homedir)
    dir_list = []
print('Current dir list: %s' % dir_list)

# TODO: the original cell ended with two unfinished statements:
#     while dir_list is not
#     for folder in dir_list:
# Neither had a condition/body, which made the whole cell a SyntaxError. They
# are kept here as a note until the intended per-folder processing is written.
In [15]:
from os import listdir, getcwd
# List the entries of building 1's "elec" folder in sorted order.
# The original call had a stray "/Users" after the path literal, which divided
# the string by an undefined name and failed at run time.
dir_list = sorted(listdir('/Users/GJWood/nilm_gjw_data/building1/elec'))
print(dir_list)
In [47]:
import datetime
import fnmatch
import os
import re
from copy import deepcopy
from os import listdir
from os.path import join, isdir, isfile
from sys import stdout

import numpy as np
import pandas as pd
from nilm_metadata import convert_yaml_to_hdf5, save_yaml_to_datastore
from nilmtk.datastore import Key
from nilmtk.measurement import LEVEL_NAMES
from nilmtk.timeframe import TimeFrame
from nilmtk.utils import get_datastore
from nilmtk.utils import get_module_directory, check_directory_exists
# Pieces of the CSV dump file names. A full name is built as
#     prefix + <YYYY-MM-DD> + suffix + '.csv'
# The values are plain strings (the original parentheses did not make tuples).
# NOTE(review): the 'apparent' key selects the POWER_REAL file, and the data
# read from it is stored in the "active" column -- confirm the key name.
filename_prefix_mapping = {
    'apparent': '4-POWER_REAL_FINE ',
    'reactive': '5-POWER_REACTIVE_STANDARD ',
}
filename_suffix_mapping = {
    'apparent': ' Dump',
    'reactive': ' Dump',
}

# Time zone the timestamp index is converted to.
TIMEZONE = "Europe/London"

# Column names used for the CSV data.
TIMESTAMP_COLUMN_NAME = "timestamp"
ACTIVE_COLUMN_NAME = "active"
REACTIVE_COLUMN_NAME = "reactive"

# Raw strings keep "\d" as a regex digit class without relying on Python
# passing an unknown string escape through unchanged.
bld_re = re.compile(r'building\d+')  # "building<N>" component of a path
bld_nbr_re = re.compile(r'\d+')  # the number inside "building<N>"
iso_date_re = re.compile(r'\d{4}-\d{2}-\d{2}')  # used to pull the date from the file name
# Walk the data folder, pair each day's "4-..." CSV dump with its "5-..."
# partner, merge the two on the timestamp column and collect everything into
# one DataFrame `df` indexed by time-zone-aware timestamps.

# Root folder that is searched recursively for the CSV dump pairs.
data_dir = '/Users/GJWood/nilm_gjw_data'
# Kept from the original cell: the working directory is switched to the root.
os.chdir(data_dir)

# One merged frame per date. They are concatenated once after the walk instead
# of re-copying a growing DataFrame on every loop iteration.
daily_frames = []
for current_dir, dirs_in_current_dir, files in os.walk(data_dir):
    print('current_dir %s cwd %s' % (current_dir, os.getcwd()))
    print(dirs_in_current_dir)
    m = bld_re.search(current_dir)
    if m:
        # The path contains a "building<N>" component; extract N.
        building_name = m.group()
        building_number = int(bld_nbr_re.search(building_name).group())
    for items in fnmatch.filter(files, "4*.csv"):
        date_match = iso_date_re.search(items)
        if date_match is None:
            # A "4*.csv" file without a YYYY-MM-DD date cannot be paired.
            print('Skipping %s: no date in file name' % items)
            continue
        d = date_match.group()
        # Both file names are rebuilt from the date, so the pair always
        # refers to the same day.
        fn1 = filename_prefix_mapping['apparent'] + d + filename_suffix_mapping['apparent'] + '.csv'
        fn2 = filename_prefix_mapping['reactive'] + d + filename_suffix_mapping['reactive'] + '.csv'
        print(fn1 + ' <-> ' + fn2)
        ffn1 = join(current_dir, fn1)
        ffn2 = join(current_dir, fn2)
        if not (isfile(ffn1) and isfile(ffn2)):
            # Without both halves there is nothing to merge for this date.
            print('Skipping %s: file pair is incomplete' % d)
            continue
        df1 = pd.read_csv(ffn1, names=[TIMESTAMP_COLUMN_NAME, ACTIVE_COLUMN_NAME])
        df2 = pd.read_csv(ffn2, names=[TIMESTAMP_COLUMN_NAME, REACTIVE_COLUMN_NAME])
        # Inner join: only timestamps present in both files are kept.
        daily_frames.append(pd.merge(df1, df2, on=TIMESTAMP_COLUMN_NAME))

if daily_frames:
    df = pd.concat(daily_frames)
else:
    # Nothing found: fall back to an empty frame with the expected columns.
    df = pd.DataFrame(columns=[TIMESTAMP_COLUMN_NAME, ACTIVE_COLUMN_NAME, REACTIVE_COLUMN_NAME])

df = df.drop_duplicates(subset=[TIMESTAMP_COLUMN_NAME])
# The timestamp values are interpreted as seconds (unit='s') in UTC, then
# converted to the configured time zone.
df.index = pd.to_datetime(df[TIMESTAMP_COLUMN_NAME].values, unit='s', utc=True)
df = df.tz_convert(TIMEZONE)
# The timestamp now lives in the index; drop the column copy. `axis` is given
# by keyword because newer pandas no longer accepts it positionally.
df = df.drop(TIMESTAMP_COLUMN_NAME, axis=1)
print(df)
In [57]:
# Show where the HDF5 file lives under homedir.
hdf5_path = join(homedir, "HDF5", "nilm_gjw_data.hdf5")
print(hdf5_path)
In [ ]: