In [1]:
%matplotlib inline
import json
import pandas as pd
import numpy as np
import csv
import glob
import os
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
from matplotlib import rcParams
from datetime import datetime
#Path for folders that contain individual csv files of trace data.
#Set the TRACEBASE_DIR environment variable to point at another copy of the data;
#the original location is kept as the fallback so existing setups keep working.
#The empty final join component guarantees a trailing separator, which matters
#for code that builds sub-paths from this value by plain string concatenation.
folder_path=os.path.join(os.environ.get('TRACEBASE_DIR','/home/steve/DSSG/tracebase/complete/'),'')
In [2]:
#Reads one semicolon-separated trace file into a pandas DataFrame
def import_csv(filename):
    """Load a single headerless trace CSV.

    Parameters
    ----------
    filename : path of the file to read; fields are separated by ';'.

    Returns
    -------
    pandas.DataFrame whose columns are named "Time", "1W" and "8W",
    in the order the fields appear in the file.
    """
    column_names = ["Time", "1W", "8W"]
    return pd.read_csv(filename, sep=';', header=None, names=column_names)
In [ ]:
#NOTE(review): unfinished scratch cell (it has no execution count) -- it cannot run as written.
#It looks like an attempt to load one file for a given device/instance/date; confirm intent or delete.
device='Refrigerator'
instance='76C07F'
#NOTE(review): bare name that is never assigned in the cells shown here; evaluating it raises NameError.
date
#NOTE(review): file_signal is only a local inside import_device, and device_name/filename are
#first assigned at notebook level in later cells, so this line fails on a fresh kernel.
file_signal[device_name]=import_csv(filename)
In [3]:
#This function imports multiple CSV files from a single device folder
def import_device(path,device):
    """Load every trace file found in one device folder.

    Parameters
    ----------
    path : directory that contains one sub-folder per device type
        (with or without a trailing separator).
    device : name of the sub-folder to read.

    Returns
    -------
    dict of the form {device: {key: DataFrame}}. The key is the file's base
    name with everything up to and including the first '_' removed and the
    '.csv' suffix dropped.

    Raises
    ------
    ValueError if a file's base name contains no '_' or no '.csv'.
    """
    print(device)
    file_signal={}
    #Sorted so that the load order (and anything enumerating it) is reproducible
    for filename in sorted(glob.glob(os.path.join(path,device,'*'))):
        print(device + " " + filename)
        #Slice the key out of the base name only: an underscore in a directory
        #name must not influence where the key starts
        base_name=os.path.basename(filename)
        device_name=base_name[base_name.index('_')+1:]
        device_name=device_name[0:device_name.index('.csv')]
        file_signal[device_name]=import_csv(filename)
    return {device:file_signal}
In [4]:
#After choosing a device, this imports that device into a dictionary called device_signal
#device must name a sub-folder of folder_path; every file in that sub-folder is loaded.
device="Cookingstove"
#The result is nested: device_signal[device][<key derived from the file name>] -> DataFrame
device_signal=import_device(folder_path,device)
In [5]:
#DEBUGGING ONLY: Shows initial data.
#Takes the first trace loaded for the chosen device (works on Python 2 and 3)
device_name=next(iter(device_signal[device]))
pylab.rcParams['figure.figsize'] = 16, 12
trace=device_signal[device][device_name]
timestamps=[datetime.strptime(stamp,'%d/%m/%Y %H:%M:%S') for stamp in trace["Time"]]
plt.plot(timestamps,trace["1W"])
#Total of the "1W" column divided by 3600; assuming one sample per second in watts
#this is the energy in watt-hours -- TODO confirm the sampling rate.
#A dedicated name is used so the builtin sum() is not overwritten for the rest of the
#kernel session, and the float divisor avoids integer truncation on Python 2.
total_watt_seconds=trace["1W"].sum()
print(total_watt_seconds/3600.0)
In [6]:
#This takes the device and aggregates to 1 minute and 15 minute intervals
#It also attempts to guess state
step_changes={}
aggregator={}
def _new_window():
    #Fresh accumulator for one fixed-length averaging window
    return {'start':None,'sum':0.0,'count':0,'values':[],'times':[]}
def _update_window(window,length,now,value):
    #Closes the window once `length` seconds have elapsed, then adds the sample to the open window
    if window['start'] is None:
        window['start']=now
    elif (now-window['start']).total_seconds()>=length:
        #count is never 0 here: the sample that opened the window was already added
        window['values'].append(window['sum']/window['count'])
        window['times'].append(now)
        window['start']=now
        window['sum']=0.0
        window['count']=0
    #The sample that closes a window is the first sample of the next one,
    #so no reading is dropped and sum and count always describe the same samples
    window['sum']+=value
    window['count']+=1
def aggregate_and_on_off(appliance,min_diff):
    """Average one trace over 1-minute and 15-minute windows and detect on-periods.

    Parameters
    ----------
    appliance : DataFrame with a "Time" column of 'dd/mm/YYYY HH:MM:SS' strings
        and a numeric "1W" column.
    min_diff : power threshold. The device is considered switched on when a
        reading is above it and switched off when a reading falls below it.

    Results are written into the module-level dictionaries (and also returned
    as the tuple ``(aggregator, step_changes)``):

    aggregator
        'time_all' / 'every_all_energy' : every de-duplicated sample.
        'time_1'   / 'every_1_energy'   : mean of "1W" over each 60 s window.
        'time_15'  / 'every_15_energy'  : mean of "1W" over each 900 s window.
        Each window is labelled with the timestamp of the sample that closed
        it. A trailing window that never reaches full length is not emitted.
    step_changes
        'turn_on' / 'turn_off' : str() of every switch-on / switch-off time.
        'duration' / 'on_values' : length in seconds and mean "1W" reading of
        every on-period longer than 60 s. An on-period still open at the end
        of the trace appears in 'turn_on' only.

    Units: the averages have the same unit as the "1W" column (presumably
    watts averaged over one second -- confirm against the data source).
    """
    time_format='%d/%m/%Y %H:%M:%S'
    min_on_seconds=60
    #Variables for on-off check
    turn_ons=[]
    turn_offs=[]
    on_avg=[]
    duration_on=[]
    start_on=None
    is_on=False
    sum_on=0.0
    count_on=0
    #Variables for aggregating appliance data
    every_all=[]
    time_all=[]
    window_1=_new_window()
    window_15=_new_window()
    past_time=None
    for raw_time,raw_value in zip(appliance['Time'],appliance['1W']):
        now=datetime.strptime(raw_time,time_format)
        #Gets rid of consecutive rows that repeat the same timestamp
        if now==past_time:
            continue
        past_time=now
        #float() keeps the averages exact on Python 2 and the values JSON-serializable
        value=float(raw_value)
        time_all.append(now)
        every_all.append(value)
        #Elapsed time is measured with datetime arithmetic; strftime('%s') is a
        #platform-specific extension and depends on the local time zone
        _update_window(window_1,60,now,value)
        _update_window(window_15,900,now,value)
        #Checker for when device is on
        if not is_on and value>min_diff:
            turn_ons.append(str(now))
            start_on=now
            is_on=True
            sum_on=0.0
            count_on=0
        if is_on:
            if value<min_diff:
                turn_offs.append(str(now))
                seconds_on=(now-start_on).total_seconds()
                #Gets all on states longer than 60 seconds
                if seconds_on>min_on_seconds:
                    duration_on.append(seconds_on)
                    on_avg.append(sum_on/count_on)
                is_on=False
                sum_on=0.0
                count_on=0
            else:
                #Only readings taken while on contribute to the on-average,
                #each exactly once
                sum_on+=value
                count_on+=1
    #Storage of on-off data in dictionary
    step_changes['turn_on']=turn_ons
    step_changes['turn_off']=turn_offs
    step_changes['duration']=duration_on
    step_changes['on_values']=on_avg
    #Storage of aggregated values in dictionary
    aggregator['every_15_energy']=window_15['values']
    aggregator['every_1_energy']=window_1['values']
    aggregator['every_all_energy']=every_all
    aggregator['time_all']=time_all
    aggregator['time_1']=window_1['times']
    aggregator['time_15']=window_15['times']
    return aggregator,step_changes
In [10]:
#Creates JSON files from tracebase based on provided device, starting at device_index
#min_val is threshold for whether a device should be considered on or off
#device_namedate is string 'instance name'_'date' ex) D32290_12.06.11
#This function calls the aggregator function above
def makeFiles(device_signal):
    """Aggregate every loaded trace of the current device and write one JSON file per trace.

    Parameters
    ----------
    device_signal : dict of the form {device: {'<instance>_<date>': DataFrame}}.

    Reads the notebook-level names ``device`` (which entry of device_signal to
    process), ``device_index`` (entries whose enumeration index is lower are
    skipped) and ``min_val`` (passed on to aggregate_and_on_off), and the
    ``aggregator`` / ``step_changes`` dictionaries filled by that call.

    Each file is written to Tracebase/<device>/<instance>/<device>_<instance>_<date>.json
    (relative to the working directory) and holds the keys 'time_15' and
    'time_1' (each a dict of 'times' and 'values') plus 'on_stats'.
    Directories are created as needed. Returns None.
    """
    traces=device_signal[device]
    num_files=len(traces)
    device_dir=os.path.join('Tracebase',device)
    if not os.path.exists(device_dir):
        os.makedirs(device_dir)
    for index,device_namedate in enumerate(traces):
        if index<device_index:
            continue
        #Instance name is everything before the first underscore of the key
        device_name=device_namedate[:device_namedate.index('_')]
        instance_dir=os.path.join(device_dir,device_name)
        if not os.path.exists(instance_dir):
            os.makedirs(instance_dir)
        aggregate_and_on_off(traces[device_namedate],min_val)
        #Timestamps are stored as strings because datetime objects are not JSON serializable
        store_json={
            'time_15':{
                'times':[str(stamp) for stamp in aggregator['time_15']],
                'values':list(aggregator['every_15_energy']),
            },
            'time_1':{
                'times':[str(stamp) for stamp in aggregator['time_1']],
                'values':list(aggregator['every_1_energy']),
            },
            'on_stats':step_changes,
        }
        filename=device+'_'+device_namedate+'.json'
        #os.path.join avoids the doubled separator that plain concatenation produced
        with open(os.path.join(instance_dir,filename),'w') as outfile:
            json.dump(store_json,outfile)
        print(str(index+1) + ' of ' + str(num_files))
In [8]:
#This takes the device_signal dictionary from when the device was loaded
#It makes the json files by aggregating the 1-second data
#device_index: makeFiles skips entries whose enumeration index is below this value
device_index=0
#min_val: handed by makeFiles to aggregate_and_on_off as its second argument
min_val=15
makeFiles(device_signal)
In [12]:
#DEBUGGING ONLY - Runs a single file
#device_signal[device] is a dict keyed by strings taken from the file names, so the
#first trace has to be looked up by its key; indexing with 0 raises KeyError
first_trace=next(iter(device_signal[device]))
#Only the first 10000 rows are aggregated to keep the check quick
aggregate_and_on_off(device_signal[device][first_trace][:10000],5)
pylab.rcParams['figure.figsize'] = 16, 12
#One panel per resolution: raw samples, 1-minute averages, 15-minute averages
panels=[('time_all','every_all_energy'),('time_1','every_1_energy'),('time_15','every_15_energy')]
for position,(time_key,value_key) in enumerate(panels,1):
    plt.subplot(3,1,position)
    plt.plot(aggregator[time_key],aggregator[value_key],marker="o",linestyle="dashed")
In [1]:
#Takes a JSON file and reloads it back into python
instance='D35C05'
filename='Tracebase/'+device+'/'+instance+'/'+device+'_'+instance+'_2012.06.20.json'
#The with-block closes the file as soon as it has been parsed
with open(filename) as json_data:
    check_data = json.load(json_data)
#Timestamps were written with str(datetime), hence this format
stored_format='%Y-%m-%d %H:%M:%S'
time_array_1=[datetime.strptime(stamp,stored_format) for stamp in check_data['time_1']['times']]
time_array_15=[datetime.strptime(stamp,stored_format) for stamp in check_data['time_15']['times']]
#Top panel: 1-minute values, bottom panel: 15-minute values
plt.subplot(2, 1, 1)
plt.plot(time_array_1,check_data['time_1']['values'],marker="o",linestyle="dashed")
plt.subplot(2, 1, 2)
plt.plot(time_array_15,check_data['time_15']['values'],marker="o",linestyle="dashed")
In [ ]: