In [1]:
%matplotlib inline
import json
import pandas as pd
import numpy as np
import csv
import glob
import os
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
from matplotlib import rcParams
from datetime import datetime

#Path for folders that contain individual csv files of trace data.
folder_path='/home/steve/DSSG/tracebase/complete/'


/usr/local/lib/python2.7/dist-packages/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0.
  .format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))

In [2]:
#This function imports a single CSV file into an array
def import_csv(filename):
    df = pd.read_csv(filename,sep=';',header=None,names=["Time","1W","8W"])
    return df

In [ ]:
device='Refrigerator'
instance='76C07F'
date
file_signal[device_name]=import_csv(filename)

In [3]:
#This function imports multiple CSV files from a single device folder
def import_device(path,device):
    device_signal ={}
    i=0
    file_signal={}
    print device
    for filename in glob.glob(path+device+'/*'):
        i=i+1
        print device + " " + filename
        device_name=filename[filename.index('_')+1:]
        device_name=device_name[0:device_name.index('.csv')]
        file_signal[device_name]=import_csv(filename)
    device_signal[device]=file_signal
    return device_signal

In [4]:
#After choosing a device, this imports that device into a dictionary called device_signal
device="Cookingstove"
device_signal=import_device(folder_path,device)


Cookingstove
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2012.01.08.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2012.01.07.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2011.12.25.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2011.12.17.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2011.12.20.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2011.12.31.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2011.12.30.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2012.01.01.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2012.01.11.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2011.12.18.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2011.12.19.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2011.12.21.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2011.12.26.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2012.01.10.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2011.12.27.csv
Cookingstove /home/steve/DSSG/tracebase/complete/Cookingstove/dev_D33097_2012.01.09.csv

In [5]:
#DEBUGGING ONLY: Shows initial data. 
device_name=device_signal[device].keys()[0]
pylab.rcParams['figure.figsize'] = 16, 12
time=[]
for x in device_signal[device][device_name]["Time"]:
    time.append(datetime.strptime(x,'%d/%m/%Y %H:%M:%S'))
plt.plot(time,device_signal[device][device_name]["1W"])

sum = 0
for x in device_signal[device][device_name]["1W"]:
    sum = sum + x
print sum/3600


36

In [6]:
#This takes the device and agggregates to 1 minute and 15 minute intervals
#It also attempts to guess state
step_changes={}
aggregator={}

def aggregate_and_on_off(appliance,min_diff):
    
    #Variables for on-off check
    past_val=0
    diff=0
    turn_ons=[]
    turn_offs=[]
    on_avg=[]
    duration_on=[]
    
    start_on=0
    is_on=False
    sum_on=0
    
    #Variables for aggregating appliance data
    every_15=[]
    time_15 =[]
    sum_15=0
    
    every_1=[]
    time_1=[]
    sum_1=0
    
    every_all=[]
    time_all=[]

    
    
    
    #These are used to check that 1 minute or 15 minutes have passed by storing the exact time of the previous round. 
    #This is necessary because the time signals are not exactly on times.
    last_1=0
    last_15=0
    past_time=0
    index_1=0
    index_15=0
    index_on=0
    '''units:
       The initial units are the average watts over one second. Therefore, the units are in Watt-seconds/second. 
       By averaging these values over the course of one minute, the unit becomes Watt-seconds/minute.
       By averaging these values over the course of 15 minutes, the unit becomes Watt-seconds/15mins.
      
       
       
    '''
    for index,row in appliance.iterrows():
        
       
        
        now = datetime.strptime(row['Time'],'%d/%m/%Y %H:%M:%S')
        if(past_time != now): ##Gets rid of duplicates
           
            #Store normal values with time as date_time
            time_all.append(now)
            every_all.append(row['1W'])
            
            #These are used to average the 1-min and 15-min values by counting the number of points in each interval
            index_1=index_1+1
            index_15=index_15+1
            #15-min Aggregator
            if float(now.strftime('%s'))-last_15>900:
                last_15=float(now.strftime('%s'))
                every_15.append(sum_15/index_15)
                time_15.append(now) 
                sum_15=0
                index_15=0
            else:
                sum_15=sum_15+row['1W']
            
            #1-min Aggregator
            if float(now.strftime('%s'))-last_1>60:
                every_1.append(sum_1/index_1)
                time_1.append(now)
                last_1=float(now.strftime('%s'))
                sum_1=0
                index_1=0

            else:
                sum_1=sum_1+row['1W']
                
                
            #Checker for when device is on
            
           
            if row['1W']>10 and not is_on:
                turn_ons.append(str(now))
                start_on=now
                is_on=True;
                index_on=index_on+1
            if is_on:
                sum_on=sum_on+float(row['1W']) 
                index_on=index_on+1
            if row['1W']<10 and is_on:
                turn_offs.append(str(now))
                #Gets all on states longer than 60 seconds
                if((now-start_on).total_seconds()>60):
                    duration_on.append((now-start_on).total_seconds())
                    on_avg.append(sum_on/index_on)
                sum_on=0
                is_on=False
                index_on=0
            
        #Variables so that previous value can be used in subsequent iteration of for loop       
        past_time=now
        past_val=row['1W']
    #Storage of on-off data in dictionary
    step_changes['turn_on'] = turn_ons
    step_changes['turn_off']=turn_offs
    step_changes['duration']=duration_on
    step_changes['on_values']= on_avg
    
    #Storage of aggregated values in dictionary
    aggregator['every_15_energy']=every_15
    aggregator['every_1_energy']=every_1
    aggregator['every_all_energy']=every_all
    aggregator['time_all']=time_all
    aggregator['time_1']=time_1
    aggregator['time_15']=time_15

In [10]:
#Creates JSON files from tracebase based on provided device, starting at device_index
#min_val is threshold for whether a device should be considered on or off
#device_namedate is string 'instance name'_'date' ex) D32290_12.06.11
#This function calls the aggregator function above

def makeFiles(device_signal):
    num_files=len(device_signal[device].keys())

    if not os.path.exists('Tracebase/'+device):
        os.makedirs('Tracebase/'+device)
    for index,device_namedate in enumerate(device_signal[device]):
        if(index>=device_index):
            device_name=device_namedate[:device_namedate.index('_')]
            if not os.path.exists('Tracebase/'+device+'/'+device_name):
                os.makedirs('Tracebase/'+device+'/'+device_name)
            aggregate_and_on_off(device_signal[device][device_namedate],min_val)
            #Turn 2 Arrays into a dict and stores in JSON File
            store_json={}
            json_15={}
            json_1={}
            json_on_stats={}
            time_index_15=0
            time_index_1=0
            time_1_array=[]
            value_1_array=[]
            time_15_array=[]
            value_15_array=[]
            for time in aggregator['time_15']:
                time_15_array.append(str(time))
                value_15_array.append(aggregator['every_15_energy'][time_index_15])
                time_index_15=time_index_15+1
            for time in aggregator['time_1']:
                time_1_array.append(str(time))
                value_1_array.append(aggregator['every_1_energy'][time_index_1])
                time_index_1=time_index_1+1
            filename=device+'_'+device_namedate+'.json'
            json_15['times']=time_15_array
            json_15['values']=value_15_array
            json_1['times']=time_1_array
            json_1['values']=value_1_array
            store_json['time_15']=json_15
            store_json['time_1']=json_1
            store_json['on_stats']=step_changes
            with open('Tracebase/'+device+'/'+'/'+device_name+'/'+filename, 'w') as outfile:
                json.dump(store_json, outfile)
            print str(index+1) + ' of ' + str(num_files)

In [8]:
#This takes the device_signal ditcionary from when the device was loaded
#It makes the json files by aggregating the 1-second data
device_index=0
min_val=15
makeFiles(device_signal)


1 of 16
2 of 16
3 of 16
4 of 16
5 of 16
6 of 16
7 of 16
8 of 16
9 of 16
10 of 16
11 of 16
12 of 16
13 of 16
14 of 16
15 of 16
16 of 16

In [12]:
#DEBUGGING ONLY - Runs a single file
aggregate_and_on_off(device_signal[device][0][:10000],5)
pylab.rcParams['figure.figsize'] = 16, 12
plt.subplot(3, 1, 1)
plt.plot(aggregator['time_all'],aggregator['every_all_energy'],marker="o",linestyle="dashed")
plt.subplot(3, 1, 2)
plt.plot(aggregator['time_1'],aggregator['every_1_energy'],marker="o",linestyle="dashed")
plt.subplot(3,1,3)
plt.plot(aggregator['time_15'],aggregator['every_15_energy'],marker="o",linestyle="dashed")


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-12-008f020e2643> in <module>()
      1 #DEBUGGING ONLY - Runs a single file
----> 2 aggregate_and_on_off(device_signal[device][0][:10000],5)
      3 pylab.rcParams['figure.figsize'] = 16, 12
      4 plt.subplot(3, 1, 1)
      5 plt.plot(aggregator['time_all'],aggregator['every_all_energy'],marker="o",linestyle="dashed")

KeyError: 0

In [1]:
#Takes a JSON file and reloads it back into python
instance='D35C05'
filename='Tracebase/'+device+'/'+instance+'/'+device+'_'+instance+'_2012.06.20.json'
json_data = open(filename)
check_data = json.load(json_data)

time_array_1=[]
time_array_15=[]
for time in check_data['time_1']['times']:
    time_array_1.append(datetime.strptime(time,'%Y-%m-%d %H:%M:%S'))
for time in check_data['time_15']['times']:
    time_array_15.append(datetime.strptime(time,'%Y-%m-%d %H:%M:%S'))
plt.subplot(2, 1, 1)
plt.plot(time_array_1,check_data['time_1']['values'],marker="o",linestyle="dashed")
plt.subplot(2, 1, 2)
plt.plot(time_array_15,check_data['time_15']['values'],marker="o",linestyle="dashed")


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-cb9f6dd0b452> in <module>()
      1 #Takes a JSON file and reloads it back into python
      2 instance='D35C05'
----> 3 filename='Tracebase/'+device+'/'+instance+'/'+device+'_'+instance+'_2012.06.20.json'
      4 json_data = open(filename)
      5 check_data = json.load(json_data)

NameError: name 'device' is not defined

In [ ]: