Frequency analysis using Tracebase data


In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pylab
import pywt #must pip install PyWavelets
import json
from numpy import sin, linspace, pi
from pylab import plot, show, title, xlabel, ylabel, subplot
from scipy import fft, arange
from scipy import pi
import pandas as pd
from collections import OrderedDict
import os
import random
%matplotlib inline
pylab.rcParams['figure.figsize'] = 16, 12


/usr/local/lib/python2.7/dist-packages/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0.
  .format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))

In [4]:
#Produces arrays of arrays for HMM
def values_to_array(values):
    a=[]
    X=[]
    for i in values:
        a.append(i)
        X.append(a)
        a=[]
    return np.array(X)

In [5]:
#Produces single array from arrays of arrays
def array_to_values(values):
    a=[]
    for i in values:
        a.append(i[0])
    return a

In [6]:
#Takes JSON files and reloads them back into python
def open_instance_15min(directory, instance_name,filename):
    with open(str(directory)+'/{0}/{1}'.format(instance_name,filename)) as f:
        return pd.DataFrame(json.load(f)['time_15'])

In [7]:
#Using Values as single feature
def getAllInstanceModelsFromFolder(database,device_type):
    model_list=OrderedDict()
    device_instances=OrderedDict()
    directory= os.getcwd()+'/../'+database+'/'+device_type+'/'
    for i,instance_name in enumerate(os.listdir(directory)):
        device_instances[instance_name] = [open_instance_15min(directory,instance_name,filename)for filename in os.listdir(directory+'/'+instance_name)]
    return device_instances

In [1]:
def getSpectrum(y,Fs,norm=True,dc=True):
    n = len(y) # length of the signal
    k = arange(n)
    T = n/Fs
    frq = k/T # two sides frequency range
    frq = frq[range(n/2)] # one side frequency range
    Y = fft(y)/n # fft computing and normalization
    Y = Y[range(n/2)]
    if(norm):
        total=np.trapz(abs(Y)) #Normalizing over total energy
    else:
        total=1
    Y=np.divide(Y,total)
    if(not dc):
        Y=Y[5:]
        frq=frq[5:]
    return frq,Y

def generateSpectrums(traces,color,norm=True,comb=False,dc=True,plotSpect=True):
    trace_length=24
    Fs = 4.0;  # sampling rate
    Ts = 1.0/Fs; # sampling interval
    t = arange(0,trace_length,Ts) # time vector

    
    if(not comb):
        for trace in traces:
            if(len(trace)==96):
                subplot(2,1,1)
                plot(t,trace['values'].values)
                xlabel('Time')
                ylabel('Amplitude')
                subplot(2,1,2)
                frq,Y=getSpectrum(trace['values'].values,Fs,norm)
                plot(frq,abs(Y),color) # plotting the spectrum
                xlabel('Freq (Cycles/Hour)')
                ylabel('Normalized |Y(freq)|')
    else:
        total=[]
        t_total=[]
        for i,trace in enumerate(traces):
            total=np.concatenate([total,trace['values'].values])
            t_total=np.concatenate([t_total,t+(trace_length*i)])
        if(plotSpect):
            subplot(2,1,1)
            plot(total)
            xlabel('Time')
            ylabel('Amplitude')
            subplot(2,1,2)
            frq,Y=getSpectrum(total,Fs,norm,dc)
            plot(frq,abs(Y),color) # plotting the spectrum
            xlabel('Freq (Cycles/Hour)')
            ylabel('Normalized |Y(freq)|')

In [92]:
#Initialize Variables
devices_types=[]
devices_data={}
database='Tracebase'

In [93]:
device_type='Cookingstove'
devices_types.append(device_type)
devices_data[device_type]=getAllInstanceModelsFromFolder(database,device_type)

In [94]:
print devices_data[device_type].keys()
device_instance= devices_data[device_type].keys()[0]


['D33097']

In [95]:
#Cooking Stove Frequency Signal
colors ="bgrcmykw"
index=1
instance= devices_data[device_type].keys()[0]
print len(devices_data[device_type][instance])
generateSpectrums(devices_data[device_type][instance][1:2],colors[0],comb=True)


16

In [96]:
device_type='Refrigerator'
devices_types.append(device_type)
devices_data[device_type]=getAllInstanceModelsFromFolder(database,device_type)
#Refrigerator Frequency Signals
colors ="bgrcmykw"
for i,instance in enumerate(devices_data['Refrigerator'].keys()):
    generateSpectrums(devices_data[device_type][instance],colors[i],comb=True,norm=True,dc=False)



In [97]:
colors ="bgrcmykw"
generateSpectrums(devices_data[device_type]['76C07F'][3:4],colors[0])



In [98]:
device_type='PC-Desktop'
devices_types.append(device_type)
devices_data[device_type]=getAllInstanceModelsFromFolder(database,device_type)

In [99]:
print devices_data[device_type].keys()
device_instance= devices_data[device_type].keys()[0]


['7296D7', '11F01E', 'Schalli', 'D35C05', '59ADA7', 'B7E6FA', '59AC89', '59AC8E', 'B7BEA1', 'D337C9', 'Denis']

In [100]:
#PC-Desktop Frequency Signals
colors ="bgrcmykwbgrcmykw"
for i,instance in enumerate(devices_data[device_type].keys()):
    generateSpectrums(devices_data[device_type][instance][1:2],colors[i],comb=False)



In [101]:
#Aggregated Frequency Signals
colors ="bgrcmykwbgrcmykw"
power_total=np.zeros((96,1))
day_index=1
test_data={}
for i,device_type in enumerate(devices_types):
    instance_name=devices_data[device_type].keys()[0]
    test_data[device_type]=values_to_array(devices_data[device_type][instance_name][day_index]['values'].values)
    power_total=power_total+test_data[device_type] 
    generateSpectrums(devices_data[device_type][instance_name],colors[i],norm=True,comb=True,dc=False)
    print str(device_type)+":"+colors[i]

plt.figure()   
Fs = 4.0;  # sampling rate
Ts = 1.0/Fs; # sampling interval
t = arange(0,24,Ts) # time vector
subplot(2,1,1)
plot(t,power_total,'k')
subplot(2,1,2)
frq,Y=getSpectrum(array_to_values(power_total),Fs,norm=True,dc=False)
plot(frq,abs(Y),'k') # plotting the spectrum


Cookingstove:b
Refrigerator:g
PC-Desktop:r
Out[101]:
[<matplotlib.lines.Line2D at 0x7fce0c100a90>]

In [112]:
#Aggregated Frequency Signals without fridge
colors ="bgrcmykwbgrcmykw"
power_total_no_fridge=np.zeros((96,1))
day_index=1
test_data={}
for i,device_type in enumerate(devices_types):
    instance_name=devices_data[device_type].keys()[0]
    if('Refrigerator' not in device_type):
        test_data[device_type]=values_to_array(devices_data[device_type][instance_name][day_index]['values'].values)
        power_total_no_fridge=power_total_no_fridge+test_data[device_type] 
        print str(device_type)+":"+colors[i]

plt.figure()   

print
power_total=np.zeros((96,1))
day_index=1
test_data={}
instance_names=['D33097', 'Refrigerator','7296D7']

for i,device_type in enumerate(devices_types):
    instance_name=instance_names[i]
    print (device_type)+":"+colors[i]+str(devices_data[device_type].keys())
    test_data[device_type]=values_to_array(devices_data[device_type][instance_name][day_index]['values'].values)
    power_total=power_total+test_data[device_type]
    plt.plot(array_to_values(test_data[device_type]))
plt.figure()
frq,Y=getSpectrum(array_to_values(power_total),Fs,norm=True,dc=False)
plot(frq,abs(Y),'r') # plotting the spectrum
Fs = 4.0;  # sampling rate
Ts = 1.0/Fs; # sampling interval
t = arange(0,24,Ts) # time vector
frq,Y=getSpectrum(array_to_values(power_total_no_fridge),Fs,norm=True,dc=False)
plot(frq,abs(Y),'k') # plotting the spectrum


Cookingstove:b
PC-Desktop:r

Cookingstove:b['D33097']
Refrigerator:g['76C07F', 'D331DA', '98C08A', 'Refrigerator', 'D32131', 'B83B9E', '599393', 'B7E6F4']
PC-Desktop:r['7296D7', '11F01E', 'Schalli', 'D35C05', '59ADA7', 'B7E6FA', '59AC89', '59AC8E', 'B7BEA1', 'D337C9', 'Denis']
Out[112]:
[<matplotlib.lines.Line2D at 0x7fce07bb8e50>]

In [113]:
device_type='Refrigerator'
instance_name=devices_data[device_type].keys()[0]
for instance in devices_data[device_type][instance_name][1:2]:
    wavelet_test=instance['values'].values
    cA, cD = pywt.dwt(wavelet_test, 'db1')
    plt.plot(wavelet_test)
    plt.plot(cA)
    plt.plot(cD)



In [ ]: