In [53]:
%matplotlib inline
import sys
import os
sys.path.append(os.path.join(os.pardir,os.pardir))
import disaggregator as da
from disaggregator import PecanStreetDatasetAdapter as psda
import matplotlib.pyplot as plt
import numpy as np
import json
import urllib2
import pandas as pd
import collections 
from scipy import stats
from datetime import datetime, timedelta, date

In [4]:
# NOTE(review): replace the USERNAME/PASSWORD placeholders with credentials
# loaded from environment variables -- never commit real credentials to a notebook.
db_url = "postgresql://USERNAME:PASSWORD@db.wiki-energy.org:5432/postgres"
psda.set_url(db_url)

In [35]:
psda.get_table_names('shared')


Out[35]:
[u'validated_01_2014',
 u'validated_02_2014',
 u'validated_04_2014',
 u'validated_05_2014',
 u'validated_03_2014']

In [36]:
dataids = psda.get_dataids_with_real_values('shared','validated_05_2014','use')

In [49]:
traces = psda.generate_traces_for_appliance_by_dataids('shared', 'validated_05_2014', 'use', dataids[:50], sample_rate='15T')


select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=26
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=59
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=86
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=93
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=94
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=252
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=280
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=410
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=434
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=484
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=499
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=580
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=585
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=624
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=661
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=739
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=744
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=774
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=821
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=871
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=936
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1086
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1167
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1283
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1334
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1450
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1507
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1617
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1632
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1681
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1696
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1714
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1718
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1782
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1790
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1800
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1830
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1953
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=1994
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=2034
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=2094
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=2129
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=2156
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=2158
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=2171
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=2233
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=2242
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=2337
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=2365
select use,localminute from "PecanStreet_SharedData".validated_05_2014 where dataid=2378

In [50]:
traces_array = [np.array(trace.series.values,dtype=np.float) for trace in traces]

In [51]:
# Plot the monthly distribution of 15-minute power readings for each home.
# (plt.hist returns (counts, bin_edges, patches), not a figure, so the old
# `fig =` binding was misleading and unused -- dropped.)
for trace_array, dataid in zip(traces_array, dataids):
    plt.hist(trace_array, bins=80)
    plt.title("dataid: {}".format(dataid))
    plt.ylabel("#occurrences in a month")  # fixed typo: "occurences"
    plt.xlabel("kW bin")
    plt.show()



In [54]:
def _observation_to_dict(i):
    """Flatten one raw wunderground observation dict into an OrderedDict row."""
    d = collections.OrderedDict()
    d['date'] = i['date']['mon'] + '/' + i['date']['mday'] + '/' + i['date']['year']
    d['time'] = i['date']['pretty'][0:8]
    d['temp'] = i['tempi']
    d['conds'] = i['conds']
    d['wdire'] = i['wdire']
    d['wdird'] = i['wdird']
    d['hail'] = i['hail']
    d['thunder'] = i['thunder']
    d['pressurei'] = i['pressurei']
    d['snow'] = i['snow']
    d['pressurem'] = i['pressurem']
    d['fog'] = i['fog']
    d['tornado'] = i['tornado']
    d['hum'] = i['hum']
    d['tempi'] = i['tempi']
    d['tempm'] = i['tempm']
    d['dewptm'] = i['dewptm']
    d['dewpti'] = i['dewpti']
    d['rain'] = i['rain']
    d['visim'] = i['visi']
    d['wspdi'] = i['wspdi']
    d['wspdm'] = i['wspdm']
    return d


def get_weather_data(api, city, state, start_date, end_date):
    """Fetch historical weather observations from the Weather Underground API.

    Parameters
    ----------
    api : str
        Weather Underground API key.
    city, state : str
        Location to query, e.g. 'Austin', 'TX'.
    start_date : str
        First day to fetch, formatted '%Y%m%d' (e.g. '20140501').
    end_date : str or None
        Last day to fetch (inclusive). Pass None to fetch start_date only.

    Returns
    -------
    str
        JSON-encoded list of per-observation dicts (see _observation_to_dict).
    """
    if start_date is not None and end_date is not None:
        date_format = "%Y%m%d"
        a = datetime.strptime(start_date, date_format)
        b = datetime.strptime(end_date, date_format)
        num_days = (b - a).days

        objects_list = []
        # One HTTP request per day; each response holds that day's observations.
        for offset in range(num_days + 1):
            day = a + timedelta(days=offset)
            formatted_date = datetime.strftime(day, date_format)
            query = ('http://api.wunderground.com/api/' + api + '/history_' +
                     formatted_date + '/q/' + state + '/' + city + '.json')
            f = urllib2.urlopen(query)
            parsed_json = json.loads(f.read())
            for obs in parsed_json['history']['observations']:
                objects_list.append(_observation_to_dict(obs))
        # Encode once after the loop. The original re-dumped the entire list
        # inside the inner loop (quadratic work) and raised a NameError if no
        # observations were returned at all.
        return json.dumps(objects_list)

    if end_date is None:
        # Single-day query. The original hardcoded an API key here instead of
        # using the `api` argument -- fixed to respect the parameter (and to
        # avoid committing a secret to the notebook).
        f = urllib2.urlopen('http://api.wunderground.com/api/' + api +
                            '/history_' + start_date + '/q/' + state + '/' +
                            city + '.json')
        parsed_json = json.loads(f.read())
        objects_list = [_observation_to_dict(obs)
                        for obs in parsed_json['history']['observations']]
        return json.dumps(objects_list)

In [55]:
query_results = get_weather_data('1d83c5de274645d4','Austin','TX', '20140501', '20140531')

In [56]:
df = pd.read_json(query_results)

In [76]:
def temp_column_to_array(column, sentinel=-9999):
    """Convert a temperature Series to a numpy array, cleaning missing values.

    Occurrences of ``sentinel`` (wunderground marks missing readings as -9999)
    are replaced with NaN and then linearly interpolated from neighbors.

    Parameters
    ----------
    column : pandas.Series
        Numeric temperature readings.
    sentinel : numeric, optional
        Missing-data marker to clean. Defaults to -9999 (backward compatible
        with the original hardcoded value).

    Returns
    -------
    numpy.ndarray
        Interpolated values, same length as ``column``.
    """
    return np.array(column.replace(sentinel, np.nan).interpolate())

In [129]:
# Clean/interpolate the temperature column; the printed first timestamp shows
# the weather observations begin shortly after midnight.
temp_array = temp_column_to_array(df['temp'])    
print df['time'][0]


12:51 AM

In [134]:
traces[0].series.index[0]


Out[134]:
Timestamp('2014-05-01 00:00:00', tz=None)

In [83]:
fig = plt.hist(temp_array,bins=20)



In [84]:
plt.plot(temp_array)


Out[84]:
[<matplotlib.lines.Line2D at 0x112e38ed0>]

In [107]:
# NOTE(review): leftover debug line below -- index 33 was apparently excluded at
# some point; confirm whether that home should be dropped before aggregating.
#traces_array.pop(33)
# Aggregate power across all homes at each 15-minute timestamp.
traces_sum = np.sum(np.array(traces_array),axis=0)
plt.plot(traces_sum)


Out[107]:
[<matplotlib.lines.Line2D at 0x113d58fd0>]

In [108]:
# The series lengths differ: temperature is roughly hourly (992 points) while
# power is 15-minute (2956 points), so power must be resampled before correlating.
print len(temp_array)
print len(traces_sum)


992
2956

In [121]:
# Downsample the 15-minute aggregate to hourly totals by summing groups of 4.
# The original hardcoded the length 2956; derive it from the data instead, and
# trim any remainder so reshape cannot fail on a non-multiple-of-4 length.
n_hours = len(traces_sum) // 4
resampled_traces_sum = np.sum(traces_sum[:n_hours * 4].reshape((n_hours, 4)), axis=1)
plt.plot(resampled_traces_sum)


Out[121]:
[<matplotlib.lines.Line2D at 0x113d050d0>]

In [122]:
len(resampled_traces_sum)


Out[122]:
739

In [128]:
n = 10
correlations = []
for i in range(n):
    correlations.append(np.correlate(resampled_traces_sum[i:739 - n + i],temp_array[0:739-n])[0])
correlations = np.array(correlations)
plt.plot(correlations)
print correlations


[ 12315661.02831334  12343118.79787333  12362427.17797333
  12373372.84024667  12372537.92702667  12359824.65991333
  12336947.10626666  12308285.06164667  12272671.36988667  12242659.0473    ]

On average, aggregate power consumption lags temperature by about 3 hours: the correlation above peaks at a lag of 3 hourly samples (index 3).


In [ ]: