In [53]:
%matplotlib inline
import sys
import os
sys.path.append(os.path.join(os.pardir,os.pardir))
import disaggregator as da
from disaggregator import PecanStreetDatasetAdapter as psda
import matplotlib.pyplot as plt
import numpy as np
import json
import urllib2
import pandas as pd
import collections
from scipy import stats
from datetime import datetime, timedelta, date
In [4]:
db_url = "postgresql://USERNAME:PASSWORD@db.wiki-energy.org:5432/postgres"
psda.set_url(db_url)
In [35]:
psda.get_table_names('shared')
Out[35]:
In [36]:
dataids = psda.get_dataids_with_real_values('shared','validated_05_2014','use')
In [49]:
traces = psda.generate_traces_for_appliance_by_dataids('shared', 'validated_05_2014', 'use', dataids[:50], sample_rate='15T')
In [50]:
traces_array = [np.array(trace.series.values,dtype=np.float) for trace in traces]
In [51]:
for trace_array, dataid in zip(traces_array, dataids):
    fig = plt.hist(trace_array, bins=80)
    plt.title("dataid: {}".format(dataid))
    plt.ylabel("# occurrences in a month")
    plt.xlabel("kW bin")
    plt.show()
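Household load histograms are heavily skewed toward the low-kW bins, so the high-power tail can be invisible on a linear count axis. A quick sketch (shown for the first trace only) with a log-scaled count axis:
In [ ]:
# sketch: same histogram with a log-scaled count axis so the
# high-power tail of the distribution stays visible
plt.hist(traces_array[0], bins=80, log=True)
plt.title("dataid: {}".format(dataids[0]))
plt.ylabel("# occurrences in a month (log)")
plt.xlabel("kW bin")
plt.show()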
In [54]:
def parse_observations(parsed_json):
    """Flatten the 'observations' of a Weather Underground history
    response into a list of ordered dictionaries."""
    objects_list = []
    for i in parsed_json['history']['observations']:
        d = collections.OrderedDict()
        d['date'] = i['date']['mon'] + '/' + i['date']['mday'] + '/' + i['date']['year']
        d['time'] = i['date']['pretty'][0:8]
        d['temp'] = i['tempi']
        d['conds'] = i['conds']
        d['wdire'] = i['wdire']
        d['wdird'] = i['wdird']
        d['hail'] = i['hail']
        d['thunder'] = i['thunder']
        d['pressurei'] = i['pressurei']
        d['snow'] = i['snow']
        d['pressurem'] = i['pressurem']
        d['fog'] = i['fog']
        d['tornado'] = i['tornado']
        d['hum'] = i['hum']
        d['tempi'] = i['tempi']
        d['tempm'] = i['tempm']
        d['dewptm'] = i['dewptm']
        d['dewpti'] = i['dewpti']
        d['rain'] = i['rain']
        d['visim'] = i['visi']
        d['wspdi'] = i['wspdi']
        d['wspdm'] = i['wspdm']
        objects_list.append(d)
    return objects_list

def get_weather_data(api, city, state, start_date, end_date):
    """Query the Weather Underground history API for every day from
    start_date to end_date (format YYYYMMDD, inclusive) and return the
    combined observations as a JSON string. Pass None as end_date to
    fetch a single day."""
    base_url = 'http://api.wunderground.com/api/' + api + '/history_'
    if start_date is not None and end_date is not None:
        date_format = "%Y%m%d"
        a = datetime.strptime(start_date, date_format)
        b = datetime.strptime(end_date, date_format)
        # number of days from start_date to end_date
        num_days = (b - a).days
        objects_list = []
        # issue one API query per day in the requested period
        for offset in range(num_days + 1):
            formatted_date = datetime.strftime(a + timedelta(days=offset), date_format)
            query = base_url + formatted_date + '/q/' + state + '/' + city + '.json'
            # fetch the response and parse it as JSON
            parsed_json = json.loads(urllib2.urlopen(query).read())
            objects_list.extend(parse_observations(parsed_json))
        # dump the accumulated observations into a single JSON string
        return json.dumps(objects_list)
    # if we just need the data for ONE day (pass None for end_date):
    if end_date is None:
        query = base_url + start_date + '/q/' + state + '/' + city + '.json'
        parsed_json = json.loads(urllib2.urlopen(query).read())
        return json.dumps(parse_observations(parsed_json))
In [55]:
query_results = get_weather_data('1d83c5de274645d4','Austin','TX', '20140501', '20140531')
In [56]:
df = pd.read_json(query_results)
In [76]:
def temp_column_to_array(column):
    # replace the -9999 missing-data sentinel with NaN, then fill
    # the gaps by linear interpolation
    return np.array(column.replace(-9999, np.nan).interpolate())
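A toy sanity check (made-up values, not from the dataset): the -9999 sentinel becomes NaN and is filled linearly.
In [ ]:
# toy example: -9999 is treated as missing and linearly interpolated
toy = pd.Series([70.0, -9999, 74.0, 75.0])
print temp_column_to_array(toy)   # expected: [ 70.  72.  74.  75.]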
In [129]:
temp_array = temp_column_to_array(df['temp'])
print df['time'][0]
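The weather frame only carries date and time as strings; to align it with the 15-minute traces later, it can be given a DatetimeIndex (a sketch, assuming the 'date'/'time' columns look like '5/1/2014' and '12:53 AM' as built in get_weather_data):
In [ ]:
# sketch: build a DatetimeIndex from the string columns so the
# weather observations can be aligned with the trace index below
df.index = pd.to_datetime(df['date'] + ' ' + df['time'])
df = df.sort_index()
print df.index[0]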
In [134]:
traces[0].series.index[0]
Out[134]:
In [83]:
fig = plt.hist(temp_array,bins=20)
In [84]:
plt.plot(temp_array)
Out[84]:
In [107]:
#traces_array.pop(33)
traces_sum = np.sum(np.array(traces_array),axis=0)
plt.plot(traces_sum)
Out[107]:
In [108]:
print len(temp_array)
print len(traces_sum)
In [121]:
# aggregate the 15-minute samples to hourly values by summing groups of four
resampled_traces_sum = np.sum(traces_sum.reshape((2956/4, 4)), axis=1)
plt.plot(resampled_traces_sum)
Out[121]:
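An index-aware alternative (a sketch, assuming each trace.series carries the 15-minute DatetimeIndex seen above) is to let pandas do the hourly aggregation instead of a hard-coded reshape:
In [ ]:
# sketch: sum the traces as Series, then aggregate to hourly totals
# via the DatetimeIndex rather than reshaping a fixed-length array
total = sum(trace.series.astype(float) for trace in traces)
print len(total.resample('H', how='sum'))   # resample('H').sum() on newer pandas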
In [122]:
len(resampled_traces_sum)
Out[122]:
In [128]:
n = 10
correlations = []
# cross-correlate the hourly aggregate load with temperature at lags of 0..n-1 hours
for i in range(n):
    correlations.append(np.correlate(resampled_traces_sum[i:739 - n + i], temp_array[0:739 - n])[0])
correlations = np.array(correlations)
plt.plot(correlations)
print correlations
The correlation peaks at a lag of about three hours: on average, aggregate load follows temperature with roughly a 3-hour delay.
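One caveat: np.correlate on the raw (uncentered) series is dominated by the positive means of both signals, so the peak can be shallow. A lag scan of the Pearson correlation (a sketch using scipy.stats, imported above) makes the delay easier to read off:
In [ ]:
# sketch: Pearson correlation between hourly load and temperature at
# lags of 0..n-1 hours; the argmax estimates the response delay
n = 10
m = min(len(resampled_traces_sum), len(temp_array)) - n
rs = [stats.pearsonr(resampled_traces_sum[i:i + m], temp_array[:m])[0] for i in range(n)]
plt.plot(rs)
print np.argmax(rs)   # should land near the ~3-hour delay seen above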
In [ ]: