In [1]:
import os
import xml.etree.ElementTree as ET
import datetime
import random
import pymongo as mongo
from collections import OrderedDict
import pandas as pd
import OakParkDatasetAdapter as opda
In [26]:
# Re-import the adapter module so that edits made to it since the first import
# take effect in this kernel. `reload` is a builtin only in Python 2.
reload(opda)
Out[26]:
In [8]:
## Change this part to point to the directory where your XML files are
#path = os.path.join(os.getcwd(), 'xml_files')
#xml_files = [f for f in os.listdir(path)]
In [23]:
def get_tree(xml_file):
    """Parse an XML file and return the tree together with its root element.

    Parameters
    ----------
    xml_file
        Location of the XML document. It is rendered to text with
        ``str.format`` before being handed to ``ET.parse``.

    Returns
    -------
    tuple
        ``(tree, root)`` where ``tree`` is the parsed ``ElementTree`` and
        ``root`` is ``tree.getroot()``.
    """
    source = '{}'.format(xml_file)
    parsed = ET.parse(source)
    return parsed, parsed.getroot()
In [10]:
def get_interval_data(tree_root):
    """Collect the fields of the IntervalBlock ``interval`` element into a dict.

    Parameters
    ----------
    tree_root
        Root element of the parsed feed.

    Returns
    -------
    dict
        Maps each child tag of ``interval`` (with the ESPI namespace prefix
        stripped) to its text. The ``'start'`` entry is converted from an
        epoch-seconds string to a ``datetime`` in the machine's local time
        zone.

    Raises
    ------
    KeyError
        If no ``start`` child was found.
    """
    espi_ns = '{http://naesb.org/espi}'
    block_path = './/{http://www.w3.org/2005/Atom}entry/{http://www.w3.org/2005/Atom}content/{http://naesb.org/espi}IntervalBlock/'
    # The query ends in '/', which ElementTree expands to '/*', so the
    # matches are the children of <interval>, not <interval> itself.
    fields = tree_root.findall('{0}/{1}{2}'.format(block_path, espi_ns, 'interval/'))

    interval_dict = {}
    for field in fields:
        interval_dict[field.tag.split('{http://naesb.org/espi}')[1]] = field.text

    start_seconds = float(interval_dict['start'])
    interval_dict['start'] = datetime.datetime.fromtimestamp(start_seconds)
    return interval_dict
In [11]:
def get_interval_block(tree_root):
    """Extract every interval reading of the feed as ``[timestamp, {'value': v}]``.

    Each ``IntervalReading`` element is inspected on its own and its
    ``timePeriod/start`` and ``value`` children are looked up by name, so
    additional or reordered children (for example ``cost``) cannot shift the
    pairing between a timestamp and its value.

    Parameters
    ----------
    tree_root
        Root element of the parsed feed.

    Returns
    -------
    list
        One ``[datetime, {'value': float}]`` entry per reading, in document
        order. The datetime is built with ``datetime.fromtimestamp`` (local
        time zone) from the reading's start, and the value is the reading's
        numeric text divided by 1e3.
        NOTE(review): the 1e3 divisor is presumably a Wh -> kWh conversion;
        confirm against the data source.
        Readings that lack either a start or a value element are skipped.
        An empty list is returned when the feed holds no readings.
    """
    espi_ns = '{http://naesb.org/espi}'
    readings_path = (
        './/{http://www.w3.org/2005/Atom}entry/{http://www.w3.org/2005/Atom}content/'
        '{http://naesb.org/espi}IntervalBlock//{http://naesb.org/espi}IntervalReading'
    )
    start_path = '{0}timePeriod/{0}start'.format(espi_ns)
    value_path = '{0}value'.format(espi_ns)

    readings = []
    for reading in tree_root.findall(readings_path):
        start = reading.find(start_path)
        value = reading.find(value_path)
        if start is None or value is None:
            # Incomplete reading: there is nothing meaningful to pair up.
            continue
        timestamp = datetime.datetime.fromtimestamp(float(start.text))
        readings.append([timestamp, {'value': float(value.text) / 1e3}])
    return readings
In [12]:
def get_currency(tree_root):
    """Return the text of the first ``currency`` element under a ReadingType.

    Parameters
    ----------
    tree_root
        Root element of the parsed feed.

    Returns
    -------
    The ``.text`` of the first matching element.

    Raises
    ------
    IndexError
        If the feed contains no such element.
    """
    espi_ns = '{http://naesb.org/espi}'
    reading_type_path = './/{http://www.w3.org/2005/Atom}entry/{http://www.w3.org/2005/Atom}content/{http://naesb.org/espi}ReadingType/'
    currency_path = '{0}/{1}{2}'.format(reading_type_path, espi_ns, 'currency')
    matches = tree_root.findall(currency_path)
    first_match = matches[0]
    return first_match.text
In [13]:
def get_address(tree_root):
    """Return the text of the first Atom ``entry/title`` element in the feed.

    NOTE(review): the function name suggests this title carries the service
    address; confirm against the data source.

    Raises
    ------
    IndexError
        If the feed has no entry title.
    """
    title_path = './/{http://www.w3.org/2005/Atom}entry/{http://www.w3.org/2005/Atom}title'
    titles = tree_root.findall(title_path)
    return titles[0].text
In [19]:
def get_meta(tree_root):
    """Assemble the metadata dict for a feed.

    Parameters
    ----------
    tree_root
        Root element of the parsed feed.

    Returns
    -------
    dict
        Keys:
        ``'updated'`` and ``'published'`` hold the text of the first Atom
        ``entry/updated`` and ``entry/published`` elements.
        ``'interval'`` holds the result of ``get_interval_data``.
        ``'currency'`` holds the result of ``get_currency``.
        ``'account_number'`` is always ``None``.

    Raises
    ------
    IndexError
        If the feed has no ``updated`` or ``published`` entry element.
    """
    updated_nodes = tree_root.findall('.//{http://www.w3.org/2005/Atom}entry/{http://www.w3.org/2005/Atom}updated')
    updated = updated_nodes[0].text
    published_nodes = tree_root.findall('.//{http://www.w3.org/2005/Atom}entry/{http://www.w3.org/2005/Atom}published')
    published = published_nodes[0].text
    interval = get_interval_data(tree_root)
    currency = get_currency(tree_root)
    return {
        'updated': updated,
        'published': published,
        'interval': interval,
        'currency': currency,
        'account_number': None,
    }
In [21]:
def get_xml_house(xml_file):
    """Load one XML export and build the intermediate "house" dict.

    Parameters
    ----------
    xml_file
        Location of the XML document, forwarded to ``get_tree``.

    Returns
    -------
    dict
        ``'meta'`` holds the result of ``get_meta`` plus a ``'dataid'`` drawn
        with ``random.randint(100, 9999)``.
        ``'interval_readings'`` holds the result of ``get_interval_block``.
    """
    _tree, root = get_tree(xml_file)
    # NOTE(review): the returned title is not stored anywhere. The call is
    # kept so that a feed without an entry title still fails at this point.
    get_address(root)
    meta = get_meta(root)
    # The id is random on every call, so it is neither stable nor guaranteed
    # to be unique across files.
    meta['dataid'] = random.randint(100, 9999)
    readings = get_interval_block(root)
    return {'meta': meta, 'interval_readings': readings}
In [24]:
# File name of the XML export to load. No directory is prepended, so it is
# resolved relative to the current working directory.
t = 'cec_electric_interval_data_Service 1_2012-07-25_to_2014-07-16.xml'
# Intermediate dict ('meta' + 'interval_readings') built by get_xml_house.
xml_house = get_xml_house(t)
In [27]:
# Hand the intermediate dict to the adapter module's converter.
# NOTE(review): the type of the returned object is defined in
# OakParkDatasetAdapter, not in this notebook.
trace = opda.get_trace_from_intermediate_xml(xml_house)
In [29]:
# Show the `series` attribute of the converted trace as this cell's output.
trace.series
Out[29]:
In [ ]: