Experimenting with storing the majority of object state information in a relational database (to make it easy to search) whilst wrapping this data in the appropriate object types (to enable easy processing). The relational database will just be a set of pandas.DataFrame objects, not a full-blown DBMS.

CONCLUSIONS SO FAR

  • I think it's more trouble than it's worth to try to keep tables of data
  • Instead let's stick to the 'old' way of doing it:
    • Dataset
    • Dataset.appliances = ApplianceGroup of every appliance in the dataset
    • Building.appliances = Dataset.appliances.select(building=building.id)
    • ApplianceGroup.select is responsible for all the selection machinery.
  • So, when we first load a dataset, construct all Appliance objects, put them into Dataset.appliances.

In [163]:
import pandas as pd

class Metadata(object):
    """Hold the experiment's metadata tables as pandas DataFrames.

    Three tables of hard-coded dummy data are created:

    * ``building`` -- indexed by (dataset_id, building_instance).
    * ``electricity_meter`` -- one row per meter, default integer index.
    * ``appliance_meter`` -- one row per (appliance, meter) pair, indexed by
      (dataset_id, building_instance, appliance_instance, appliance_type).
    """

    def __init__(self):
        """Create the dummy tables.

        Side effect: the ``submeter_of`` value of every meter is printed,
        overwritten with 100 through ``.loc`` and then printed again
        (presumably to check that writes made this way persist).
        """
        self.building = pd.DataFrame(
            [['redd', 1, 'Washington'],
             ['redd', 2, 'New York']],
            columns=['dataset_id', 'building_instance', 'geo_location'],
        ).set_index(['dataset_id', 'building_instance'])

        # Construct objects and put these objects into the DataFrame.
        # We do this for several reasons:
        #  - we can easily retrieve each object based on any
        #    metadata column.
        #  - we only construct a single object for each, um, object!
        # For now the object column ('meter_obj') only holds None.
        # This table keeps its default integer index (no set_index call).
        self.electricity_meter = pd.DataFrame(
            [['redd', 1, 1, 'EnviR', -1, None],
             ['redd', 1, 2, 'EnviR', 1, None],
             ['redd', 1, 3, 'EnviR', 1, None]],
            columns=['dataset_id', 'building_instance', 'meter_instance',
                     'meter_type_id', 'submeter_of', 'meter_obj'],
        )

        # Print each value, then write 100 back into the frame itself via
        # .loc (the row yielded by iterrows is not written to).
        # print(...) with one argument behaves the same on Python 2 and 3.
        for idx, meter in self.electricity_meter.iterrows():
            print(meter['submeter_of'])
            self.electricity_meter.loc[idx, 'submeter_of'] = 100

        # Second pass: print the values now stored in the frame.
        for idx, meter in self.electricity_meter.iterrows():
            print(meter['submeter_of'])

        # The 'washer dryer' appears twice because it is dual supply:
        # one appliance instance attached to two meters.
        self.appliance_meter = pd.DataFrame(
            [['redd', 1, 1, 'fridge', 1, None],
             ['redd', 1, 1, 'washer dryer', 2, None],
             ['redd', 1, 1, 'washer dryer', 3, None]],
            columns=['dataset_id', 'building_instance', 'appliance_instance',
                     'appliance_type', 'meter_instance', 'appliance_obj'],
        ).set_index(['dataset_id', 'building_instance',
                     'appliance_instance', 'appliance_type'])

In [164]:
# Build the dummy metadata (the constructor prints every meter's
# 'submeter_of' before and after overwriting it) and show the meter table.
md = Metadata()
md.electricity_meter


-1
1
1
100
100
100
Out[164]:
dataset_id building_instance meter_instance meter_type_id submeter_of meter_obj
0 redd 1 1 EnviR 100 None
1 redd 1 2 EnviR 100 None
2 redd 1 3 EnviR 100 None

3 rows × 6 columns


In [114]:
# Show the building table, indexed by (dataset_id, building_instance).
md.building


Out[114]:
geo_location
dataset_id building_instance
redd 1 Washington
2 New York

2 rows × 1 columns


In [35]:
# Join the meter table with the appliance table on their shared columns.
# NOTE(review): Metadata as defined above exposes `appliance_meter`, not
# `appliance`; this cell was presumably run against an earlier revision of
# the class -- confirm before re-running.
appliances = md.electricity_meter.merge(md.appliance)
appliances


Out[35]:
dataset_id building_instance meter_instance meter_type_id submeter_of appliance_instance appliance_type
0 redd 1 1 EnviR -1 1 fridge
1 redd 1 2 EnviR 1 1 washer dryer
2 redd 1 3 EnviR 1 1 washer dryer

3 rows × 7 columns


In [52]:



Out[52]:
dataset_id building_instance meter_instance meter_type_id submeter_of appliance_instance appliance_type
1 redd 1 2 EnviR 1 1 washer dryer
2 redd 1 3 EnviR 1 1 washer dryer

2 rows × 7 columns


In [99]:
class Appliance(object):
    """A single appliance, built from the metadata tables.

    The appliance's type, instance number and meters are exposed through
    read-only properties.
    """

    def __init__(self, metadata, dataset_id, building_instance, appliance_type, appliance_instance=1):
        """Look the appliance up in `metadata` and copy the relevant values.

        Parameters
        ----------
        metadata : object holding an `electricity_meter` DataFrame and either
            an `appliance` DataFrame or an (indexed) `appliance_meter`
            DataFrame.
        dataset_id, building_instance, appliance_type, appliance_instance :
            values that select the appliance's rows.

        Raises
        ------
        IndexError
            If no row matches the requested appliance.
        """
        # Prefer the flat `appliance` table; fall back to `appliance_meter`,
        # whose key columns live in the index and must be restored as
        # ordinary columns before they can be merged and filtered on.
        appliance_metadata = getattr(metadata, 'appliance', None)
        if appliance_metadata is None:
            appliance_metadata = metadata.appliance_meter.reset_index()

        # Load appliance metadata (merge joins on the shared column names).
        appliances = metadata.electricity_meter.merge(appliance_metadata)
        appliance_table = appliances[(appliances['dataset_id'] == dataset_id) &
                                     (appliances['building_instance'] == building_instance) &
                                     (appliances['appliance_type'] == appliance_type) &
                                     (appliances['appliance_instance'] == appliance_instance)]

        if appliance_table.empty:
            # Same exception type that `.values[0]` would raise on an empty
            # selection, but with a message that names the failed lookup.
            raise IndexError(
                'No appliance found for dataset_id=%r, building_instance=%r, '
                'appliance_type=%r, appliance_instance=%r'
                % (dataset_id, building_instance, appliance_type, appliance_instance))

        self._type = appliance_table['appliance_type'].values[0]
        self._instance = appliance_table['appliance_instance'].values[0]
        # One Meter per matching row; a dual-supply appliance has two rows.
        self._meters = [Meter(metadata, dataset_id, building_instance, meter_instance)
                        for meter_instance in appliance_table['meter_instance'].values]

        self._mains = Mains(metadata, dataset_id, building_instance)

    @classmethod
    def from_metadata(cls, metadata, dataset_id, building_instance, appliance_type, appliance_instance=1):
        """Alternate constructor; takes the same arguments as `__init__`."""
        return cls(metadata, dataset_id, building_instance, appliance_type, appliance_instance)

    @property
    def type(self):
        """Appliance type as stored in the metadata (e.g. 'fridge')."""
        return self._type

    @property
    def instance(self):
        """Appliance instance number."""
        return self._instance

    @property
    def meters(self):
        """List of Meter objects attached to this appliance."""
        return self._meters

    def is_dual_supply(self):
        """Return True when exactly two meters are attached."""
        return len(self.meters) == 2

    def __repr__(self):
        return '%s(type=%s, instance=%s, is_dual_supply=%s)' % (self.__class__.__name__,
                                                                self.type,
                                                                self.instance,
                                                                self.is_dual_supply())

    # TODO: how to get mains (for calculating proportion of energy)?  Or meter objects?

In [100]:
# Build the 'washer dryer' appliance of building 1 in the 'redd' dataset.
appliance = Appliance.from_metadata(md, 'redd', 1, 'washer dryer')

In [101]:
# Display the repr of the appliance built in the previous cell.
appliance


Out[101]:
Appliance(type=washer dryer, instance=1, is_dual_supply=True)

In [94]:
# Show the appliance table.
# NOTE(review): Metadata as defined above exposes `appliance_meter`, not
# `appliance`; presumably this output comes from an earlier revision of the
# class -- confirm before re-running.
md.appliance


Out[94]:
dataset_id building_instance appliance_instance appliance_type meter_instance
0 redd 1 1 fridge 1
1 redd 1 1 washer dryer 2
2 redd 1 1 washer dryer 3

3 rows × 5 columns


In [98]:
# Try to overwrite 'submeter_of' through the appliance's private table.
# The recorded output below shows pandas emitting SettingWithCopyWarning for
# this assignment; presumably `_table` was a filtered selection of the merged
# frame, so the write targets a copy rather than the source table.
# NOTE(review): the Appliance class shown above does not set `_table`; this
# cell was presumably run against an earlier revision -- confirm.
fridge = Appliance.from_metadata(md, 'redd', 1, 'fridge')
fridge._table['submeter_of'] = 100


-c:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index,col_indexer] = value instead

In [ ]: