test netcdf+

This is a more extensive integration test that checks whether all the features of netcdf+ work as expected.


In [1]:
from __future__ import print_function

import openpathsampling as paths
from openpathsampling.netcdfplus import (
    NetCDFPlus, 
    ObjectStore, 
    StorableObject,
    NamedObjectStore,
    UniqueNamedObjectStore,
    DictStore,
    ImmutableDictStore,
    VariableStore,
    StorableNamedObject
)
import numpy as np

In [2]:
# force numpy print options for test comparison
np.set_printoptions(precision=6, formatter={'float_kind': lambda x: "{:.6f}".format(x)})

In [3]:
class Node(StorableObject):
    def __init__(self, value):
        super(Node, self).__init__()
        self.value = value
        
    def __repr__(self):
        return 'Node(%s)' % self.value

In [4]:
class NamedNode(StorableNamedObject):
    def __init__(self, value):
        super(NamedNode, self).__init__()
        self.value = value
        
    def __repr__(self):
        return 'Node(%s)' % self.value

Open new storage

try to create a new storage


In [5]:
st = NetCDFPlus('test_netcdfplus.nc', mode='w')

Create some stores


In [6]:
class NodeIntStore(VariableStore):
    def __init__(self):
        super(NodeIntStore, self).__init__(Node, ['value'])
    
    def initialize(self):
        super(VariableStore, self).initialize()

        # Add here the variables used by this store
        self.create_variable('value', 'int')

In [7]:
st.create_store('nodesnamed', NamedObjectStore(NamedNode))
st.create_store('nodesunique', UniqueNamedObjectStore(NamedNode))
st.create_store('dict', DictStore())
st.create_store('dictimmutable', ImmutableDictStore())
st.create_store('varstore', NodeIntStore())

And the default store. The last store created for a particular object type is used as the default if no specific store is specified.


In [8]:
st.create_store('nodes', ObjectStore(Node))

In [9]:
#! lazy
# lazy because output checking fails in Py3k tests -- why is that?
print(st.find_store(Node))


store.nodes[Node] : (not created)

Initialize the store


In [10]:
st.finalize_stores()

In [11]:
v = st.variables['nodes_uuid']

In [12]:
v.chunking()


Out[12]:
[65536]

In [13]:
print(st.find_store(Node))


store.nodes[Node] : 0 object(s)

In [14]:
st.nodes.save(Node(10));

In [15]:
st.close()

Reopen the storage


In [16]:
st = NetCDFPlus('test_netcdfplus.nc', mode='a')

Set caching for the new stores


In [17]:
for store in st.stores:
    store.set_caching(10)

Check if the stores were correctly loaded


In [18]:
assert('nodes' in st.objects)

In [19]:
assert('stores' in st.objects)

In [20]:
assert(len(st.nodes) == 1)

In [21]:
assert(len(st.stores) == 7)

In [22]:
for store in st.stores:
    print('{:40} {:30}'.format(str(store), str(store.cache)))


store.attributes[PseudoAttribute] : 0 object(s) WeakLRUCache(0/0 of 10/Inf)   
store.nodesnamed[NamedNode] : 0 object(s) WeakLRUCache(0/0 of 10/Inf)   
store.nodesunique[NamedNode] : 0 object(s) WeakLRUCache(0/0 of 10/Inf)   
store.dict[None/ANY] : 0 object(s)       WeakLRUCache(0/0 of 10/Inf)   
store.dictimmutable[None/ANY] : 0 object(s) WeakLRUCache(0/0 of 10/Inf)   
store.varstore[Node] : 0 object(s)       WeakLRUCache(0/0 of 10/Inf)   
store.nodes[Node] : 1 object(s)          WeakLRUCache(0/0 of 10/Inf)   

Create variables of all types

Get a list of all possible variable types


In [23]:
print(sorted(st.get_var_types()))


['bool', 'float', 'index', 'int', 'json', 'jsonobj', 'lazyobj', u'lazyobj.attributes', u'lazyobj.dict', u'lazyobj.dictimmutable', u'lazyobj.nodes', u'lazyobj.nodesnamed', u'lazyobj.nodesunique', 'lazyobj.stores', u'lazyobj.varstore', u'lazyuuid.attributes', u'lazyuuid.dict', u'lazyuuid.dictimmutable', u'lazyuuid.nodes', u'lazyuuid.nodesnamed', u'lazyuuid.nodesunique', 'lazyuuid.stores', u'lazyuuid.varstore', 'length', 'long', 'numpy.float32', 'numpy.float64', 'numpy.int16', 'numpy.int32', 'numpy.int64', 'numpy.int8', 'numpy.uint16', 'numpy.uint32', 'numpy.uint64', 'numpy.uint8', 'obj', u'obj.attributes', u'obj.dict', u'obj.dictimmutable', u'obj.nodes', u'obj.nodesnamed', u'obj.nodesunique', 'obj.stores', u'obj.varstore', 'store', 'str', 'uuid', u'uuid.attributes', u'uuid.dict', u'uuid.dictimmutable', u'uuid.nodes', u'uuid.nodesnamed', u'uuid.nodesunique', 'uuid.stores', u'uuid.varstore']

Make a dimension of length 2 to simplify dimension naming.

Now we construct for each type a corresponding variable of dimensions 2x2x2.


In [24]:
st.create_dimension('pair', 2)

In [25]:
for var_type in st.get_var_types():
    st.create_variable(var_type, var_type, dimensions=('pair', 'pair', 'pair'))

In [26]:
st.update_delegates()

In [27]:
for var_name, var in sorted(st.variables.items()):
    print(var_name, var.dimensions)


attributes_cache (u'attributes',)
attributes_json (u'attributes',)
attributes_name (u'attributes',)
attributes_uuid (u'attributes',)
bool (u'pair', u'pair', u'pair')
dict_json (u'dict',)
dict_name (u'dict',)
dict_uuid (u'dict',)
dictimmutable_json (u'dictimmutable',)
dictimmutable_name (u'dictimmutable',)
dictimmutable_uuid (u'dictimmutable',)
float (u'pair', u'pair', u'pair')
index (u'pair', u'pair', u'pair')
int (u'pair', u'pair', u'pair')
json (u'pair', u'pair', u'pair')
jsonobj (u'pair', u'pair', u'pair')
lazyobj (u'pair', u'pair', u'pair', u'pair')
lazyobj.attributes (u'pair', u'pair', u'pair')
lazyobj.dict (u'pair', u'pair', u'pair')
lazyobj.dictimmutable (u'pair', u'pair', u'pair')
lazyobj.nodes (u'pair', u'pair', u'pair')
lazyobj.nodesnamed (u'pair', u'pair', u'pair')
lazyobj.nodesunique (u'pair', u'pair', u'pair')
lazyobj.stores (u'pair', u'pair', u'pair')
lazyobj.varstore (u'pair', u'pair', u'pair')
lazyuuid.attributes (u'pair', u'pair', u'pair')
lazyuuid.dict (u'pair', u'pair', u'pair')
lazyuuid.dictimmutable (u'pair', u'pair', u'pair')
lazyuuid.nodes (u'pair', u'pair', u'pair')
lazyuuid.nodesnamed (u'pair', u'pair', u'pair')
lazyuuid.nodesunique (u'pair', u'pair', u'pair')
lazyuuid.stores (u'pair', u'pair', u'pair')
lazyuuid.varstore (u'pair', u'pair', u'pair')
length (u'pair', u'pair', u'pair')
long (u'pair', u'pair', u'pair')
nodes_json (u'nodes',)
nodes_uuid (u'nodes',)
nodesnamed_json (u'nodesnamed',)
nodesnamed_name (u'nodesnamed',)
nodesnamed_uuid (u'nodesnamed',)
nodesunique_json (u'nodesunique',)
nodesunique_name (u'nodesunique',)
nodesunique_uuid (u'nodesunique',)
numpy.float32 (u'pair', u'pair', u'pair')
numpy.float64 (u'pair', u'pair', u'pair')
numpy.int16 (u'pair', u'pair', u'pair')
numpy.int32 (u'pair', u'pair', u'pair')
numpy.int64 (u'pair', u'pair', u'pair')
numpy.int8 (u'pair', u'pair', u'pair')
numpy.uint16 (u'pair', u'pair', u'pair')
numpy.uint32 (u'pair', u'pair', u'pair')
numpy.uint64 (u'pair', u'pair', u'pair')
numpy.uint8 (u'pair', u'pair', u'pair')
obj (u'pair', u'pair', u'pair', u'pair')
obj.attributes (u'pair', u'pair', u'pair')
obj.dict (u'pair', u'pair', u'pair')
obj.dictimmutable (u'pair', u'pair', u'pair')
obj.nodes (u'pair', u'pair', u'pair')
obj.nodesnamed (u'pair', u'pair', u'pair')
obj.nodesunique (u'pair', u'pair', u'pair')
obj.stores (u'pair', u'pair', u'pair')
obj.varstore (u'pair', u'pair', u'pair')
store (u'pair', u'pair', u'pair')
stores_json (u'stores',)
stores_name (u'stores',)
stores_uuid (u'stores',)
str (u'pair', u'pair', u'pair')
uuid (u'pair', u'pair', u'pair')
uuid.attributes (u'pair', u'pair', u'pair')
uuid.dict (u'pair', u'pair', u'pair')
uuid.dictimmutable (u'pair', u'pair', u'pair')
uuid.nodes (u'pair', u'pair', u'pair')
uuid.nodesnamed (u'pair', u'pair', u'pair')
uuid.nodesunique (u'pair', u'pair', u'pair')
uuid.stores (u'pair', u'pair', u'pair')
uuid.varstore (u'pair', u'pair', u'pair')
varstore_uuid (u'varstore',)
varstore_value (u'varstore',)

In [28]:
for var in sorted(st.vars):
    print(var)


attributes_cache
attributes_json
attributes_name
attributes_uuid
bool
dict_json
dict_name
dict_uuid
dictimmutable_json
dictimmutable_name
dictimmutable_uuid
float
index
int
json
jsonobj
lazyobj
lazyobj.attributes
lazyobj.dict
lazyobj.dictimmutable
lazyobj.nodes
lazyobj.nodesnamed
lazyobj.nodesunique
lazyobj.stores
lazyobj.varstore
lazyuuid.attributes
lazyuuid.dict
lazyuuid.dictimmutable
lazyuuid.nodes
lazyuuid.nodesnamed
lazyuuid.nodesunique
lazyuuid.stores
lazyuuid.varstore
length
long
nodes_json
nodes_uuid
nodesnamed_json
nodesnamed_name
nodesnamed_uuid
nodesunique_json
nodesunique_name
nodesunique_uuid
numpy.float32
numpy.float64
numpy.int16
numpy.int32
numpy.int64
numpy.int8
numpy.uint16
numpy.uint32
numpy.uint64
numpy.uint8
obj
obj.attributes
obj.dict
obj.dictimmutable
obj.nodes
obj.nodesnamed
obj.nodesunique
obj.stores
obj.varstore
store
stores_json
stores_name
stores_uuid
str
uuid
uuid.attributes
uuid.dict
uuid.dictimmutable
uuid.nodes
uuid.nodesnamed
uuid.nodesunique
uuid.stores
uuid.varstore
varstore_uuid
varstore_value

Bool


In [29]:
st.vars['bool'][:] = True

In [30]:
print(st.vars['bool'][:])


[[[True, True], [True, True]], [[True, True], [True, True]]]

Float


In [31]:
st.vars['float'][1,1] = 1.0

In [32]:
print(st.vars['float'][:])


[[[None, None], [None, None]], [[None, None], [1.0, 1.0]]]

Index

Index is special in the sense that it supports only non-negative integers. Negative values will be interpreted as None.


In [33]:
st.vars['index'][0,1,0] = 10
st.vars['index'][0,1,1] = -1
st.vars['index'][0,0] = None

In [34]:
print(st.vars['index'][0,1])
print(st.vars['index'][0,0])


[10, None]
[None, None]

Int


In [35]:
st.vars['int'][0,1,0] = 10
st.vars['int'][0,1,1] = -1

In [36]:
print(st.vars['int'][:])


[[[None, None], [10, -1]], [[None, None], [None, None]]]

JSON

The variable type JSON encodes the given object as a JSON string in the shortest possible way. This includes using references to storable objects.


In [37]:
st.vars['json'][0,1,1] = {'Hallo': 2, 'Test': 3}

In [38]:
print(st.vars['json'][0,1,1])


{'Test': 3, 'Hallo': 2}

In [39]:
st.vars['json'][0,1,0] = Node(10)

In [40]:
#! lazy
print(st.variables['json'][0,1,:])


[u'{"_store":"nodes","_hex_uuid":"0xebbeb5f0b75311e89830000000000024L"}'
 u'{"Test":3,"Hallo":2}']

All object types are registered as storable by subclassing from openpathsampling.base.StorableObject.

JSONObj

A JSON-serializable object. This can be a simple Python object, a numpy array, or an object that implements to_dict and from_dict. It is almost the same as JSON, except that if the object to be serialized is itself a storable object, it is not stored as a reference; instead the object itself is turned into a JSON representation.
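
As a sketch of the to_dict/from_dict protocol mentioned above (the Point class below is hypothetical and not part of this test; whether a plain class is picked up by the serializer depends on how it is registered):

class Point(object):
    # hypothetical example of the to_dict/from_dict protocol
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def to_dict(self):
        # return a plain dict with everything needed to rebuild the object
        return {'x': self.x, 'y': self.y}

    @classmethod
    def from_dict(cls, dct):
        # rebuild the object from the dict produced by to_dict
        return cls(dct['x'], dct['y'])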


In [41]:
nn = Node(10)
st.vars['jsonobj'][1,0,0] = nn

In [42]:
print(st.variables['jsonobj'][1,0,0])


{"_cls":"Node","_dict":{"value":10}}

In [43]:
st.vars['jsonobj'][1,0,0]


Out[43]:
Node(10)

Numpy


In [44]:
st.vars['numpy.float32'][:] = np.ones((2,2,2)) * 3.0
st.vars['numpy.float32'][0] = np.ones((2,2)) * 7.0

In [45]:
print(st.vars['numpy.float32'][:])


[[[7.000000 7.000000]
  [7.000000 7.000000]]

 [[3.000000 3.000000]
  [3.000000 3.000000]]]

Obj

You can store objects of a type that you have previously added. For loading you need to make sure that the class (and the store, if set manually) is present when you load from the store.
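
A minimal sketch of what this means in practice; the fresh-session scenario below is assumed and not executed in this notebook, and st2 is just an illustrative name:

# in a fresh session, define (or import) the Node class first,
# because the stored JSON refers to the class by name and needs it
# to reconstruct the objects
st2 = NetCDFPlus('test_netcdfplus.nc', mode='a')
node = st2.vars['obj.nodes'][0, 0, 0]  # should give back a Node instance
st2.close()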


In [46]:
st.vars['obj.nodes'][0,0,0] = Node(1)
st.vars['obj.nodes'][0,1,0] = Node('Second')
st.vars['obj.nodes'][0,0,1] = Node('Third')

In [47]:
# st.vars['obj.nodes'][1] = Node(20)

In [48]:
print(st.variables['obj.nodes'][:])
print(st.variables['nodes_json'][:])


[[[u'ebbeb5f0-b753-11e8-9830-00000000002a'
   u'ebbeb5f0-b753-11e8-9830-00000000002e']
  [u'ebbeb5f0-b753-11e8-9830-00000000002c' u'']]

 [[u'' u'']
  [u'' u'']]]
[u'{"_cls":"Node","_dict":{"value":10}}'
 u'{"_cls":"Node","_dict":{"value":10}}'
 u'{"_cls":"Node","_dict":{"value":1}}'
 u'{"_cls":"Node","_dict":{"value":"Second"}}'
 u'{"_cls":"Node","_dict":{"value":"Third"}}']

In [49]:
print(st.vars['obj.nodes'][0,0,0])
print(type(st.vars['obj.nodes'][0,0,0]))


Node(1)
<class '__main__.Node'>

lazy

Lazy loading will reconstruct an object using proxies. These proxies behave almost like the loaded object, but delay loading until the object is accessed. Saving lazy objects works the same as for regular objects; only loading returns a proxy object.


In [50]:
st.vars['lazyobj.nodes'][0,0,0] = Node('First')

The type of the returned object is LoaderProxy, while its __class__ is the base class handled by the store, so accessing the __class__ attribute does not trigger loading. The actual object can be accessed via __subject__, and doing so triggers loading. All regular attribute accesses are delegated to __subject__.attribute and hence also trigger loading.


In [51]:
#! lazy
proxy = st.vars['lazyobj.nodes'][0,0,0]
print('Type:   ', type(proxy))
print('Class:  ', proxy.__class__)
print('Content:', proxy.__subject__.__dict__)
print('Access: ', proxy.value)


Type:    <class 'openpathsampling.netcdfplus.proxy.LoaderProxy'>
Class:   <class '__main__.Node'>
Content: {'__uuid__': 313358805600210293968778471146830954544L, 'value': 'First'}
Access:  First

Load/Save objects

Note that there are now 6 Node objects.


In [52]:
print(st.nodes[:])


[Node(10), Node(10), Node(1), Node(Second), Node(Third), Node(First)]

In [53]:
obj = Node('BlaBla')
st.nodes.save(obj);

Saving without specifying a store should use the nodes store, which was defined last.


In [54]:
print(len(st.nodes))
obj = Node('BlaBlaBla')
st.save(obj)
print(len(st.nodes))


7
8

Get the index of the obj in the storage


In [55]:
print(st.idx(obj))


7

And test the different ways to access the contained json

1. direct json using variables in the store


In [56]:
print(st.nodes.variables['json'][st.idx(obj)])


{"_cls":"Node","_dict":{"value":"BlaBlaBla"}}

2. direct json using variables in the full storage


In [57]:
print(st.variables['nodes_json'][st.idx(obj)])


{"_cls":"Node","_dict":{"value":"BlaBlaBla"}}

3. indirect json and reconstruct using vars in the store


In [58]:
print(st.nodes.vars['json'][st.idx(obj)])
print(st.nodes.vars['json'][st.idx(obj)] is obj)


Node(BlaBlaBla)
False

4. using the store accessor __getitem__ in the store


In [59]:
print(st.nodes[st.idx(obj)])
print(st.nodes[st.idx(obj)] is obj)


Node(BlaBlaBla)
True

One important difference is that a store like nodes has a cache (which we set to 10 before). Using vars does not go through the store and hence creates a new object!
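
A small sketch of that difference, assuming the cache behavior described above (not verified in this notebook's output):

idx = st.idx(obj)

# going through the store uses the WeakLRUCache, so repeated access
# should return the identical object
a = st.nodes[idx]
b = st.nodes[idx]
assert a is b

# going through vars bypasses the store and reconstructs the object
# from its JSON each time, giving equal content but distinct objects
c = st.nodes.vars['json'][idx]
d = st.nodes.vars['json'][idx]
assert c is not d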

ObjectStores

ObjectStores are responsible for saving and loading objects. There are now 6 types available.

ObjectStore

The basic store which we have used before

NamedObjectStore

Supports giving objects names.


In [60]:
n = NamedNode(3)

NamedObjects have a .name property, which has a default.


In [61]:
print(n.name)


[NamedNode]

and can be set.


In [62]:
n.name = 'OneNode'
print(n.name)
n.name = 'MyNode'
print(n.name)


OneNode
MyNode

Once the object is saved, the name cannot be changed anymore.


In [63]:
st.nodesnamed.save(n);

In [64]:
try:
    n.name = 'NewName'
except ValueError as e:
    print('# We had an exception')
    print(e)
else:
    raise RuntimeWarning('This should have produced an error')


# We had an exception
Objects cannot be renamed to `NewName` after is has been saved, it is already named `MyNode`

Usually names are not unique (see the next store), so we can have more than one object with the same name.


In [65]:
n2 = NamedNode(9)
n2.name = 'MyNode'
st.nodesnamed.save(n2);

See the list of named objects


In [66]:
print(st.nodesnamed.name_idx)


{'MyNode': set([0, 1])}

UniqueNamedObjectStore

This forces names to be unique.


In [67]:
st.nodesunique.save(n);

Note here that an object can be stored more than once in a storage, but only if more than one store supports the object type.


In [68]:
try:
    st.nodesunique.save(n2)
except RuntimeWarning as e:
    print('# We had an exception')
    print(e)
else:
    raise RuntimeWarning('This should have produced an error')


# We had an exception
Current name "MyNode" is already taken in unique name store. This means you cannot save object "Node(9)" at all. In general this should not happen to unsaved objects unless you fixed the name of the object yourself. Check your code for the generation of objects of the same name.

As said before, this can only happen if you have more than one store for the same object type.


In [69]:
print(st.nodesunique.name_idx)


{'MyNode': set([0])}

Some more tests. First, saving unnamed objects. This is okay; only explicitly given names need to be unique.


In [70]:
n3 = NamedNode(10)
n4 = NamedNode(12)
st.nodesunique.save(n3);
st.nodesunique.save(n4);

In [71]:
n5 = NamedNode(1)
n5.name = 'MyNode'

In [72]:
try:
    st.nodesunique.save(n5)
except RuntimeWarning as e:
    print('# We had an exception')
    print(e)
else:
    raise RuntimeWarning('This should have produced an error')


# We had an exception
Current name "MyNode" is already taken in unique name store nodesunique. Try renaming object or saving using other name.

This works since it does a rename before saving.


In [73]:
st.nodesunique.save(n5, 'NextNode');

In [74]:
n6 = NamedNode(1)
n6.name = 'SecondNode'

In [75]:
try:
    st.nodesunique.save(n6, 'MyNode')
except RuntimeWarning as e:
    print('# We had an exception')
    print(e)
else:
    raise RuntimeWarning('This should have produced an error')


# We had an exception
New name "MyNode" already taken in unique name store. Try different name instead./nCurrent name "SecondNode" is still free. Saving without giving a specific name should work

DictStore

A DictStore works like a dictionary on disk. The content is returned using dict().


In [76]:
print(dict(st.dict))
print(st.dict.name_idx)


{}
{}

In [77]:
n1 = NamedNode(1)
n2 = NamedNode(2)
n3 = NamedNode(3)
st.dict['Number1'] = n1

In [78]:
for key in sorted(st.dict):
    obj = st.dict[key]
    idxs = sorted(st.dict.name_idx[key])
    print(key, ':', str(obj), idxs)


Number1 : Node(1) [0]

In [79]:
st.dict['Number2'] = n2

In [80]:
for key in sorted(st.dict):
    obj = st.dict[key]
    idxs = sorted(st.dict.name_idx[key])
    print(key, ':', str(obj), idxs)


Number1 : Node(1) [0]
Number2 : Node(2) [1]

In [81]:
st.dict['Number1'] = n3

In [82]:
for key in sorted(st.dict):
    obj = st.dict[key]
    idxs = sorted(st.dict.name_idx[key])
    print(key, ':', str(obj), idxs)


Number1 : Node(3) [0, 2]
Number2 : Node(2) [1]

In [83]:
print(st.dict['Number1'])


Node(3)

In [84]:
print(st.dict.find('Number1'))


Node(3)

In [85]:
print('[', ', '.join(st.dict.variables['json'][:]), ']')


[ {"_store":"nodesunique","_hex_uuid":"0xebbeb5f0b75311e89830000000000048L"}, {"_store":"nodesunique","_hex_uuid":"0xebbeb5f0b75311e8983000000000004aL"}, {"_store":"nodesunique","_hex_uuid":"0xebbeb5f0b75311e8983000000000004cL"} ]

In [86]:
for key in sorted(st.dict):
    obj = st.dict[key]
    idxs = sorted(st.dict.name_idx[key])
    print(key, ':', str(obj), idxs)


Number1 : Node(3) [0, 2]
Number2 : Node(2) [1]

ImmutableDictStore

This adds a check so that already used keys cannot be used again.


In [87]:
try:
    st.dictimmutable['Number1'] = n1
    st.dictimmutable['Number1'] = n2
except RuntimeWarning as e:
    print('# We had an exception')
    print(e)
else:
    raise RuntimeWarning('This should have produced an error')


# We had an exception
Cannot re-save existing key "Number1" in immutable dict store.

VariableStore

Store a node with an int value, as we defined for our VariableStore.


In [88]:
a = Node(30)
st.varstore.save(a);

clear the cache


In [89]:
st.varstore.clear_cache()

And try loading


In [90]:
assert(st.varstore[0].value == 30)

Try storing a value that cannot be parsed by int().


In [91]:
try:
    a = Node('test')
    print(st.varstore.save(a))
except ValueError as e:
    print('# We had an exception')
    print(e)
else:
    raise RuntimeWarning('This should have produced an error')


# We had an exception
invalid literal for int() with base 10: 'test'

Test fallback


In [92]:
st_uuid = NetCDFPlus('test_netcdfplus_uuid.nc', mode='w')
st_uuid.create_store('nodes', ObjectStore(Node))
st_uuid.finalize_stores()
st_uuid.save(st.nodes[0])
st_uuid.close()

In [93]:
st.close()

In [94]:
st_fb = NetCDFPlus('test_netcdfplus_fb.nc', mode='w', fallback=NetCDFPlus('test_netcdfplus_uuid.nc'))
st_fb.create_store('nodes', ObjectStore(Node))
st_fb.finalize_stores()

In [95]:
st_fb.exclude_from_fallback


Out[95]:
True

In [96]:
assert(st_fb.fallback.nodes[0] in st_fb.fallback)

In [97]:
assert(st_fb.fallback.nodes[0] in st_fb)

In [98]:
assert(st.nodes[0] in st_fb)

In [99]:
assert(st.nodes[0] in st_fb.fallback)

Try saving an object that already exists in the fallback.


In [100]:
print(hex(st_fb.nodes.save(st_fb.fallback.nodes[0])))


0xebbeb5f0b75311e89830000000000012L

In [101]:
assert(len(st_fb.nodes) == 0)

In [102]:
assert(st_fb.fallback.nodes[0] in st_fb)
assert(st_fb.fallback.nodes[0] in st_fb.fallback)
assert(st.nodes[0] in st_fb)
assert(st.nodes[0] in st_fb.fallback)

In [103]:
st_fb.fallback.close()
st_fb.close()