This notebook requires a connection to a SciDB cluster and assumes that SciDB is reachable at http://localhost:8080. If your cluster is elsewhere, replace this address with the URL of your own SciDB instance.
Ready-to-use SciDB AMIs for Amazon EC2 are available; see the SciDB forum at http://scidb.org/forum for details.
In [ ]:
import numpy as np
from scidbpy import connect, SciDBQueryError, SciDBArray
# Address of the SciDB HTTP endpoint; change it to point at your own SciDB instance.
SCIDB_URL = 'http://localhost:8080'
sdb = connect(SCIDB_URL)
In [ ]:
# Build a 5x4 matrix of random samples locally with numpy ...
X = np.random.random(size=(5, 4))
# ... and hand it to the SciDB interface, which returns the SciDB-backed counterpart.
X_sci = sdb.from_array(X)
In [ ]:
# SciDBArray objects expose a mix of familiar numpy-style attributes and SciDB-specific ones:
X_sci.shape # numpy-style shape of the array
In [ ]:
X_sci.name # Name of the array on the SciDB side
In [ ]:
# Fetch the catalogue of arrays currently stored in the database and show their names:
array_catalog = sdb.list_arrays()
array_catalog.keys()
In [ ]:
# numpy-like constructors that create arrays through the SciDB interface.
square = (10, 10)

# 10x10 array of double-precision zeros
A = sdb.zeros(square)
# 10x10 array of ones stored as 64-bit signed integers
B = sdb.ones(square, dtype='int64')
# 10x10 array of uniform random numbers bounded by lower=-1 and upper=2
C = sdb.random(square, lower=-1, upper=2)
# Vector of 5 equally spaced values from 1 to 10, endpoints included
D = sdb.linspace(1, 10, num=5, endpoint=True)
# Sparse 10x10 identity matrix with double-precision values
E = sdb.identity(10, dtype='double', sparse=True)
In [ ]:
# Allocate a 10x10 array, then fill it with a tridiagonal pattern via a SciDB query:
# 2 where the two indices are equal, -1 where they differ by at most one, 0 elsewhere.
tridiag = sdb.new_array((10, 10))

# The {A} / {A.d0} / {A.d1} placeholders are filled in from the A= keyword argument
# (presumably the array name and its two dimension names -- confirm against scidbpy docs).
fill_query = (
    'store(build({A}, '
    'iif({A.d0}={A.d1}, 2, '
    'iif({A.d0} <= {A.d1}+1 and {A.d0} >= {A.d1}-1, -1, 0))), '
    '{A})'
)
sdb.query(fill_query, A=tridiag)
In [ ]:
# Pull the array into Python to inspect the values produced by the query
tridiag.toarray()
In [ ]:
# Build an array named Z inside SciDB using the raw query interface ...
build_z_query = "store(build(<v:double>[i=1:10,5,0,j=1:5,5,0],i+j),Z)"
sdb.query(build_z_query)

# ... then bind that existing SciDB array to a SciDBArray object in this Python session
# and display it.
Z = sdb.wrap_array(scidbname="Z")
Z
The scope of SciDB arrays connected to SciDBArray objects
The `persistent` argument of the `new_array` method determines whether a SciDB array is removed when its corresponding Python reference falls out of scope.
In [ ]:
# Create a random 10x10 array through the SciDB interface and keep its SciDB-side
# name, so the name can still be looked up after the array has been removed.
# Note: this rebinds X, which earlier in the notebook held a local numpy array.
X = sdb.random((10,10))
xname = X.name
xname
In [ ]:
# Let's delete X from the database:
X.reap()
# and check whether its array is still listed in SciDB. The `in` operator is
# used instead of dict.has_key(), which exists only on Python 2; `in` gives the
# same True/False answer on both Python 2 and Python 3.
xname in sdb.list_arrays()
# Note: Temporary arrays are automatically reaped when python exits
Materializing data to Python
In [ ]:
# Materialize the SciDB array in Python as a dense numpy array:
tridiag.toarray()
In [ ]:
# Materialize the SciDB array in Python as a sparse array; 'csr' selects the
# output format (presumably scipy's compressed sparse row format -- confirm).
# NOTE(review): `sparse` is not referenced directly in this cell; the import
# presumably just makes sure scipy.sparse is loaded for tosparse -- confirm.
from scipy import sparse
tridiag.tosparse('csr')
In [ ]:
# Slice rows 2..4 across all columns: a 3x10 subarray, returned as a new SciDBArray object.
# Note: this rebinds X to the slice.
X = tridiag[2:5,:]
X.toarray()
In [ ]:
# Slicing with a step: rows 2..4 and every second column
# (presumably numpy slice semantics, giving a 3x5 result -- confirm).
X = tridiag[2:5,::2]
X.toarray()
In [ ]:
# Sum aggregate over the array; [0] indexes into the returned result
# (presumably to extract the scalar value -- confirm).
tridiag.sum()[0]
In [ ]:
# Variance aggregate over the array; [0] indexes into the returned result
# (presumably to extract the scalar value -- confirm).
tridiag.var()[0]
In [ ]:
# Minimum along each axis of C, materialized as numpy arrays and printed.
# print(...) with a single argument behaves identically on Python 2 and Python 3,
# whereas the bare `print x` statement is a syntax error on Python 3.
print(C.min(0).toarray()) # Aggregate minimum over rows for each column
print(C.min(1).toarray()) # Aggregate minimum over columns for each row
The available aggregates also include min, max, std, mean, approxdc, and count.
In [ ]:
# Compare Python and SciDB
sin_tri = sdb.sin(tridiag)
np.linalg.norm(sin_tri.toarray() - np.sin(tridiag.toarray()))
In [ ]:
# Reshape the tridiagonal array to 20x5, then show the shape of the reshaped
# array and of its transpose.
# print(...) with a single argument works on both Python 2 and Python 3; the
# bare `print x` statement is a syntax error on Python 3.
tri_reshape = tridiag.reshape((20,5))
print(tri_reshape.shape)
print(tri_reshape.transpose().shape)
In [ ]:
# Two random 10x10 numpy matrices handed to the SciDB interface.
# Note: X and D are rebound here; earlier cells used those names for other arrays.
X = sdb.from_array(np.random.random(size=(10, 10)))
Y = sdb.from_array(np.random.random(size=(10, 10)))

# Arithmetic expressed directly on the SciDBArray objects
S = X + Y
D = X - Y
M = 2 * X

# (X + Y) + (X - Y) - 2X is identically zero, so the total should be ~0
# up to floating-point rounding.
residual = S + D - M
residual.sum()[0]
In [ ]:
# Matrix product of X and Y via the SciDB interface.
# NOTE(review): the result is not materialized (no toarray call), so this cell
# displays the returned object rather than its values -- presumably intended.
sdb.dot(X,Y)
In [ ]:
# Product of X transposed with column 1 of Y (assuming numpy-style .T and
# slicing semantics -- confirm), materialized as a numpy array.
sdb.dot(X.T, Y[:,1]).toarray()