In [1]:
import os

import blaze
from blaze import Data, by, compute
In [2]:
# Wrap a plain Python integer in a Blaze `Data` object.
x = Data(1)
# Bare trailing expression: the notebook displays x.
x
Out[2]:
In [3]:
# Inspect the `dshape` attribute of the scalar-backed x.
x.dshape
Out[3]:
In [4]:
# Add one to the wrapped value; the notebook displays whatever this evaluates to.
x + 1
Out[4]:
In [5]:
# Compare the type of the expression `x + 1` with the type of the object
# that compute() returns for it.
# print(...) with a single argument behaves identically on Python 2 and 3,
# whereas the bare `print expr` statement is a syntax error on Python 3.
print(type(x + 1))
print(type(compute(x + 1)))
In [6]:
# Rebind x to a Data object built from a Python list of integers.
x = Data([1, 2, 3, 4, 5])
# Display it.
x
Out[6]:
In [7]:
# Index x with the comparison `x > 2`, then multiply the selection by 10.
x[x > 2] * 10
Out[7]:
In [8]:
# Inspect the `dshape` attribute of the list-backed x.
x.dshape
Out[8]:
In [9]:
# Five sample records as a list of lists; each inner list holds three
# positional values that a later cell labels id, name and amount.
L = [
    [1, 'Alice', 100],
    [2, 'Bob', -200],
    [3, 'Charlie', 300],
    [4, 'Dennis', 400],
    [5, 'Edith', -500],
]
In [10]:
# Wrap the nested list L, passing three names through `fields`; later cells
# use those names as attributes (x.amount, .name).
x = Data(L, fields=['id', 'name', 'amount'])
# Inspect the resulting dshape.
x.dshape
Out[10]:
In [11]:
# Display the Data object built from L.
x
Out[11]:
In [12]:
# Expression for the `name` field of the entries whose `amount` is below zero.
deadbeats = x[x.amount < 0].name
# Display the expression.
deadbeats
Out[12]:
In [13]:
from pandas import DataFrame

# The same five records as the plain-list example, now held in a pandas
# DataFrame with explicit column labels.
# 'Dennis' is spelled as in the list `L` so both backends show identical data
# (this cell previously read 'Denis').
df = DataFrame(
    [[1, 'Alice', 100],
     [2, 'Bob', -200],
     [3, 'Charlie', 300],
     [4, 'Dennis', 400],
     [5, 'Edith', -500]],
    columns=['id', 'name', 'amount'],
)
In [14]:
# Display the pandas DataFrame itself, before it is wrapped by Blaze.
df
Out[14]:
In [15]:
# Rebind x to a Data object wrapping the DataFrame.
x = Data(df)
# Display it.
x
Out[15]:
In [16]:
# Same query as in the list-backed example, now written against the
# DataFrame-backed x: names of entries whose amount is below zero.
deadbeats = x[x.amount < 0].name
# Display the expression.
deadbeats
Out[16]:
Calling `compute`, we see that Blaze returns the same kind of object it was given.
In [17]:
# Inspect the type of the object compute() returns for the
# DataFrame-backed expression.
type(compute(deadbeats))
Out[17]:
In [18]:
from sqlalchemy import Table, Column, MetaData, Integer, String, create_engine

# Describe a 'bank' table with SQLAlchemy: integer id, string name,
# integer amount, registered on a fresh MetaData object.
tab = Table(
    'bank',
    MetaData(),
    Column('id', Integer),
    Column('name', String),
    Column('amount', Integer),
)
In [19]:
# Rebind x to a Data object wrapping the SQLAlchemy Table definition.
x = Data(tab)
# Inspect the resulting dshape.
x.dshape
Out[19]:
Just as computations on pandas objects produce pandas objects, computations on SQLAlchemy tables produce SQLAlchemy Select statements.
In [20]:
# Same query once more, now against the SQLAlchemy-backed x.
deadbeats = x[x.amount < 0].name
# Per the accompanying text, computing it yields a SQLAlchemy Select statement.
compute(deadbeats)
Out[20]:
In [21]:
# Printing the computed object shows its string form.
# print(...) with a single argument works on both Python 2 and Python 3;
# the bare `print expr` statement is a syntax error on Python 3.
print(compute(deadbeats))  # SQLAlchemy generates actual SQL
In [22]:
# Locate the example iris database relative to the installed blaze package
# instead of through one developer's home directory, so the cell runs on any
# machine.
# NOTE(review): assumes blaze/examples/data/iris.db ships with the installed
# package — confirm for your install.
iris_db = os.path.join(os.path.dirname(os.path.abspath(blaze.__file__)),
                       'examples', 'data', 'iris.db')
# 'sqlite:///' followed by an absolute path is SQLAlchemy's SQLite URL form.
engine = create_engine('sqlite:///' + iris_db)
In [23]:
# Wrap the SQLAlchemy engine itself rather than a single table.
x = Data(engine)
# Display it.
x
Out[23]:
In [24]:
# Access the `iris` attribute of the engine-backed x
# (presumably the iris table in that database — confirm).
x.iris
Out[24]:
In [25]:
# Group by species and, per group, take the minimum and maximum of
# sepal_length, labelled `shortest` and `longest`.
by(
    x.iris.species,
    shortest=x.iris.sepal_length.min(),
    longest=x.iris.sepal_length.max(),
)
Out[25]:
In [26]:
# Build the URI from the installed blaze package location instead of a
# hard-coded home-directory path, so the cell runs on any machine.
# NOTE(review): assumes blaze/examples/data/iris.db ships with the installed
# package — confirm for your install.
iris_db = os.path.join(os.path.dirname(os.path.abspath(blaze.__file__)),
                       'examples', 'data', 'iris.db')
# The '::iris' suffix is kept as in the original URI string.
x = Data('sqlite:///' + iris_db + '::iris')
# Display it.
x
Out[26]:
In [27]:
# Point Data at an Impala URI.
# NOTE(review): the EC2 hostname is hard-coded and presumably ephemeral —
# confirm it is still reachable or make it configurable.
x = Data('impala://ec2-54-90-201-28.compute-1.amazonaws.com')
GitHub's database is mirrored in a MongoDB collection hosted in the Netherlands.
We connect to it through an SSH tunnel; see http://ghtorrent.org/ to obtain access.
In [28]:
# Wrap the `users` collection of the `github` database reached via localhost.
# NOTE(review): a username and password are embedded in the URI — confirm they
# are the public read-only credentials and consider reading them from the
# environment instead.
users = Data('mongodb://ghtorrentro:ghtorrentro@localhost/github::users')
# Display it.
users
Out[28]:
In [29]:
import h5py

# Open the HDF5 file explicitly read-only: the notebook only inspects it, and
# older h5py versions default to a read/write-or-create mode that can create
# an empty file when the path is wrong.
# The handle is left open on purpose because later cells read through `f`.
# NOTE(review): the path points into one user's Downloads folder — adjust it
# to wherever the file lives locally.
f = h5py.File('/home/mrocklin/Downloads/OMI-Aura_L2-OMAERO_2014m1105t2304-o54838_v003-2014m1106t215558.he5', 'r')
In [30]:
# Rebind x to a Data object wrapping the open h5py file handle.
x = Data(f)
# Inspect the resulting dshape.
x.dshape
Out[30]:
In [31]:
# Walk down the nested attribute path to the CloudPressure entry.
x.HDFEOS.SWATHS.ColumnAmountAerosol.Data_Fields.CloudPressure
Out[31]:
In [32]:
# Maximum over the CloudPressure entry.
x.HDFEOS.SWATHS.ColumnAmountAerosol.Data_Fields.CloudPressure.max()
Out[32]: