In [1]:
import blaze as bz
import pandas as pd
In [2]:
# Show which Blaze release this notebook was run against.
bz.__version__
Out[2]:
In [3]:
# Read the iris CSV into an in-memory pandas DataFrame and preview ten rows.
df = pd.read_csv(
    filepath_or_buffer='/home/mrocklin/workspace/blaze/examples/data/iris.csv'
)
df.head(n=10)
Out[3]:
In [4]:
# Wrap the same iris CSV file (same path as the pandas cell) in a Blaze Table,
# then preview it with head(10) just as was done for the DataFrame.
t = bz.Table('/home/mrocklin/workspace/blaze/examples/data/iris.csv')
t.head(10)
Out[4]:
In [5]:
# Distinct values of the `species` column, expressed through Blaze.
t.species.distinct()
Out[5]:
In [6]:
# pandas: smallest sepal_length within each species group.
df.groupby('species')['sepal_length'].min()
Out[6]:
In [7]:
# Blaze counterpart of the pandas group-by in the previous cell: group on
# `species` and reduce `sepal_length` with min(), labelling the result
# column `smallest`.
bz.by(t.species,
smallest=t.sepal_length.min())
Out[7]:
In [8]:
# Connect to the SQLite copy of the iris data through a SQLAlchemy engine.
import sqlalchemy
engine = sqlalchemy.create_engine(
    'sqlite:////home/mrocklin/workspace/blaze/examples/data/iris.db'
)
# Run a full-table SELECT and materialise the rows as a pandas DataFrame,
# then preview the first ten of them.
df = pd.read_sql(sql='SELECT * FROM iris', con=engine)
df.head(n=10)
Out[8]:
In [9]:
# Point Blaze at the SQLite table instead of the CSV file. This rebinds `t`,
# so later cells that use `t` now refer to the database-backed table.
# NOTE(review): the part of the URI after "::" appears to select the table
# (`iris`, matching the FROM clause used with pandas) - confirm against the
# Blaze URI documentation.
t = bz.Table('sqlite:////home/mrocklin/workspace/blaze/examples/data/iris.db::iris')
t.head(10)
Out[9]:
In [10]:
# pandas: smallest sepal_length per species, computed on the DataFrame that
# was loaded from SQLite.
df.groupby('species')['sepal_length'].min()
Out[10]:
In [11]:
# Same Blaze group-by expression as used on the CSV-backed table, now with
# `t` bound to the SQLite-backed table: group on `species`, reduce
# `sepal_length` with min(), label the result `smallest`.
bz.by(t.species,
smallest=t.sepal_length.min())
Out[11]:
Pandas pulled data from the SQLite database into local memory, then used pandas algorithms on that data.
Blaze, by contrast, generated SQL and passed it back to the database.
In [12]:
# Build the group-by expression without evaluating it.
expr = bz.by(t.species,
smallest=t.sepal_length.min())
# Compute the expression with `t` explicitly mapped to `t.data.table`.
# NOTE(review): `t.data.table` is presumably the underlying SQLAlchemy table,
# which would make `result` a query object rather than fetched rows - confirm.
result = bz.compute(expr, {t: t.data.table})
result
Out[12]:
SQLAlchemy in turn speaks SQLite
In [13]:
# Print the string form of `result`.
# NOTE(review): this is presumably the SQL text that was generated for the
# expression - confirm against the recorded cell output.
# The call form of print gives the same output on Python 2 for a single
# argument and, unlike the print statement, is also valid on Python 3.
print(result)
A more complex example in SQLAlchemy
In [14]:
# Filter rows with sepal_length > 5, count the values of `species`, and
# compute that expression against `t.data.table`, printing the string form
# of whatever compute returns.
# NOTE(review): presumably this prints the generated SQL query - confirm.
# The call form of print gives the same output on Python 2 for a single
# argument and, unlike the print statement, is also valid on Python 3.
print(bz.compute(t[t.sepal_length > 5].species.count_values(),
                 {t: t.data.table}))
What users actually experience
In [15]:
# The same filter-and-count expression as in the previous cell, evaluated
# directly as the cell's value instead of through an explicit bz.compute call.
t[t.sepal_length > 5].species.count_values()
Out[15]: