In [1]:
from blaze import symbol, compute, join
In [2]:
bank = symbol('bank', '''1000 * {id: int,
                                 name: string,
                                 balance: int,
                                 lastseen: datetime}''')
bank  # no data to see here
Out[2]:
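The symbol carries nothing but a name and a datashape, yet that is enough to type-check field access and infer result types. A quick check (a sketch; note that datashape's int is an alias for int32):

bank.fields           # ['id', 'name', 'balance', 'lastseen']
bank.balance.dshape   # dshape("1000 * int32")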
In [3]:
deadbeats = bank[bank.balance < 0][['name', 'lastseen']]
deadbeats
Out[3]:
In [4]:
deadbeats.dshape
Out[4]:
dshape("var * {name: string, lastseen: datetime}")
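Note that the selection relaxed the fixed length 1000 to var: we cannot know, before seeing data, how many rows pass the filter. The join imported above composes the same way, producing another abstract expression whose datashape merges both sides, still with no data in sight. A minimal sketch against a second, hypothetical symbol:

cities = symbol('cities', 'var * {name: string, city: string}')
joined = join(bank, cities, 'name')
joined.fields   # the shared 'name' key plus the remaining columns of each side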
In [5]:
L = [[1, 'Alice',   100],
     [2, 'Bob',    -200],
     [3, 'Charlie', 300],
     [4, 'Dennis',  400],
     [5, 'Edith',  -500]]

from pandas import DataFrame
df = DataFrame([[1, 'Alice',   100],
                [2, 'Bob',    -200],
                [3, 'Charlie', 300],
                [4, 'Dennis',  400],
                [5, 'Edith',  -500]],
               columns=['id', 'name', 'balance'])

import pyspark
sc = pyspark.SparkContext('local', 'blaze-app')
rdd = sc.parallelize(L)

# redefine bank to match the three-column data above (no lastseen field)
bank = symbol('bank', '''1000 * {id: int,
                                 name: string,
                                 balance: int}''')
deadbeats = bank[bank.balance < 0].name
In [6]:
compute(deadbeats, L)    # against a plain Python list
Out[6]:
In [7]:
compute(deadbeats, df)   # against a pandas DataFrame
Out[7]:
In [8]:
compute(deadbeats, rdd)  # against a Spark RDD
Out[8]:
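Each backend answers in its native container: the pure-Python backend yields the two names with negative balances (Bob and Edith), pandas returns a Series, and Spark returns a lazy RDD to be collected later. When an expression mentions several symbols, or to make the binding explicit, compute also accepts a dictionary mapping symbols to data:

compute(deadbeats, {bank: df})   # equivalent to compute(deadbeats, df)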
In [9]:
from blaze.compute.core import compute_up
In [10]:
compute_up.source(bank.head(), df)    # which code runs for DataFrames?
In [11]:
compute_up.source(bank.head(), L)     # ... for Python lists?
In [12]:
compute_up.source(bank.head(), rdd)   # ... for Spark RDDs?
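compute_up is a single function defined many times, once per (expression type, container type) pair; multipledispatch selects an implementation from the types of both arguments, and .source shows which one a given pair picks. Adding a backend is therefore just registering more implementations. A hedged sketch for a hypothetical container MyStore (not a real Blaze backend):

from blaze.dispatch import dispatch
from blaze.expr import Head

class MyStore(object):
    def __init__(self, rows):
        self.rows = rows

@dispatch(Head, MyStore)
def compute_up(expr, data, **kwargs):
    # mirror the list/DataFrame backends: keep the first expr.n rows
    return MyStore(data.rows[:expr.n])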
In [13]:
x = symbol('x', '1000 * 1000 * {measurement: float32, timestamp: datetime}')
x
Out[13]:
In [14]:
expr = x[:500].measurement.sum(axis=1)  # row sums over the first 500 rows
expr
Out[14]:
In [15]:
expr.dshape
Out[15]:
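The same pipeline extends to array computations; a sketch computing this expression against a NumPy structured array of the declared shape (all-zero filler data):

import numpy as np
arr = np.zeros((1000, 1000),
               dtype=[('measurement', 'f4'), ('timestamp', 'M8[us]')])
compute(expr, arr)   # 500 row-sums over the measurement field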