In [21]:
import sqlite3
from numba import jitclass, float64, int64, optional
from slumba import sqlite_udaf, create_aggregate
In [22]:
@sqlite_udaf(optional(float64)(optional(float64)))
@jitclass([('total', float64), ('count', int64)])
class Avg:
def __init__(self):
self.total = 0.0
self.count = 0
def step(self, value):
if value is not None:
self.total += value
self.count += 1
def finalize(self):
count = self.count
if count > 0:
return self.total / count
return None
def value(self):
return self.finalize()
def inverse(self, value):
if value is not None:
self.total -= value
self.count -= 1
In [23]:
con = sqlite3.connect(':memory:')
In [24]:
create_aggregate(con, 'my_avg', 1, Avg)
In [25]:
rowiter = con.execute("""
WITH t AS (
SELECT 1 AS c UNION
SELECT 2 AS c UNION
SELECT NULL AS c
)
SELECT my_avg(c) OVER (ORDER BY c) AS my_udaf,
avg(c) OVER (ORDER BY c) AS builtin_udaf
FROM t
""")
pd.DataFrame(list(rowiter), columns=['my_udaf', 'builtin_udaf'])
Out[25]: