In [21]:
import sqlite3

from numba import jitclass, float64, int64, optional
from slumba import sqlite_udaf, create_aggregate

In [22]:
@sqlite_udaf(optional(float64)(optional(float64)))
@jitclass([('total', float64), ('count', int64)])
class Avg:
    def __init__(self):
        self.total = 0.0
        self.count = 0
        
    def step(self, value):
        if value is not None:
            self.total += value
            self.count += 1
        
    def finalize(self):
        count = self.count
        if count > 0:
            return self.total / count
        return None
    
    def value(self):
        return self.finalize()
    
    def inverse(self, value):
        if value is not None:
            self.total -= value
            self.count -= 1

In [23]:
con = sqlite3.connect(':memory:')

In [24]:
create_aggregate(con, 'my_avg', 1, Avg)

In [25]:
rowiter = con.execute("""
WITH t AS (
  SELECT 1 AS c UNION
  SELECT 2 AS c UNION
  SELECT NULL AS c
)
SELECT my_avg(c) OVER (ORDER BY c) AS my_udaf,
       avg(c) OVER (ORDER BY c) AS builtin_udaf
FROM t
""")
pd.DataFrame(list(rowiter), columns=['my_udaf', 'builtin_udaf'])


Out[25]:
my_udaf builtin_udaf
0 NaN NaN
1 1.0 1.0
2 1.5 1.5