notebook.community

Edit and run



In [21]:

    
import sqlite3

from numba import jitclass, float64, int64, optional
from slumba import sqlite_udaf, create_aggregate



In [22]:

    
@sqlite_udaf(optional(float64)(optional(float64)))
@jitclass([('total', float64), ('count', int64)])
class Avg:
    def __init__(self):
        self.total = 0.0
        self.count = 0
        
    def step(self, value):
        if value is not None:
            self.total += value
            self.count += 1
        
    def finalize(self):
        count = self.count
        if count > 0:
            return self.total / count
        return None
    
    def value(self):
        return self.finalize()
    
    def inverse(self, value):
        if value is not None:
            self.total -= value
            self.count -= 1



In [23]:

    
con = sqlite3.connect(':memory:')



In [24]:

    
create_aggregate(con, 'my_avg', 1, Avg)



In [25]:

    
rowiter = con.execute("""
WITH t AS (
  SELECT 1 AS c UNION
  SELECT 2 AS c UNION
  SELECT NULL AS c
)
SELECT my_avg(c) OVER (ORDER BY c) AS my_udaf,
       avg(c) OVER (ORDER BY c) AS builtin_udaf
FROM t
""")
pd.DataFrame(list(rowiter), columns=['my_udaf', 'builtin_udaf'])









    Out[25]:







  
    
      
      my_udaf
      builtin_udaf
    
  
  
    
      0
      NaN
      NaN
    
    
      1
      1.0
      1.0
    
    
      2
      1.5
      1.5