In [63]:
import keebo
import plotly.graph_objects as go
import pandas as pd
import threading
from IPython.display import display
import ipywidgets as widgets
import time
import sys

keebo.set_loglevel('error')
k = keebo.presto('localhost:7878', 'presto')
print(keebo.version())


0.0.9

1. The Number of Items Sold over Time


In [64]:
fig = go.FigureWidget(data=go.Bar())

def updateFigure(x, y):
#     assert len(x) == len(y)
    fig.data[0].x = x
    fig.data[0].y = y
    
def run_query1():
    itr = k.sql_stream("""
        SELECT shipyear, shipmonth, count(*) as c
        FROM (
            SELECT year(l_shipdate) shipyear, month(l_shipdate) shipmonth
            FROM hive.tpch_sf100.lineitem_premerged
        ) t
        GROUP BY shipyear, shipmonth
        ORDER BY shipyear, shipmonth
    """)
                       
    for i, res in enumerate(itr):
        if i < 10:
            updateFigure(x=res.index, y=res['c'])
            time.sleep(0.1)
        else:
            break

In [20]:
start = time.time()
res = k.sql('''\
BYPASS SELECT shipyear, shipmonth, count(*) as c
FROM (
    SELECT year(l_shipdate) shipyear, month(l_shipdate) shipmonth
    FROM hive.tpch_sf100.lineitem_premerged
) t
GROUP BY shipyear, shipmonth
ORDER BY shipyear, shipmonth
''')
print(f'Regular Presto took {time.time() - start} secs')


Regular Presto took 20.080857753753662 secs

In [65]:
fig



In [66]:
run_query1()


client stream_id: stream0e5a4f48

2. Their price distribution


In [67]:
fig2 = go.FigureWidget(data=go.Bar())

def updateFigure2(x, y):
    assert len(x) == len(y)
    fig2.data[0].x = x
    fig2.data[0].y = y
    
def run_query2():
    itr = k.sql_stream("""
        SELECT price, count(*) c
        FROM (
            SELECT round(l_extendedprice / 1000) * 1000 price
            FROM hive.tpch_sf100.lineitem_premerged
        ) t
        GROUP BY price
        ORDER BY price
    """)

    for i, res in enumerate(itr):
        if i < 10:
            updateFigure2(x=res['price'], y=res['c'])
            time.sleep(0.1)
        else:
            break

In [68]:
fig2



In [61]:
start = time.time()
res = k.sql('''\
BYPASS SELECT price, count(*) c
FROM (
    SELECT round(l_extendedprice / 1000) * 1000 price
    FROM hive.tpch_sf100.lineitem_premerged
) t
GROUP BY price
ORDER BY price
''')
print(f'Regular Presto took {time.time() - start} secs')


Regular Presto took 20.087167263031006 secs

In [69]:
run_query2()


client stream_id: stream8c248bde

In [ ]: