In [63]:
import keebo
import plotly.graph_objects as go
import pandas as pd
import threading
from IPython.display import display
import ipywidgets as widgets
import time
import sys
keebo.set_loglevel('error')
k = keebo.presto('localhost:7878', 'presto')
print(keebo.version())
In [64]:
fig = go.FigureWidget(data=go.Bar())
def updateFigure(x, y):
# assert len(x) == len(y)
fig.data[0].x = x
fig.data[0].y = y
def run_query1():
itr = k.sql_stream("""
SELECT shipyear, shipmonth, count(*) as c
FROM (
SELECT year(l_shipdate) shipyear, month(l_shipdate) shipmonth
FROM hive.tpch_sf100.lineitem_premerged
) t
GROUP BY shipyear, shipmonth
ORDER BY shipyear, shipmonth
""")
for i, res in enumerate(itr):
if i < 10:
updateFigure(x=res.index, y=res['c'])
time.sleep(0.1)
else:
break
In [20]:
start = time.time()
res = k.sql('''\
BYPASS SELECT shipyear, shipmonth, count(*) as c
FROM (
SELECT year(l_shipdate) shipyear, month(l_shipdate) shipmonth
FROM hive.tpch_sf100.lineitem_premerged
) t
GROUP BY shipyear, shipmonth
ORDER BY shipyear, shipmonth
''')
print(f'Regular Presto took {time.time() - start} secs')
In [65]:
fig
In [66]:
run_query1()
In [67]:
fig2 = go.FigureWidget(data=go.Bar())
def updateFigure2(x, y):
assert len(x) == len(y)
fig2.data[0].x = x
fig2.data[0].y = y
def run_query2():
itr = k.sql_stream("""
SELECT price, count(*) c
FROM (
SELECT round(l_extendedprice / 1000) * 1000 price
FROM hive.tpch_sf100.lineitem_premerged
) t
GROUP BY price
ORDER BY price
""")
for i, res in enumerate(itr):
if i < 10:
updateFigure2(x=res['price'], y=res['c'])
time.sleep(0.1)
else:
break
In [68]:
fig2
In [61]:
start = time.time()
res = k.sql('''\
BYPASS SELECT price, count(*) c
FROM (
SELECT round(l_extendedprice / 1000) * 1000 price
FROM hive.tpch_sf100.lineitem_premerged
) t
GROUP BY price
ORDER BY price
''')
print(f'Regular Presto took {time.time() - start} secs')
In [69]:
run_query2()
In [ ]: