In [1]:
import karps as ks
import karps.functions as f
from karps.display import show_phase

In [2]:
# Two-element literal dataset of doubles, registered under the name "my_input".
df = ks.dataframe([1.0, 2.0], name="my_input")
df


Out[2]:
/my_input@org.spark.DistributedLiteral:double

In [3]:


In [4]:
def harmonic_mean(col):
    """Build the harmonic mean of ``col``.

    The result is ``count(col) * inv(sum(inv(col)))``, i.e. n / sum(1 / x_i),
    expressed with the operators from ``karps.functions`` (imported as ``f``).
    The count is cast to double before the multiplication.
    """
    n_rows = f.as_double(f.count(col))
    reciprocal_total = f.sum(f.inv(col))
    # Keep the inverse on the left of the product, as the count is the scale factor.
    return f.inv(reciprocal_total) * n_rows

In [5]:
# harmonic_mean reads its input twice (once for the count, once for the sum
# of inverses), so the dataframe is wrapped with autocache before use.
# NOTE(review): presumably autocache lets karps decide whether to cache the
# data when it is consumed more than once — confirm against the karps docs.
cached_df = f.autocache(df)
hmean = harmonic_mean(cached_df)
hmean


Out[5]:
/multiply_11!org.spark.LocalStructuredTransform:double

In [ ]:


In [ ]:


In [ ]:


In [6]:
# Open a session named "demo1c" and submit the harmonic-mean node to it.
# NOTE(review): compute() appears to return a handle whose phases and values
# are inspected in later cells — confirm whether the call itself blocks.
s = ks.session("demo1c")
comp = s.compute(hmean)

In [7]:
# Display the computation at the phase named "initial"
# (presumably the graph as submitted, before any rewriting — TODO confirm).
show_phase(comp, "initial")



In [8]:
# Display the computation at the "REMOVE_OBSERVABLE_BROADCASTS" phase.
# NOTE(review): phase name suggests an optimizer pass — confirm its exact effect.
show_phase(comp, "REMOVE_OBSERVABLE_BROADCASTS")



In [9]:
# Display the computation at the "MERGE_PREAGG_AGGREGATIONS" phase.
# NOTE(review): phase name suggests an optimizer pass — confirm its exact effect.
show_phase(comp, "MERGE_PREAGG_AGGREGATIONS")



In [10]:
# Display the computation at the "MERGE_AGGREGATIONS" phase.
# NOTE(review): phase name suggests an optimizer pass — confirm its exact effect.
show_phase(comp, "MERGE_AGGREGATIONS")



In [11]:
# Display the computation at the phase named "final"
# (presumably the graph that is actually executed — TODO confirm).
show_phase(comp, "final")



In [12]:
# Retrieve the computed result. For the input [1.0, 2.0] the harmonic mean
# is 2 / (1/1 + 1/2) = 4/3.
comp.values()


Out[12]:
(double, double_value: 1.3333333333333333
)

In [13]:
# Run the harmonic-mean node through the session. The node is bound to
# `hmean`; the name `the_mean` was never defined in this notebook and made
# this cell fail with NameError on a fresh kernel.
s.run(hmean)

In [ ]: