In [1]:
import pandas as pd
import numpy as np
import karps as ks
import karps.functions as f
from karps.display import show_phase

In [2]:
def harmonic_mean(col):
    # Harmonic mean = n / sum(1/x_i), written with karps.functions
    # primitives so the same body works on pandas and karps dataframes.
    count = f.as_double(f.count(col))
    inv_sum = 1.0/f.sum(1.0/col)  # reciprocal of the sum of inverses
    return inv_sum * count

In [3]:
df0 = pd.DataFrame([1.0, 2.0])
df0


Out[3]:
     0
0  1.0
1  2.0

In [4]:
harmonic_mean(df0)


Out[4]:
1.3333333333333333

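As a sanity check: for [1.0, 2.0] the sum of inverses is 1/1 + 1/2 = 1.5, so the harmonic mean is 2 / 1.5 = 4/3 ≈ 1.3333, which matches the output above. The unexecuted cell below cross-checks this with plain NumPy, using only the np import from the first cell.

In [ ]:
# Plain-NumPy cross-check of the harmonic mean of [1.0, 2.0].
data = np.array([1.0, 2.0])
len(data) / np.sum(1.0 / data)
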
In [5]:
# Create a HUGE dataframe
df = ks.dataframe([1.0, 2.0], name="my_input")
df


Out[5]:
/my_input@org.spark.DistributedLiteral:double

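Note that df is not materialized data: the output above is the name and type of a node (/my_input, a distributed literal of doubles) in karps' computation graph. Nothing gets computed until a session evaluates the graph, further below.
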
In [6]:
# And apply our function:
cached_df = f.autocache(df)
hmean = harmonic_mean(cached_df)
hmean


Out[6]:
/multiply6!org.spark.LocalStructuredTransform:double

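Again, hmean is only a graph node (a local transform of type double), not a value. Since col is used twice inside harmonic_mean (once by count, once by sum), autocache presumably adds a hint that the input should be cached rather than recomputed; the sketch below builds the same pipeline without that hint, purely for comparison.

In [ ]:
# Hypothetical variant: apply harmonic_mean directly, skipping autocache.
hmean_nocache = harmonic_mean(df)
hmean_nocache
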
In [7]:
s = ks.session("demo1e")

In [10]:
s.eval(hmean)


Out[10]:
1.3333333333333333
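
eval evaluates the graph and returns the value directly. The result matches the pandas run from In [4]: the same harmonic_mean body produced the same answer on the karps dataframe.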

In [ ]:
s = ks.session("demo1b")

In [11]:
s.compute(hmean)


Out[11]:
<karps.computation.Computation at 0x109815d30>

In [12]:
_11.values()


Out[12]:
(double, double_value: 1.3333333333333333
)
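
Unlike eval, compute returns a Computation handle rather than a value; the results are pulled out with .values(), as done above through IPython's _11 output-history shortcut. Written out explicitly (an unexecuted sketch using only the calls shown above):

In [ ]:
# Equivalent to the two cells above, without the _11 history reference.
comp = s.compute(hmean)
comp.values()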
