In [1]:
import pandas as pd
import numpy as np
import karps as ks
import karps.functions as f
from karps.display import show_phase

In [2]:
def harmonic_mean(col):
    # Harmonic mean = n / sum(1/x_i), written with karps.functions
    # primitives so the same body works on pandas and karps dataframes.
    count = f.as_double(f.count(col))
    inv_sum = 1.0/f.sum(1.0/col)  # reciprocal of the sum of inverses
    return inv_sum * count

In [3]:
df0 = pd.DataFrame([1.0, 2.0])
df0


Out[3]:
     0
0  1.0
1  2.0

In [4]:
harmonic_mean(df0)


Out[4]:
1.3333333333333333

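As a sanity check: for [1.0, 2.0] the sum of inverses is 1/1 + 1/2 = 1.5, so the harmonic mean is 2 / 1.5 = 4/3 ≈ 1.3333, which matches the output above. The unexecuted cell below cross-checks this with plain NumPy, using only the np import from the first cell.

In [ ]:
# Plain-NumPy cross-check of the harmonic mean of [1.0, 2.0].
data = np.array([1.0, 2.0])
len(data) / np.sum(1.0 / data)
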
In [5]:
# Create a HUGE dataframe
df = ks.dataframe([1.0, 2.0], name="my_input")
df


Out[5]:
/my_input@org.spark.DistributedLiteral:double

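Note that df is not materialized data: the output above is the name and type of a node (/my_input, a distributed literal of doubles) in karps' computation graph. Nothing gets computed until a session evaluates the graph, further below.
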
In [6]:
# And apply our function:
cached_df = f.autocache(df)
hmean = harmonic_mean(cached_df)
hmean


Out[6]:
/multiply6!org.spark.LocalStructuredTransform:double

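Again, hmean is only a graph node (a local transform of type double), not a value. Since col is used twice inside harmonic_mean (once by count, once by sum), autocache presumably adds a hint that the input should be cached rather than recomputed; the sketch below builds the same pipeline without that hint, purely for comparison.

In [ ]:
# Hypothetical variant: apply harmonic_mean directly, skipping autocache.
hmean_nocache = harmonic_mean(df)
hmean_nocache
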
In [7]:
s = ks.session("demo1e")

In [10]:
s.eval(hmean)


Out[10]:
1.3333333333333333
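
eval evaluates the graph and returns the value directly. The result matches the pandas run from In [4]: the same harmonic_mean body produced the same answer on the karps dataframe.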

In [ ]:
s = ks.session("demo1b")

In [11]:
s.compute(hmean)


Out[11]:
<karps.computation.Computation at 0x109815d30>

In [12]:
_11.values()


Out[12]:
(double, double_value: 1.3333333333333333
)
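
Unlike eval, compute returns a Computation handle rather than a value; the results are pulled out with .values(), as done above through IPython's _11 output-history shortcut. Written out explicitly (an unexecuted sketch using only the calls shown above):

In [ ]:
# Equivalent to the two cells above, without the _11 history reference.
comp = s.compute(hmean)
comp.values()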
