In [1]:
# Combining column expressions with structured transforms
import karps as ks
import karps.functions as f
from karps.display import show_phase

In [2]:
employees = ks.dataframe([("a", 1), ("a", 2), ("b", 1)],
                         schema=["my_key", "my_val"],
                         name="employees")
employees


Out[2]:
/employees@org.spark.DistributedLiteral:{my_key:string, my_val:int}
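
The printed form reads path@op:schema: a dataset at /employees, produced by a DistributedLiteral op, with schema {my_key:string, my_val:int}. Nothing has executed yet. For contrast, the same literal dataset built eagerly with stock PySpark (a minimal sketch; the PySpark session below is not part of this Karps notebook):

    # Hedged sketch: eager PySpark counterpart of the Karps literal above.
    from pyspark.sql import SparkSession
    spark = SparkSession.builder.getOrCreate()
    pyspark_df = spark.createDataFrame([("a", 1), ("a", 2), ("b", 1)],
                                       schema=["my_key", "my_val"])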

In [3]:
c = employees.my_val >= 2
c


Out[3]:
greater_equal(my_val,LITERAL(int)):bool<-/employees@org.spark.DistributedLiteral:{my_key:string, my_val:int}
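
The comparison is just as lazy: c is an unevaluated boolean column. Its repr records the operator (greater_equal), the operands (my_val and an int literal), the result type (bool), and, after <-, the dataframe it is rooted in. The PySpark analogue is also a deferred expression (a sketch reusing the hypothetical pyspark_df above):

    c_spark = pyspark_df["my_val"] >= 2  # a Column expression, not data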

In [4]:
df1 = employees.my_key[employees.my_val >= 2]  # keep my_key where my_val >= 2
o = f.collect(df1)  # gather the column into a (still lazy) observable
o


_col_op_proto: extraction=['my_val'] c=my_val:int<-/employees@org.spark.DistributedLiteral:{my_key:string, my_val:int}
_col_op_proto: extraction=['my_key'] c=value:string<-/employees@org.spark.DistributedLiteral:{my_key:string, my_val:int}
_as_nodes: x=<class 'karps.column.DataFrame'>:/employees@org.spark.DistributedLiteral:{my_key:string, my_val:int}
_as_nodes: x=<class 'karps.column.DataFrame'>:/structured_transform_0@org.spark.StructuredTransform:{filter:bool, value:string}
_as_nodes: x=<class 'karps.column.DataFrame'>:/filter_1@org.spark.Filter:string
Out[4]:
/collect_list_2!org.spark.StructuredReduce:[string]
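
The debug trace shows how the one-liner decomposes into a small dataflow: the /employees literal feeds a StructuredTransform that computes both the boolean filter column and the projected value column, a Filter node then applies the predicate, and collect_list reduces the surviving strings into one local value. Note the ! in /collect_list_2!...: unlike the @ of the dataset nodes, it appears to mark an observable (a local result) rather than a distributed dataset. The eager PySpark analogue of the whole pipeline (a sketch, again using the hypothetical pyspark_df):

    filtered = pyspark_df.filter(pyspark_df["my_val"] >= 2).select("my_key")
    collected = [row["my_key"] for row in filtered.collect()]  # runs immediately, unlike Karps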

In [5]:
s = ks.session("demo2c")  # open a Karps session
comp = s.compute(o)  # launch the computation for the observable o

In [6]:
show_phase(comp, "initial")  # draw the plan as of the 'initial' compiler phase

[graph of the computation at the 'initial' phase is rendered inline here]

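show_phase renders the computation graph as it stood after the named compiler phase; "initial" is the plan exactly as assembled above, before backend-specific rewrites. Other phase names can be passed to watch the plan evolve (a sketch; the phase name "final" is an assumption about the Karps API, not verified here):

    show_phase(comp, "final")  # hypothetical: the plan after all rewrites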
In [7]:
comp.values()  # fetch the computed result


Out[7]:
([string], array_value {
  values {
    string_value: "a"
  }
}
)
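
values() returns the observable as a (type, value) pair: a list of strings whose single element is "a". That matches the data: of the three rows, only ("a", 2) passes my_val >= 2, and its my_key is "a". The same logic in plain Python, as an independent sanity check:

    rows = [("a", 1), ("a", 2), ("b", 1)]
    assert [k for (k, v) in rows if v >= 2] == ["a"]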
