In [1]:
# Combining column expressions with structured transforms
import karps as ks
import karps.functions as f
from karps.display import show_phase

In [2]:
employees = ks.dataframe([("a", 1), ("a", 2), ("b", 1)],
                         schema=["my_key", "my_val"],
                         name="employees")
employees


Out[2]:
/employees@org.spark.DistributedLiteral:{my_key:string, my_val:int}
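
The printed form reads path@op:schema: a dataset at /employees, produced by a DistributedLiteral op, with schema {my_key:string, my_val:int}. Nothing has executed yet. For contrast, the same literal dataset built eagerly with stock PySpark (a minimal sketch; the PySpark session below is not part of this Karps notebook):

    # Hedged sketch: eager PySpark counterpart of the Karps literal above.
    from pyspark.sql import SparkSession
    spark = SparkSession.builder.getOrCreate()
    pyspark_df = spark.createDataFrame([("a", 1), ("a", 2), ("b", 1)],
                                       schema=["my_key", "my_val"])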

In [3]:
c = employees.my_val >= 2
c


Out[3]:
greater_equal(my_val,LITERAL(int)):bool<-/employees@org.spark.DistributedLiteral:{my_key:string, my_val:int}
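
The comparison is just as lazy: c is an unevaluated boolean column. Its repr records the operator (greater_equal), the operands (my_val and an int literal), the result type (bool), and, after <-, the dataframe it is rooted in. The PySpark analogue is also a deferred expression (a sketch reusing the hypothetical pyspark_df above):

    c_spark = pyspark_df["my_val"] >= 2  # a Column expression, not data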

In [4]:
df1 = employees.my_key[employees.my_val >= 2]  # keep my_key where my_val >= 2
o = f.collect(df1)  # gather the column into a (still lazy) observable
o


_col_op_proto: extraction=['my_val'] c=my_val:int<-/employees@org.spark.DistributedLiteral:{my_key:string, my_val:int}
_col_op_proto: extraction=['my_key'] c=value:string<-/employees@org.spark.DistributedLiteral:{my_key:string, my_val:int}
_as_nodes: x=<class 'karps.column.DataFrame'>:/employees@org.spark.DistributedLiteral:{my_key:string, my_val:int}
_as_nodes: x=<class 'karps.column.DataFrame'>:/structured_transform_0@org.spark.StructuredTransform:{filter:bool, value:string}
_as_nodes: x=<class 'karps.column.DataFrame'>:/filter_1@org.spark.Filter:string
Out[4]:
/collect_list_2!org.spark.StructuredReduce:[string]
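
The debug trace shows how the one-liner decomposes into a small dataflow: the /employees literal feeds a StructuredTransform that computes both the boolean filter column and the projected value column, a Filter node then applies the predicate, and collect_list reduces the surviving strings into one local value. Note the ! in /collect_list_2!...: unlike the @ of the dataset nodes, it appears to mark an observable (a local result) rather than a distributed dataset. The eager PySpark analogue of the whole pipeline (a sketch, again using the hypothetical pyspark_df):

    filtered = pyspark_df.filter(pyspark_df["my_val"] >= 2).select("my_key")
    collected = [row["my_key"] for row in filtered.collect()]  # runs immediately, unlike Karps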

In [5]:
s = ks.session("demo2c")  # open a Karps session
comp = s.compute(o)  # launch the computation for the observable o

In [6]:
show_phase(comp, "initial")  # draw the plan as of the 'initial' compiler phase

[graph of the computation at the 'initial' phase is rendered inline here]

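show_phase renders the computation graph as it stood after the named compiler phase; "initial" is the plan exactly as assembled above, before backend-specific rewrites. Other phase names can be passed to watch the plan evolve (a sketch; the phase name "final" is an assumption about the Karps API, not verified here):

    show_phase(comp, "final")  # hypothetical: the plan after all rewrites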
In [7]:
comp.values()  # fetch the computed result


Out[7]:
([string], array_value {
  values {
    string_value: "a"
  }
}
)
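
values() returns the observable as a (type, value) pair: a list of strings whose single element is "a". That matches the data: of the three rows, only ("a", 2) passes my_val >= 2, and its my_key is "a". The same logic in plain Python, as an independent sanity check:

    rows = [("a", 1), ("a", 2), ("b", 1)]
    assert [k for (k, v) in rows if v >= 2] == ["a"]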
