In [1]:
from sklearn import datasets
import re
from dataframe import Callable
import numpy
In [3]:
class Mean(Callable):
    """Callable that averages the values of its first positional argument.

    This notebook hands the class itself to ``aggregate`` to reduce a column.
    """

    def __call__(self, *args):
        """Return ``numpy.mean`` of ``args[0].values``.

        Only the first positional argument is consulted; any further
        arguments are ignored. That argument must expose a ``.values``
        attribute (presumably a column object supplied by the ``dataframe``
        package -- confirm against that package).
        """
        return numpy.mean(args[0].values)
In [4]:
# Load the iris dataset; the returned object is read below through its
# .feature_names, .data and .target attributes.
iris_data = datasets.load_iris()

# Turn each feature name into a compact identifier by stripping whitespace,
# the literal "cm" and parentheses. The pattern is a raw string so that
# \s, \( and \) reach the regex engine untouched instead of being parsed as
# (invalid) Python string escapes, which newer interpreters warn about.
features = [re.sub(r"\s|cm|\(|\)", "", name) for name in iris_data.feature_names]

# Map every cleaned feature name to its column of the data matrix. Pairing
# names with column positions via enumerate avoids slicing row 1 of the
# matrix merely to count its columns.
data = {name: iris_data.data[:, col] for col, name in enumerate(features)}

# Keep the class labels alongside the feature columns.
data["target"] = iris_data.target
In [5]:
# Everything this notebook needs from the dataframe package, in one import.
from dataframe import DataFrame, aggregate, group, subset

# Build the frame by passing every entry of `data` as a keyword argument
# (one keyword per dict key).
frame = DataFrame(**data)

# Bare last expression: the notebook renders the frame as this cell's output.
frame
Out[5]:
In [6]:
# Pipe the frame through group("target") and then through an aggregation
# that is handed the Mean class. Presumably "m" names the resulting column
# and "sepalwidth" selects the column being averaged -- confirm against the
# dataframe package. The expression is the cell's last statement, so its
# result is displayed.
(
    frame
    >> group("target")
    >> aggregate(Mean, "m", "sepalwidth")
)
Out[6]: