In [1]:
from sklearn import datasets
import re
from dataframe import Callable
import numpy

In [3]:
class Mean(Callable):
    """Callable that reduces its first argument to an arithmetic mean.

    The first positional argument must expose a ``.values`` attribute that
    ``numpy.mean`` can consume (presumably a dataframe column -- confirm
    against the ``dataframe`` package). Any further positional arguments
    are ignored.
    """

    def __call__(self, *args):
        """Return ``numpy.mean`` of ``args[0].values``."""
        first_column = args[0]
        return numpy.mean(first_column.values)

In [4]:
# Load the iris dataset bundled with scikit-learn.
iris_data = datasets.load_iris()

# Normalise the feature names by stripping whitespace, the literal "cm" and
# parentheses. The pattern is a raw string so that `\s`, `\(` and `\)` reach
# the regex engine untouched instead of being flagged as invalid string
# escape sequences by the Python compiler.
features = [re.sub(r"\s|cm|\(|\)", "", x) for x in iris_data.feature_names]

# Map each cleaned feature name to its column of the measurement matrix,
# then add the class labels under the "target" key.
data = {name: iris_data.data[:, i] for i, name in enumerate(features)}
data["target"] = iris_data.target

In [5]:
from dataframe import DataFrame, aggregate, group, subset

# Build the frame from the name -> column mapping; each dict key is passed
# as a keyword argument.
frame = DataFrame(**data)
frame


Out[5]:
A dataframe

petallength    petalwidth    sepallength    sepalwidth    target
-------------  ------------  -------------  ------------  --------
1.4            0.2           5.1            3.5           0
1.4            0.2           4.9            3.0           0
1.3            0.2           4.7            3.2           0
.              .             .              .             .
.              .             .              .             .
.              .             .              .             .
5.2            2.0           6.5            3.0           2
5.4            2.3           6.2            3.4           2
5.1            1.8           5.9            3.0           2

In [6]:
# Group the rows by "target", then aggregate "sepalwidth" with Mean; the
# result is reported in a column named "m".
(
    frame
    >> group("target")
    >> aggregate(Mean, "m", "sepalwidth")
)


Out[6]:
A dataframe

    m    target
-----  --------
3.418         0
2.77          1
2.974         2