Perform a group-by on a pandas DataFrame and iterate over the grouped result

In [1]:
import pandas as pd

In [2]:
# Demo data: ten rows, the first five belonging to "class 1" and the last
# five to "class 2".
classes = ["class 1" for _ in range(5)] + ["class 2" for _ in range(5)]

# Sub-class label and integer value of each row, in row order.
sub_class = ['c1', 'c2', 'c2', 'c1', 'c3', 'c1', 'c2', 'c3', 'c2', 'c3']
vals = [1, 3, 5, 1, 3, 2, 6, 7, 5, 2]

# Assemble the three equally long lists into the frame used by later cells.
p_df = pd.DataFrame(
    {
        "class": classes,
        "sub_class": sub_class,
        "vals": vals,
    }
)

In [3]:
# Show the whole frame; it has only ten rows, so no .head() is needed.
p_df


Out[3]:
class sub_class vals
0 class 1 c1 1
1 class 1 c2 3
2 class 1 c2 5
3 class 1 c1 1
4 class 1 c3 3
5 class 2 c1 2
6 class 2 c2 6
7 class 2 c3 7
8 class 2 c2 5
9 class 2 c3 2

In [4]:
# Median of `vals` for every (class, sub_class) pair. The result is a Series
# whose index is a two-level MultiIndex (class, sub_class).
grouped = (
    p_df
    .groupby(['class', 'sub_class'])['vals']
    .median()
)

In [5]:
# Inspect the per-(class, sub_class) medians computed in the previous cell.
grouped


Out[5]:
class    sub_class
class 1  c1           1.0
         c2           4.0
         c3           3.0
class 2  c1           2.0
         c2           5.5
         c3           4.5
Name: vals, dtype: float64

In [6]:
# Walk the grouped Series. Each key is a (class, sub_class) tuple taken from
# the MultiIndex, and each value is the median of that group.
# `Series.items()` is used instead of `Series.iteritems()`, which was
# deprecated in pandas 1.5 and removed in pandas 2.0.
for (class_name, sub_class_name), value in grouped.items():
    print(class_name, ":", sub_class_name, ":", value)


class 1 : c1 : 1.0
class 1 : c2 : 4.0
class 1 : c3 : 3.0
class 2 : c1 : 2.0
class 2 : c2 : 5.5
class 2 : c3 : 4.5

In [ ]: