In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

In [3]:
# Build a small demo frame: two string key columns (k1, k2) used for grouping
# and two numeric columns (dataset1, dataset2) to aggregate.
#
# The numeric columns are drawn from a locally seeded generator so that a
# fresh "Restart Kernel -> Run All" always produces the same numbers. A local
# RandomState is used (instead of np.random.seed) so the global NumPy random
# state is left untouched.
rng = np.random.RandomState(0)

dframe = DataFrame({'k1': list('XXYYX'),
                    'k2': ['alpha', 'beta', 'alpha', 'beta', 'alpha'],
                    'dataset1': rng.randn(5),
                    'dataset2': rng.randn(5)})

dframe


Out[3]:
dataset1 dataset2 k1 k2
0 -0.933727 -0.334732 X alpha
1 0.276381 0.138139 X beta
2 -0.954061 0.665582 Y alpha
3 -1.093598 -1.893270 Y beta
4 0.033247 -0.628099 X alpha

In [4]:
# Group the dataset1 column using the values of the k1 column as the key.
# The key is passed as a Series (not a column name) because the object being
# grouped is itself a single Series.
group1 = dframe.dataset1.groupby(by=dframe.k1)

In [5]:
# Display the grouped object itself. Its repr is a SeriesGroupBy object, not
# computed values; an aggregation such as .mean() has to be called on it to
# get numbers.
group1


Out[5]:
<pandas.core.groupby.SeriesGroupBy object at 0x11471aa90>

In [6]:
# Mean of dataset1 within each k1 group; the result is a Series indexed by
# the distinct k1 values.
group1.mean()


Out[6]:
k1
X   -0.208033
Y   -1.023830
Name: dataset1, dtype: float64

In [7]:
# External grouping keys: plain arrays that are not columns of dframe.
# Each has one entry per dframe row (5), so they can be passed to groupby.
cities = np.array('NY LA LA NY NY'.split())

month = np.array(['JAN', 'FEB'] * 2 + ['JAN'])

In [8]:
# Group dataset1 by two external key arrays at once and take the mean of each
# (city, month) combination. The keys are matched to rows by position.
# NOTE: this rebinds group1, which now holds the aggregated Series rather
# than a groupby object.
group1 = dframe.dataset1.groupby(by=[cities, month]).mean()

In [9]:
# Display the per-(city, month) means of dataset1 that group1 now holds; the
# result carries a two-level index (city, then month).
group1


Out[9]:
LA  FEB    0.276381
    JAN   -0.954061
NY  FEB   -1.093598
    JAN   -0.450240
Name: dataset1, dtype: float64

In [10]:
# Mean of every numeric column within each k1 group.
# The numeric columns are selected explicitly because k2 holds strings:
# older pandas silently dropped such columns from the result, but
# pandas >= 2.0 raises TypeError when asked for the mean of a string column.
# Explicit selection gives the same table on every pandas version.
dframe.groupby('k1')[['dataset1', 'dataset2']].mean()


Out[10]:
dataset1 dataset2
k1
X -0.208033 -0.274897
Y -1.023830 -0.613844

In [11]:
# Group on both key columns and average the remaining columns (dataset1 and
# dataset2); the result is indexed by the (k1, k2) pairs.
dframe.groupby(by=['k1', 'k2']).mean()


Out[11]:
dataset1 dataset2
k1 k2
X alpha -0.450240 -0.481416
beta 0.276381 0.138139
Y alpha -0.954061 0.665582
beta -1.093598 -1.893270

In [12]:
# Count how many rows fall into each (k1, k2) group.
dframe.groupby(by=['k1', 'k2']).size()


Out[12]:
k1  k2   
X   alpha    2
    beta     1
Y   alpha    1
    beta     1
dtype: int64

In [14]:
# Iterating over a groupby yields (group key, sub-frame) pairs.
# print is called as a function with a single argument so the cell runs on
# Python 3 and prints exactly the same text on Python 2.
for name, group in dframe.groupby('k1'):
    print("This is the %s group" % name)
    print(group)
    print('\n')


This is the X group
   dataset1  dataset2 k1     k2
0 -0.933727 -0.334732  X  alpha
1  0.276381  0.138139  X   beta
4  0.033247 -0.628099  X  alpha


This is the Y group
   dataset1  dataset2 k1     k2
2 -0.954061  0.665582  Y  alpha
3 -1.093598 -1.893270  Y   beta



In [15]:
# With several grouping keys, each group key is a tuple, unpacked here into
# (k1, k2) directly in the loop header.
# print is called as a function with a single argument so the cell runs on
# Python 3 and prints exactly the same text on Python 2.
for (k1, k2), group in dframe.groupby(['k1', 'k2']):
    print('Key1 = %s Key2 = %s' % (k1, k2))
    print(group)
    print('\n')


Key1 = X Key2 = alpha
   dataset1  dataset2 k1     k2
0 -0.933727 -0.334732  X  alpha
4  0.033247 -0.628099  X  alpha


Key1 = X Key2 = beta
   dataset1  dataset2 k1    k2
1  0.276381  0.138139  X  beta


Key1 = Y Key2 = alpha
   dataset1  dataset2 k1     k2
2 -0.954061  0.665582  Y  alpha


Key1 = Y Key2 = beta
   dataset1  dataset2 k1    k2
3 -1.093598  -1.89327  Y  beta



In [17]:
# Materialise the groups as a dict mapping each k1 value to its sub-frame,
# then look up the rows whose k1 is 'X'.
group_dict = {key: frame for key, frame in dframe.groupby('k1')}
group_dict['X']


Out[17]:
dataset1 dataset2 k1 k2
0 -0.933727 -0.334732 X alpha
1 0.276381 0.138139 X beta
4 0.033247 -0.628099 X alpha

In [18]:
# Split the frame's COLUMNS by dtype: one sub-frame per distinct dtype.
# groupby(..., axis=1) is deprecated in pandas 2.1+, so the transposed frame
# is grouped instead (its rows are the original columns). That yields the
# column labels belonging to each dtype; the columns are then selected from
# the original frame so that each one keeps its own dtype (transposing a
# mixed-type frame would turn every column into object dtype).
group_dict_axis1 = {dtype: dframe[columns.index]
                    for dtype, columns in dframe.T.groupby(dframe.dtypes)}

In [19]:
# Display the dict of sub-frames keyed by column dtype.
group_dict_axis1


Out[19]:
{dtype('float64'):    dataset1  dataset2
 0 -0.933727 -0.334732
 1  0.276381  0.138139
 2 -0.954061  0.665582
 3 -1.093598 -1.893270
 4  0.033247 -0.628099, dtype('O'):   k1     k2
 0  X  alpha
 1  X   beta
 2  Y  alpha
 3  Y   beta
 4  X  alpha}

In [23]:
# Inspect the dict keys: they are dtype objects, not strings.
group_dict_axis1.keys()


Out[23]:
[dtype('O'), dtype('float64')]

In [27]:
# Look up the sub-frame of float64 columns. The keys are dtype objects, so
# the lookup key is built with np.dtype('float64').
group_dict_axis1[np.dtype('float64')]


Out[27]:
dataset1 dataset2
0 -0.933727 -0.334732
1 0.276381 0.138139
2 -0.954061 0.665582
3 -1.093598 -1.893270
4 0.033247 -0.628099

In [28]:
# Group on both keys but keep only the dataset2 column. Selecting with a
# list ([['dataset2']]) keeps the grouped object frame-like, so the
# aggregated result is a one-column DataFrame rather than a Series.
dataset2_group = dframe.groupby(by=['k1', 'k2'])[['dataset2']]

# Mean of dataset2 for every (k1, k2) pair.
dataset2_group.mean()


Out[28]:
dataset2
k1 k2
X alpha -0.481416
beta 0.138139
Y alpha 0.665582
beta -1.893270

In [ ]: