notebook.community

Edit and run



In [1]:

    
import numpy as np
import pandas as pd
from pandas import Series,DataFrame



In [2]:

    
# Seed NumPy's global generator so the random columns are identical on every
# Restart & Run All. Numeric outputs captured from an earlier, unseeded run
# will not match the values produced here.
np.random.seed(0)

# Example frame: two categorical key columns (k1, k2) and two columns of
# standard-normal noise to aggregate.
df = DataFrame({'k1':['X','X','Y','Y','Z'],
                'k2':['alpha','beta','alpha','beta','alpha'],
                'dataset1':np.random.randn(5),
                'dataset2':np.random.randn(5)})
df



In [3]:

    
# Take the dataset1 column and group its values by the labels held in k1.
k1_labels = df['k1']
dataset1_values = df['dataset1']
group1 = dataset1_values.groupby(k1_labels)

# Displaying the grouped object only shows its repr; nothing is aggregated yet.
group1









    Out[3]:





<pandas.core.groupby.SeriesGroupBy object at 0x7fc69cfb6ed0>



In [4]:

    
# Average dataset1 within each k1 group; the result is a Series indexed by k1.
group1.mean()









    Out[4]:





k1
X    0.035253
Y    0.032087
Z   -0.281585
Name: dataset1, dtype: float64



In [5]:

    
# Group keys do not have to be columns of df: any array holding one label per
# row can be used. This one supplies a city label for each of the five rows.
cities = np.asarray(['NY', 'LA', 'LA', 'NY', 'NY'])



In [6]:

    
# A second external key array: one month label per row.
month = np.asarray(['JAN', 'FEB', 'JAN', 'FEB', 'JAN'])



In [7]:

    
# Group dataset1 by both external key arrays at once and average every
# (city, month) combination; the result carries a two-level index.
city_month_keys = [cities, month]
df['dataset1'].groupby(city_month_keys).mean()









    Out[7]:





LA  FEB   -0.862080
    JAN   -1.431315
NY  FEB    1.495488
    JAN    0.325500
Name: dataset1, dtype: float64



In [9]:

    
# Pass a column name as the grouping key.
# numeric_only=True keeps the string column k2 out of the aggregation; without
# it, pandas 2.0+ raises a TypeError instead of silently dropping that column.
df.groupby('k1').mean(numeric_only=True)



In [10]:

    
# Show the frame again: the groupby calls above return new objects and do not modify df.
df



In [11]:

    
# Group on the (k1, k2) pair; the averaged frame is indexed by both keys.
key_columns = ['k1', 'k2']
df.groupby(key_columns).mean()



In [12]:

    
# Number of rows that fall into each k1 group.
df.groupby(by=['k1']).size()









    Out[12]:





k1
X    2
Y    2
Z    1
dtype: int64



In [13]:

    
# Number of rows that fall into each k2 group.
df.groupby(by='k2').size()









    Out[13]:





k2
alpha    3
beta     2
dtype: int64



In [21]:

    
# Iterating over a GroupBy yields (group key, sub-frame) pairs.
for name, group in df.groupby(by='k1'):
    header = 'This is the %s group' % name
    print(header)
    print(group)
    print('\n')









    



This is the X group
   dataset1  dataset2 k1     k2
0  0.932585  0.538335  X  alpha
1 -0.862080  1.898216  X   beta


This is the Y group
   dataset1  dataset2 k1     k2
2 -1.431315 -0.544031  Y  alpha
3  1.495488 -2.993656  Y   beta


This is the Z group
   dataset1  dataset2 k1     k2
4 -0.281585  0.169025  Z  alpha



In [24]:

    
# With several grouping columns each key is a tuple; unpack it into k1 and k2.
for group_key, group in df.groupby(by=['k1', 'k2']):
    k1, k2 = group_key
    print('Key1 = %s , key2 = %s' % (k1, k2))
    print(group)
    print('\n')









    



Key1 = X , key2 = alpha
   dataset1  dataset2 k1     k2
0  0.932585  0.538335  X  alpha


Key1 = X , key2 = beta
   dataset1  dataset2 k1    k2
1  -0.86208  1.898216  X  beta


Key1 = Y , key2 = alpha
   dataset1  dataset2 k1     k2
2 -1.431315 -0.544031  Y  alpha


Key1 = Y , key2 = beta
   dataset1  dataset2 k1    k2
3  1.495488 -2.993656  Y  beta


Key1 = Z , key2 = alpha
   dataset1  dataset2 k1     k2
4 -0.281585  0.169025  Z  alpha



In [28]:

    
# Materialise the groups as a plain dict mapping each k1 value to its sub-frame.
group_dict = {key: frame for key, frame in df.groupby('k1')}

group_dict









    Out[28]:





{'X':    dataset1  dataset2 k1     k2
 0  0.932585  0.538335  X  alpha
 1 -0.862080  1.898216  X   beta, 'Y':    dataset1  dataset2 k1     k2
 2 -1.431315 -0.544031  Y  alpha
 3  1.495488 -2.993656  Y   beta, 'Z':    dataset1  dataset2 k1     k2
 4 -0.281585  0.169025  Z  alpha}



In [27]:

    
# Look up the sub-frame holding the rows whose k1 value is 'X'.
group_dict['X']



In [29]:

    
# Look up the sub-frame holding the rows whose k1 value is 'Y'.
group_dict['Y']



In [35]:

    
# separate data by dtypes
# groupby(..., axis=1) is deprecated in recent pandas releases, so the columns
# are bucketed by dtype explicitly. The result is still a dict that maps each
# dtype to the sub-frame made of the columns having that dtype.
columns_by_dtype = {}
for column, dtype in df.dtypes.items():
    columns_by_dtype.setdefault(dtype, []).append(column)
group_dict_axis1 = {dtype: df[columns] for dtype, columns in columns_by_dtype.items()}



In [32]:

    
# Display the dict of sub-frames, one entry per column dtype.
group_dict_axis1









    Out[32]:





{dtype('float64'):    dataset1  dataset2
 0  0.932585  0.538335
 1 -0.862080  1.898216
 2 -1.431315 -0.544031
 3  1.495488 -2.993656
 4 -0.281585  0.169025, dtype('O'):   k1     k2
 0  X  alpha
 1  X   beta
 2  Y  alpha
 3  Y   beta
 4  Z  alpha}



In [36]:

    
# using columns
dataset2_group = df.groupby(['k1','k2'])[['dataset2']]

dataset2_group.mean()



In [ ]:

	dataset1	dataset2	k1	k2
0	0.932585	0.538335	X	alpha
1	-0.862080	1.898216	X	beta
2	-1.431315	-0.544031	Y	alpha
3	1.495488	-2.993656	Y	beta
4	-0.281585	0.169025	Z	alpha