In [1]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
In [2]:
# Build a small demo frame: two key columns (k1, k2) and two numeric columns.
# The random values come from a seeded generator so that every
# Restart & Run All produces the same frame (and the same grouped results).
SEED = 42
rng = np.random.default_rng(SEED)
df = DataFrame({
    'k1': ['X', 'X', 'Y', 'Y', 'Z'],
    'k2': ['alpha', 'beta', 'alpha', 'beta', 'alpha'],
    'dataset1': rng.standard_normal(5),
    'dataset2': rng.standard_normal(5),
})
df
Out[2]:
In [3]:
# Group the 'dataset1' values using the 'k1' column (passed as a Series) as
# the key. The result is a lazy GroupBy object; nothing is computed until an
# aggregation is requested.
group1 = df['dataset1'].groupby(by=df['k1'])
group1
Out[3]:
In [4]:
# Mean of the grouped 'dataset1' values, one entry per distinct 'k1' key.
group1.mean()
Out[4]:
In [5]:
# Group keys do not have to live in the frame: this NumPy array supplies one
# city label per row of df (five labels for five rows).
cities = np.asarray(['NY', 'LA', 'LA', 'NY', 'NY'])
In [6]:
# Second external key array: one month label per row of df.
month = np.asarray(['JAN', 'FEB', 'JAN', 'FEB', 'JAN'])
In [7]:
# Group 'dataset1' by the two external arrays at once; the mean is reported
# per (city, month) combination.
df['dataset1'].groupby(by=[cities, month]).mean()
Out[7]:
In [9]:
# Pass a column name as the key: rows are grouped by the values in 'k1'.
# numeric_only=True limits the mean to the numeric columns; without it the
# string column 'k2' makes pandas >= 2.0 raise a TypeError instead of
# silently dropping the column as older versions did.
df.groupby('k1').mean(numeric_only=True)
Out[9]:
In [10]:
# Display the original (ungrouped) frame again.
df
Out[10]:
In [11]:
# Group by several columns at once: the mean is computed for every
# (k1, k2) pair that occurs in the data.
df.groupby(by=['k1', 'k2']).mean()
Out[11]:
In [12]:
# Number of rows in each 'k1' group.
df.groupby('k1').size()
Out[12]:
In [13]:
# Number of rows in each 'k2' group.
df.groupby('k2').size()
Out[13]:
In [21]:
# Iterating over a GroupBy yields (key, sub-frame) pairs, one per group.
# The loop body must be indented; without indentation the cell fails with an
# IndentationError.
for name, group in df.groupby('k1'):
    print('This is the %s group' % name)
    print(group)
    print('\n')
In [24]:
# With two grouping columns each key is a (k1, k2) tuple, unpacked directly
# in the loop header. The loop body must be indented; without indentation the
# cell fails with an IndentationError.
for (k1, k2), group in df.groupby(['k1', 'k2']):
    print('Key1 = %s , key2 = %s' % (k1, k2))
    print(group)
    print('\n')
In [28]:
# Materialise the groups as a plain dict: each 'k1' key maps to the
# sub-frame holding that group's rows.
group_dict = {key: frame for key, frame in df.groupby('k1')}
group_dict
Out[28]:
In [27]:
# Look up the sub-frame stored under key 'X'.
group_dict['X']
Out[27]:
In [29]:
# Look up the sub-frame stored under key 'Y'.
group_dict['Y']
Out[29]:
In [35]:
# Separate the columns by dtype: one sub-frame per distinct column dtype,
# keyed by that dtype.
# Built with an explicit column mask rather than
# df.groupby(df.dtypes, axis=1), because the `axis` argument of
# DataFrame.groupby is deprecated (pandas >= 2.1). Selecting columns
# directly also keeps each column's original dtype.
group_dict_axis1 = {
    dtype: df.loc[:, [col_dtype == dtype for col_dtype in df.dtypes]]
    for dtype in dict.fromkeys(df.dtypes)  # distinct dtypes, first-seen order
}
In [32]:
# Display the dict of per-dtype sub-frames.
group_dict_axis1
Out[32]:
In [36]:
# Select a column after grouping: indexing the GroupBy with a list
# ([['dataset2']]) keeps a DataFrame-style result, so the mean below is a
# one-column frame indexed by (k1, k2).
dataset2_group = df.groupby(by=['k1', 'k2'])[['dataset2']]
dataset2_group.mean()
Out[36]:
In [ ]: