In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
In [2]:
# Fetch the iris sample dataset through seaborn and show its (rows, columns) shape.
df = sns.load_dataset('iris')
dims = df.shape
print(dims)
In [3]:
# Peek at the first five rows of the frame.
preview = df.head(n=5)
print(preview)
In [4]:
# Replace the column labels with short aliases, then re-display the first rows
# so the new labels are visible.
short_names = ['sl', 'sw', 'pl', 'pw', 'species']
df.columns = short_names
renamed_preview = df.head(n=5)
print(renamed_preview)
In [5]:
# Group the rows by the 'species' column; printing the result shows only the
# GroupBy object's repr, because nothing is aggregated yet.
grouped = df.groupby(by='species')
print(grouped)
In [6]:
# Show which class groupby() returned.
grouped_cls = type(grouped)
print(grouped_cls)
In [7]:
# Number of rows in each species group.
group_sizes = grouped.size()
print(group_sizes)
In [8]:
# Per-species mean of every remaining column.
group_means = grouped.mean()
print(group_means)
In [9]:
# Per-species minimum of every remaining column.
group_mins = grouped.min()
print(group_mins)
In [10]:
# Per-species maximum of every remaining column.
group_maxes = grouped.max()
print(group_maxes)
In [11]:
# Per-species total of every remaining column.
group_sums = grouped.sum()
print(group_sums)
In [12]:
# Inspect the type of object that an aggregation such as mean() hands back.
mean_result = grouped.mean()
print(type(mean_result))
In [13]:
# Same per-species mean, this time requested by name through the generic
# aggregation entry point (agg is the short alias of aggregate).
mean_by_name = grouped.aggregate('mean')
print(mean_by_name)
In [14]:
# Per-species maximum via agg().
# The aggregation is named with the string 'max' instead of the builtin `max`:
# pandas 2.1+ raises a FutureWarning when the builtin is passed and recommends
# the string form, which yields the same table.
print(grouped.agg('max'))
In [15]:
# Per-species minimum via agg().
# The aggregation is named with the string 'min' instead of `np.min`:
# pandas 2.1+ raises a FutureWarning when a numpy reduction is passed and
# recommends the string form, which yields the same table.
print(grouped.agg('min'))
In [16]:
# Aggregate with an anonymous function: for each group and column, the largest
# value minus the smallest value.
range_by_lambda = grouped.agg(lambda col: max(col) - min(col))
print(range_by_lambda)
In [17]:
# Reveal what kind of object agg() passes to the callable by returning its
# type, then look at the 'sl' column of the result.
arg_types = grouped.agg(lambda col: type(col))
print(arg_types['sl'])
In [18]:
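# A function passed to agg() must reduce each group to a single value.
# The call below does not (it returns one value per row), so it fails:
# print(grouped.agg(lambda x: x + 1))
# Exception: Must produce aggregated value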
In [19]:
def my_func(x):
    """Return the spread of ``x``: its largest value minus its smallest value.

    Parameters
    ----------
    x : iterable
        Non-empty collection of values that can be compared and subtracted.
        Other cells in this notebook pass this function to ``grouped.agg``.

    Returns
    -------
    The difference ``max(x) - min(x)``.

    Raises
    ------
    ValueError
        If ``x`` is empty (raised by the builtin ``max``).
    """
    return max(x) - min(x)
In [20]:
# Aggregate every column of each group with the user-defined my_func.
range_by_func = grouped.agg(my_func)
print(range_by_func)
In [21]:
# Apply several aggregations at once; the result has one sub-column per
# aggregation under each original column.
# All three are given by name: pandas 2.1+ raises a FutureWarning when the
# builtin `max` or `np.min` is passed and recommends the string form.
# NOTE(review): on older numpy the `np.min` sub-column may have been labelled
# 'amin'; with the string it is always 'min' — confirm no reader depends on it.
print(grouped.agg(['mean', 'max', 'min']))
In [22]:
# Choose a different aggregation for each column by passing a dict of
# {column: aggregation}.
# The built-in reductions are named by string, because pandas 2.1+ raises a
# FutureWarning when the builtin `max` or `np.min` is passed; the user-defined
# my_func is still passed as a callable.
per_column_aggs = {
    'sl': 'mean',
    'sw': 'max',
    'pl': 'min',
    'pw': my_func,
}
print(grouped.agg(per_column_aggs))
In [23]:
# Build the per-group summary table, then show only the part for column 'sl'.
summary = grouped.describe()
print(summary['sl'])
In [24]:
# Check the type of the object returned by max() before plotting it.
max_result = grouped.max()
print(type(max_result))
In [25]:
# Switch matplotlib to the `agg` backend before plotting.
# NOTE(review): presumably chosen so the figure is only written to disk (the
# next cell calls savefig) rather than displayed inline — confirm.
%matplotlib agg
In [26]:
# Draw the per-species maxima as a bar chart and save it as a JPEG.
out_path = Path('data/dst/iris_pandas_groupby_max.jpg')
# savefig does not create missing folders; without this the cell fails with
# FileNotFoundError on a fresh checkout where data/dst does not exist yet.
out_path.parent.mkdir(parents=True, exist_ok=True)

ax = grouped.max().plot.bar(rot=0)  # rot=0 keeps the x tick labels horizontal
fig = ax.get_figure()
fig.savefig(out_path)