In [1]:
import pandas as pd
import numpy as np

In [2]:
# Record the pandas version that produced the outputs in this notebook.
print(pd.__version__)


1.0.0

In [3]:
# agg and aggregate are the same function object, so either name can be used.
print(pd.DataFrame.agg is pd.DataFrame.aggregate)


True

In [4]:
# Small two-column integer frame used by the examples that follow.
df = pd.DataFrame(dict(A=[0, 1, 2], B=[3, 4, 5]))
print(df)


   A  B
0  0  3
1  1  4
2  2  5

In [5]:
# A list of function names yields a DataFrame with one row per function.
print(df.agg(['sum', 'mean', 'min', 'max']))


        A     B
sum   3.0  12.0
mean  1.0   4.0
min   0.0   3.0
max   2.0   5.0

In [6]:
# Confirm that the list form returns a DataFrame.
print(type(df.agg(['sum', 'mean', 'min', 'max'])))


<class 'pandas.core.frame.DataFrame'>

In [7]:
# A one-element list still yields a (one-row) DataFrame.
print(df.agg(['sum']))


     A   B
sum  3  12

In [8]:
# Confirm that even a one-element list returns a DataFrame.
print(type(df.agg(['sum'])))


<class 'pandas.core.frame.DataFrame'>

In [9]:
# A bare string (not wrapped in a list) yields a Series indexed by column name.
print(df.agg('sum'))


A     3
B    12
dtype: int64

In [10]:
# Confirm that the bare-string form returns a Series.
print(type(df.agg('sum')))


<class 'pandas.core.series.Series'>

In [11]:
# A dict maps each column to its own list of functions. Functions not requested
# for a column show up as NaN, and in this output the rows appear in
# alphabetical order rather than in the order they were given.
print(df.agg({'A': ['sum', 'min', 'max'],
              'B': ['mean', 'min', 'max']}))


        A    B
max   2.0  5.0
mean  NaN  4.0
min   0.0  3.0
sum   3.0  NaN

In [12]:
# Dict values given as bare strings (one function per column) yield a Series.
print(df.agg({'A': 'sum', 'B': 'mean'}))


A    3.0
B    4.0
dtype: float64

In [13]:
# Wrapping each function name in a list switches the result to a DataFrame,
# with NaN wherever a function was not requested for that column.
print(df.agg({'A': ['sum'], 'B': ['mean']}))


        A    B
mean  NaN  4.0
sum   3.0  NaN

In [14]:
# Mixing a list value and a bare-string value in the dict also yields a DataFrame.
print(df.agg({'A': ['min', 'max'], 'B': 'mean'}))


        A    B
max   2.0  NaN
mean  NaN  4.0
min   0.0  NaN

In [15]:
# axis=1 aggregates across the columns of each row: the result has one row per
# input row and one column per function.
print(df.agg(['sum', 'mean', 'min', 'max'], axis=1))


   sum  mean  min  max
0  3.0   1.5  0.0  3.0
1  5.0   2.5  1.0  4.0
2  7.0   3.5  2.0  5.0

In [16]:
# Take column 'A' as a Series for the Series.agg examples.
s = df['A']
print(s)


0    0
1    1
2    2
Name: A, dtype: int64

In [17]:
# On a Series, a list of function names yields a Series labelled by function name.
print(s.agg(['sum', 'mean', 'min', 'max']))


sum     3.0
mean    1.0
min     0.0
max     2.0
Name: A, dtype: float64

In [18]:
# Confirm that the list form on a Series returns a Series.
print(type(s.agg(['sum', 'mean', 'min', 'max'])))


<class 'pandas.core.series.Series'>

In [19]:
# A one-element list still yields a (one-element) Series, not a scalar.
print(s.agg(['sum']))


sum    3
Name: A, dtype: int64

In [20]:
# Confirm that even a one-element list returns a Series.
print(type(s.agg(['sum'])))


<class 'pandas.core.series.Series'>

In [21]:
# A bare string on a Series yields a single scalar value.
print(s.agg('sum'))


3

In [22]:
# Confirm that the scalar is a NumPy integer rather than a Series.
print(type(s.agg('sum')))


<class 'numpy.int64'>

In [23]:
# On a Series, the dict keys become the result labels and the values name the
# functions to apply.
print(s.agg({'Total': 'sum', 'Average': 'mean', 'Min': 'min', 'Max': 'max'}))


Total      3.0
Average    1.0
Min        0.0
Max        2.0
Name: A, dtype: float64

In [24]:
# print(s.agg({'NewLabel_1': ['sum', 'max'], 'NewLabel_2': ['mean', 'min']}))
# SpecificationError: nested renamer is not supported

In [25]:
# Strings are not limited to the usual reductions: each name here gives the
# same value as the Series method, NumPy function, or Series attribute called
# directly in the next three cells.
print(df.agg(['mad', 'amax', 'dtype']))


              A         B
mad    0.666667  0.666667
amax          2         5
dtype     int64     int64

In [26]:
# Direct Series method call matching the 'mad' row for column A.
print(df['A'].mad())


0.6666666666666666

In [27]:
# Direct NumPy function call matching the 'amax' row for column A.
print(np.amax(df['A']))


2

In [28]:
# Direct attribute access matching the 'dtype' row for column A.
print(df['A'].dtype)


int64

In [29]:
# print(df.agg(['xxx']))
# AttributeError: 'xxx' is not a valid function for 'Series' object

In [30]:
# print(df.agg('xxx'))
# AttributeError: 'xxx' is not a valid function for 'DataFrame' object

In [31]:
# DataFrame defines __array__ (prints True).
print(hasattr(pd.DataFrame, '__array__'))


True

In [32]:
# GroupBy does not define __array__ (prints False).
# NOTE(review): presumably shown to contrast which objects accept NumPy
# function names as strings -- confirm against the pandas source.
print(hasattr(pd.core.groupby.GroupBy, '__array__'))


False

In [33]:
# Callables (here a NumPy function and a Python builtin) can be passed instead
# of strings; the rows are labelled with the functions' names.
print(df.agg([np.sum, max]))


     A   B
sum  3  12
max  2   5

In [34]:
# Direct call matching the 'sum' row for column A.
print(np.sum(df['A']))


3

In [35]:
# Direct call matching the 'max' row for column A.
print(max(df['A']))


2

In [36]:
# np.abs works element by element: it returns a Series of the same length
# rather than a single value.
print(np.abs(df['A']))


0    0
1    1
2    2
Name: A, dtype: int64

In [37]:
# A non-reducing function inside a list keeps every row; the function's name
# ('absolute') appears as a second level in the column header.
print(df.agg([np.abs]))


         A        B
  absolute absolute
0        0        3
1        1        4
2        2        5

In [38]:
# print(df.agg([np.abs, max]))
# ValueError: cannot combine transform and aggregation operations

In [39]:
def my_func(x):
    """Return the smallest value in ``x`` divided by the largest value."""
    smallest = min(x)
    largest = max(x)
    return smallest / largest

In [40]:
# User-defined functions and lambdas work as well; the rows are labelled
# 'my_func' and '<lambda>' respectively.
print(df.agg([my_func, lambda x: min(x) / max(x)]))


            A    B
my_func   0.0  0.6
<lambda>  0.0  0.6

In [41]:
# Standard deviation with default arguments (prints 1.0).
print(df['A'].std())


1.0

In [42]:
# Passing ddof=0 gives a different value from the default call.
print(df['A'].std(ddof=0))


0.816496580927726

In [43]:
# Wrap the call in a lambda to get std(ddof=0) alongside the default 'std'
# within a single list of functions.
print(df.agg(['std', lambda x: x.std(ddof=0)]))


                 A         B
std       1.000000  1.000000
<lambda>  0.816497  0.816497

In [44]:
# With a bare string, the extra keyword argument takes effect: the result
# matches std(ddof=0).
print(df.agg('std', ddof=0))


A    0.816497
B    0.816497
dtype: float64

In [45]:
# NOTE: with a list, ddof=0 has no visible effect in the output recorded here:
# the result is 1.0, the same as the default std(). Use a lambda when a list
# entry needs non-default arguments.
print(df.agg(['std'], ddof=0))


       A    B
std  1.0  1.0

In [46]:
# Build df_str: the columns of df plus a string column C.
df_str = df.assign(C=['X', 'Y', 'Z'])
print(df_str)


   A  B  C
0  0  3  X
1  1  4  Y
2  2  5  Z

In [47]:
# df_str['C'].mean()
# TypeError: Could not convert XYZ to numeric

In [48]:
# With a string column present, 'sum' concatenates the strings while 'mean'
# yields NaN for that column.
print(df_str.agg(['sum', 'mean']))


        A     B    C
sum   3.0  12.0  XYZ
mean  1.0   4.0  NaN

In [49]:
# When none of the requested functions produce a value for the string column,
# C is omitted from the result entirely.
print(df_str.agg(['mean', 'std']))


        A    B
mean  1.0  4.0
std   1.0  1.0

In [50]:
# 'sum', 'min' and 'max' all produce a value for the string column, so C is kept.
print(df_str.agg(['sum', 'min', 'max']))


     A   B    C
sum  3  12  XYZ
min  0   3    X
max  2   5    Z

In [51]:
# Restrict to numeric columns first so the string column never reaches agg.
print(df_str.select_dtypes(include='number').agg(['sum', 'mean']))


        A     B
sum   3.0  12.0
mean  1.0   4.0