6.30 - Pandas groupby


In [1]:
import numpy as np
import pandas as pd

In [2]:
data = {'Company':['GOOG','GOOG','MSFT','MSFT','FB','FB'],
       'Person':['Sam','Charlie','Amy','Vanessa','Carl','Sarah'],
       'Sales':[200,120,340,124,243,350]}

In [4]:
df = pd.DataFrame(data)

In [5]:
df


Out[5]:
Company Person Sales
0 GOOG Sam 200
1 GOOG Charlie 120
2 MSFT Amy 340
3 MSFT Vanessa 124
4 FB Carl 243
5 FB Sarah 350

In [6]:
df.groupby('Company')


Out[6]:
<pandas.core.groupby.DataFrameGroupBy object at 0x7f2f18abfc50>

In [7]:
by_comp = df.groupby('Company')

In [9]:
by_comp.mean()


Out[9]:
Sales
Company
FB 296.5
GOOG 160.0
MSFT 232.0

In [10]:
by_comp.std()


Out[10]:
Sales
Company
FB 75.660426
GOOG 56.568542
MSFT 152.735065

In [13]:
by_comp.sum().loc['FB']


Out[13]:
Sales    593
Name: FB, dtype: int64

In [15]:
df.groupby('Company').sum().loc['FB'] # the one-liner


Out[15]:
Sales    593
Name: FB, dtype: int64

In [16]:
df.groupby('Company').count()


Out[16]:
Person Sales
Company
FB 2 2
GOOG 2 2
MSFT 2 2

In [18]:
df.groupby('Company').max() # Note that this does not work for strings. Data is mixed up now


Out[18]:
Person Sales
Company
FB Sarah 350
GOOG Sam 200
MSFT Vanessa 340

In [19]:
df.groupby('Company').describe()


Out[19]:
Sales
Company
FB count 2.000000
mean 296.500000
std 75.660426
min 243.000000
25% 269.750000
50% 296.500000
75% 323.250000
max 350.000000
GOOG count 2.000000
mean 160.000000
std 56.568542
min 120.000000
25% 140.000000
50% 160.000000
75% 180.000000
max 200.000000
MSFT count 2.000000
mean 232.000000
std 152.735065
min 124.000000
25% 178.000000
50% 232.000000
75% 286.000000
max 340.000000

In [20]:
df.groupby('Company').describe().transpose()['FB']


Out[20]:
count mean std min 25% 50% 75% max
Sales 2.0 296.5 75.660426 243.0 269.75 296.5 323.25 350.0

In [ ]: