In [1]:
import pandas as pd
# Raw sales records as a column-oriented mapping (column name -> values).
# The same position across the three lists describes one salesperson.
data = dict(
    Company=['GOOG', 'GOOG', 'MSFT', 'MSFT', 'FB', 'FB'],
    Person=['Sam', 'Charlie', 'Amy', 'Vanessa', 'Carl', 'Sarah'],
    Sales=[200, 120, 340, 124, 243, 350],
)
In [2]:
# Build the DataFrame from the column-oriented dict; each key becomes a column.
df = pd.DataFrame(data=data)
In [3]:
# Display the full frame (six rows: Company, Person, Sales).
df
Out[3]:
Now you can use the .groupby() method to group rows together based on a column name. For instance, let's group by Company. This will create a DataFrameGroupBy object:
In [4]:
# Grouping is lazy: this only returns a DataFrameGroupBy object keyed on
# Company; nothing is computed until an aggregate method is called on it.
df.groupby(by='Company')
Out[4]:
You can save this object as a new variable:
In [5]:
# Keep the grouped object so several aggregations can reuse it.
by_comp = df.groupby(by="Company")
And then call aggregate methods on the object:
In [6]:
# Mean of the numeric columns (Sales) per company.
# numeric_only=True is required on pandas >= 2.0: the default became False,
# so the string column 'Person' would otherwise make mean() raise TypeError.
by_comp.mean(numeric_only=True)
Out[6]:
In [7]:
# Same aggregation as a single chained expression, without the intermediate
# variable. numeric_only=True skips the string column 'Person', which would
# otherwise raise TypeError on pandas >= 2.0.
df.groupby('Company').mean(numeric_only=True)
Out[7]:
More examples of aggregate methods:
In [8]:
# Sample standard deviation of the numeric columns (Sales) per company.
# numeric_only=True skips the string column 'Person', which would otherwise
# raise TypeError on pandas >= 2.0 (the keyword is available from pandas 1.5).
by_comp.std(numeric_only=True)
Out[8]:
In [9]:
# Minimum of every column within each company; for the string column
# 'Person' this is the alphabetically first name.
by_comp.min()
Out[9]:
In [10]:
# Maximum of every column within each company; for the string column
# 'Person' this is the alphabetically last name.
by_comp.max()
Out[10]:
In [11]:
# Number of non-null entries per column within each company.
by_comp.count()
Out[11]:
In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) per company.
by_comp.describe()
Out[12]:
In [13]:
# Flip the summary so companies become columns and statistics become rows.
by_comp.describe().T
Out[13]:
In [14]:
# After transposing, each company is a column, so plain column selection
# pulls out the summary statistics for GOOG alone.
by_comp.describe().T['GOOG']
Out[14]: