In [31]:
import pandas as pd
# Raw records for the example, stored column-wise: every key becomes a
# DataFrame column and the i-th entries of the three lists form one row.
data = dict(
    Company=['GOOG', 'GOOG', 'MSFT', 'MSFT', 'FB', 'FB'],
    Person=['Sam', 'Charlie', 'Amy', 'Vanessa', 'Carl', 'Sarah'],
    Sales=[200, 120, 340, 124, 243, 350],
)
In [32]:
# Turn the column-wise dict into a DataFrame; dict keys become column labels.
df = pd.DataFrame(data=data)
In [33]:
# Show the full frame (six rows: Company, Person, Sales) before grouping.
df
Out[33]:
Now you can use the .groupby() method to group rows together based on a column name. For instance, let's group by Company. This will create a DataFrameGroupBy object:
In [34]:
# Grouping is lazy: this only returns a DataFrameGroupBy handle keyed on the
# 'Company' column; nothing is aggregated until a method is called on it.
df.groupby(by='Company')
Out[34]:
You can save this object as a new variable:
In [35]:
# Keep the grouped object around so the aggregations below can reuse it.
by_comp = df.groupby(by="Company")
And then call aggregate methods on the object:
In [36]:
# Average of the numeric columns within each company.
# numeric_only=True is needed because the 'Person' column holds strings:
# pandas >= 2.0 no longer drops non-numeric columns silently and raises a
# TypeError when asked for the mean of an object column.
by_comp.mean(numeric_only=True)
Out[36]:
In [37]:
# Same aggregation written as a single chained expression.
# numeric_only=True restricts the mean to numeric columns; without it,
# pandas >= 2.0 raises a TypeError on the string column 'Person'.
df.groupby('Company').mean(numeric_only=True)
Out[37]:
More examples of aggregate methods:
In [38]:
# Sample standard deviation of the numeric columns within each company.
# numeric_only=True (available for std since pandas 1.5) skips the string
# column 'Person', which would otherwise make pandas >= 2.0 raise an error.
by_comp.std(numeric_only=True)
Out[38]:
In [39]:
# Per-company minimum of every column. Each column is reduced independently
# (strings such as 'Person' are compared lexicographically), so the values in
# one output row do not necessarily come from the same input row.
by_comp.min()
Out[39]:
In [40]:
# Per-company maximum of every column. Each column is reduced independently
# (strings such as 'Person' are compared lexicographically), so the values in
# one output row do not necessarily come from the same input row.
by_comp.max()
Out[40]:
In [41]:
# Number of non-null entries in each column, per company.
by_comp.count()
Out[41]:
In [42]:
# Summary statistics (count, mean, std, min, quartiles, max) for each company,
# one row per group.
by_comp.describe()
Out[42]:
In [43]:
# Flip the summary so that companies become columns and each statistic gets
# its own row.
by_comp.describe().T
Out[43]:
In [44]:
# After transposing, each company is a column, so plain column selection
# pulls out the summary statistics for a single company.
by_comp.describe().T['GOOG']
Out[44]: