In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pandas import DataFrame, Series
In [9]:
# Build a 5x5 frame of standard-normal draws with named rows and columns.
people = DataFrame(
    np.random.randn(5, 5),
    columns=['a', 'b', 'c', 'd', 'e'],
    index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'],
)
# Add a few NA values: the row at position 2 ('Wes'), columns 'b' and 'c'.
# `.ix` was removed in pandas 1.0. On this string index its integer slice was
# positional, so pick the row label by position and assign through `.loc`.
people.loc[people.index[2:3], ['b', 'c']] = np.nan
people
Out[9]:
In [12]:
# One group label per row of `people`, matched by position.
key = ['one', 'two', 'one', 'two', 'one']
# Column means within each group label.
people.groupby(key).mean()
Out[12]:
In [13]:
# Broadcast each group's column mean back onto every row of that group.
# Use the string alias rather than `np.mean`: passing the NumPy function to
# `transform` is deprecated, while 'mean' selects the same built-in group mean.
people.groupby(key).transform('mean')
Out[13]:
In [14]:
def demean(arr):
    """Return `arr` with its own mean subtracted from every element.

    Parameters
    ----------
    arr : object supporting `.mean()` and subtraction
        For example a pandas Series/DataFrame or a NumPy array.

    Returns
    -------
    Same kind of object as `arr - arr.mean()` produces.
    """
    return arr - arr.mean()
# Apply `demean` separately within each group defined by `key`; `transform`
# returns a result aligned with the rows of `people`.
demeaned = people.groupby(key).transform(demean)
demeaned
Out[14]:
In [15]:
# Sanity check: group means of the demeaned data are expected to be zero
# (up to floating-point rounding).
demeaned.groupby(key).mean()
Out[15]:
In [ ]:
In [3]:
# Column label -> group name. 'f' is not a column of `people`; a mapping key
# with no matching label is simply unused.
mapping = {'a': 'red', 'b': 'red', 'c': 'blue',
           'd': 'blue', 'e': 'red', 'f': 'orange'}
# `groupby(..., axis=1)` is deprecated in recent pandas. Group the rows of the
# transposed frame instead (they are the original columns) and transpose each
# aggregate back so the original rows stay on the index.
by_column = people.T.groupby(mapping)
by_column.sum().T
Out[3]:
In [ ]:
In [4]:
# The same column-to-group mapping as a Series: the dict keys become the
# index and the group names become the values.
map_series = Series(mapping)
map_series
Out[4]:
In [5]:
# Count non-NA values per row within each column group. `axis=1` grouping is
# deprecated in recent pandas, so group the transposed frame by the Series and
# transpose the counts back.
people.T.groupby(map_series).count().T
Out[5]:
In [6]:
# A function used as the group key is called on each index label, so rows are
# grouped by the length of the row label and summed per length.
people.groupby(len).sum()
Out[6]:
In [7]:
# One extra label per row of `people`, matched by position.
key_list = ['one', 'one', 'one', 'two', 'two']
# Functions and label lists can be mixed as keys: group by
# (length of row label, key_list entry) and take the column-wise minimum.
people.groupby([len, key_list]).min()
Out[7]:
In [ ]: