In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pandas import DataFrame, Series
In [2]:
# Two independent columns of standard-normal noise, 1000 rows each.
frame = DataFrame(dict(data1=np.random.randn(1000),
                       data2=np.random.randn(1000)))
# Bucket data1 into 4 bins; `factor` labels every row with its bin.
factor = pd.cut(frame['data1'], bins=4)
factor[:10]
Out[2]:
In [5]:
def get_stats(group):
    """Summarise one group of values.

    Parameters
    ----------
    group : object exposing ``min()``, ``max()``, ``count()`` and ``mean()``
        (here: the slice of a Series handed over by ``groupby(...).apply``).

    Returns
    -------
    dict
        Keys ``'min'``, ``'max'``, ``'count'`` and ``'mean'`` mapped to the
        result of the corresponding method call on ``group``.
    """
    return {
        'min': group.min(),
        'max': group.max(),
        'count': group.count(),
        'mean': group.mean(),
    }
# Summarise data2 separately for each data1 bin held in `factor`.
grouped = frame['data2'].groupby(by=factor)
grouped.apply(get_stats).unstack()
Out[5]:
In [6]:
# labels=False makes qcut return the quantile numbers rather than interval labels.
grouping = pd.qcut(frame['data1'], q=10, labels=False)
grouped = frame['data2'].groupby(by=grouping)
grouped.apply(get_stats).unstack()
Out[6]:
In [8]:
s = Series(np.random.randn(6))
# Knock out every second value (positions 0, 2, 4) to simulate missing data.
s.iloc[::2] = np.nan
s
Out[8]:
In [9]:
# Replace the missing entries with the mean of the remaining values.
s.fillna(value=s.mean())
Out[9]:
In [10]:
states = ['Ohio', 'New York', 'Vermont', 'Florida',
          'Oregon', 'Nevada', 'California', 'Idaho']
# Region label for each state, aligned position-by-position with `states`.
group_key = ['East', 'East', 'East', 'East', 'West', 'West', 'West', 'West']
data = Series(np.random.randn(8), index=states)
# Mark a few states as missing.
data.loc[['Vermont', 'Nevada', 'Idaho']] = np.nan
data
Out[10]:
In [11]:
# Mean of `data` within each region listed in `group_key`.
data.groupby(by=group_key).mean()
Out[11]:
In [12]:
def fill_mean(g):
    """Return ``g`` with its missing values replaced by the mean of ``g``."""
    group_mean = g.mean()
    return g.fillna(group_mean)
# Fill each region's missing values with that region's own mean.
data.groupby(by=group_key).apply(fill_mean)
Out[12]:
In [13]:
# Fixed replacement value per group key.
fill_values = dict(East=0.5, West=-1)


def fill_func(g):
    """Fill missing values in ``g`` with the entry of ``fill_values`` keyed by ``g.name``."""
    replacement = fill_values[g.name]
    return g.fillna(replacement)
# Fill each region's missing values with the preset value for that region.
data.groupby(by=group_key).apply(fill_func)
Out[13]:
In [14]:
# Suits: Hearts, Spades, Clubs, Diamonds
suits = ['H', 'S', 'C', 'D']
# Values for one suit are 1..10 followed by three 10s, repeated for all four suits.
# list(range(...)) is required on Python 3, where range is not a list and
# cannot be concatenated with one; it is a no-op copy on Python 2.
card_val = (list(range(1, 11)) + [10] * 3) * 4
# The last three names all map to the value 10, so their relative order does
# not affect the values stored in the deck.
base_names = ['A'] + list(range(2, 11)) + ['J', 'K', 'Q']
cards = []
for suit in suits:
    # Card label = rank followed by suit letter, e.g. 'AH', '10S'.
    cards.extend(str(num) + suit for num in base_names)
# Series mapping each card label to its value.
deck = Series(card_val, index=cards)
deck[:13]
Out[14]:
In [15]:
def draw(deck, n=5):
    """Pick entries from ``deck`` at random, without repeats.

    Parameters
    ----------
    deck : object with ``len()`` and a positional ``take`` method
        (here: a pandas Series of cards).
    n : int, default 5
        Number of entries to draw. Only the first ``n`` positions of the
        permutation are used, so asking for more than ``len(deck)`` returns
        the whole deck in shuffled order.

    Returns
    -------
    The result of ``deck.take`` on the chosen positions.
    """
    # A permutation of all positions contains no duplicates.
    shuffled_positions = np.random.permutation(len(deck))
    return deck.take(shuffled_positions[:n])
# Draw a hand of 5 cards (the default hand size of draw).
draw(deck, n=5)
Out[15]:
In [16]:
def get_suit(card):
    """Return the suit of ``card``: only its last character is needed."""
    return card[-1]
# Group the cards by suit and draw 2 random cards within each suit.
deck.groupby(by=get_suit).apply(draw, n=2)
Out[16]:
In [18]:
# Another approach: group_keys=False leaves the suit keys out of the result index.
deck.groupby(by=get_suit, group_keys=False).apply(draw, n=2)
Out[18]:
In [ ]: