In [161]:
%matplotlib inline
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
1. Series 多层索引
In [162]:
# Two parallel label lists define a two-level index:
# the first list supplies the outer level, the second the inner level.
outer_labels = ['a','a','a','b','b','b','c','c','d','d']
inner_labels = [0,1,2,0,1,2,0,1,0,1]
data = Series(np.random.randn(10), index=[outer_labels, inner_labels])
data
Out[162]:
index 参数接收两个 list:第一个 list 作为外层索引,第二个 list 作为内层索引。
In [163]:
# Inspect the index object that pandas built from the two label lists.
data.index
Out[163]:
In [164]:
# Label-based lookup of every row under outer label 'b'.
data.loc['b']
Out[164]:
In [165]:
# Label slice on the outer level; both endpoints ('a' and 'c') are included.
data.loc['a':'c']
Out[165]:
In [166]:
# Pick several outer labels at once by passing a list of labels.
wanted_outer = ['a', 'c']
data.loc[wanted_outer]
Out[166]:
In [167]:
# Select on the inner level: every outer label, inner label 1.
data[:,1]
Out[167]:
In [168]:
# Full (outer, inner) key addresses a single element.
data.loc[('a', 0)]
Out[168]:
2. DataFrame 多层索引
In [169]:
# 6x3 frame of random values with two-level labels on both the rows and the columns.
row_levels = [['a','a','b','b','c','c'], [1,2,1,2,1,2]]
col_levels = [['beijing','beijing','shanghai'], ['boy','girl','boy']]
df = DataFrame(np.random.randn(6,3), index=row_levels, columns=col_levels)
df
Out[169]:
In [170]:
# Select by outer column label: yields the inner columns found under 'beijing'.
df['beijing']
Out[170]:
In [171]:
# Select several outer column labels at once.
df[['beijing','shanghai']]
Out[171]:
In [172]:
# Two-step selection: outer column label 'beijing' first, then inner label 'boy'.
df['beijing']['boy']
Out[172]:
In [173]:
# Rows under outer index label 'a', all columns.
df.loc['a',:]
Out[173]:
In [174]:
# Rows under outer labels 'a' and 'b', restricted to the 'beijing' columns.
df.loc[['a','b'],'beijing']
Out[174]:
In [175]:
# Two-step form: take the 'beijing' columns first, then the rows under 'a' and 'b'.
df['beijing'].loc[['a','b']]
Out[175]:
In [176]:
# Replace the index with three parallel label lists, giving a three-level index.
# The second level now holds strings ('0', '1', '2') rather than integers.
first_level = ['a','a','a','b','b','b','c','c','d','d']
second_level = ['0','1','2','0','1','2','0','1','0','1']
third_level = ['aa','aa','aa','aa','aa','bb','bb','bb','bb','bb']
data.index = [first_level, second_level, third_level]
data
Out[176]:
In [177]:
# With no arguments, swaplevel exchanges the two innermost index levels.
data.swaplevel()
Out[177]:
swaplevel() 函数用于交换索引的两个层级;不传参数时,默认交换最内层的两个层级。
In [178]:
# Passing one level swaps it with the innermost level (the second argument defaults to -1).
data.swaplevel(0)
Out[178]:
In [179]:
# Name the three index levels so they can be referenced by name instead of position.
data.index.names = ['one','two','three']
data
Out[179]:
In [180]:
# Swap the levels named 'one' and 'three', identifying them by name.
data.swaplevel('one','three')
Out[180]:
swaplevel() 函数也可以通过索引层级的名称(name)来指定要交换的两个层级。
In [181]:
# unstack() with no argument pivots the innermost index level into columns.
frame = data.unstack()
frame
Out[181]:
In [182]:
# stack() moves the column labels back into an innermost index level.
frame.stack()
Out[182]:
In [183]:
# Pivot the level named 'two' into columns instead of the default innermost level.
frame = data.unstack('two')
frame
Out[183]:
In [184]:
# Unstack once more: the innermost remaining index level also moves into the columns.
# NOTE(review): `frame` is rebound to its own unstacked result, so re-running this
# cell on its own unstacks a second time.
frame = frame.unstack()
frame
Out[184]:
In [185]:
# Select the columns under the outer column label '0' (a string label).
frame['0']
Out[185]:
In [186]:
# Drill down step by step: outer column '0', inner column 'aa', then row 'a'.
frame['0']['aa'].loc['a']
Out[186]:
In [187]:
# Show the current state of the series.
data
Out[187]:
In [188]:
# Sum the values within each label of index level 'one'.
# The `level=` argument of Series.sum was removed in pandas 2.0;
# grouping by the level and aggregating is the supported form.
data.groupby(level='one').sum()
Out[188]:
In [189]:
# Standard deviation of the values within each label of index level 'two'.
# The `level=` argument of Series.std was removed in pandas 2.0;
# grouping by the level and aggregating is the supported form.
data.groupby(level='two').std()
Out[189]:
In [190]:
# Show the current state of the frame.
df
Out[190]:
In [191]:
# Name the row levels and the column levels so they can be referenced by name.
df.index.names = ['one', 'two']
df.columns.names = ['la','lb']
df
Out[191]:
In [192]:
# Column sums within each label of row level 'one'.
# The `level=` argument of DataFrame.sum was removed in pandas 2.0;
# grouping by the level and aggregating is the supported form.
df.groupby(level='one').sum()
Out[192]:
In [193]:
# Column standard deviations within each label of row level 'two'.
# The `level=` argument of DataFrame.std was removed in pandas 2.0;
# grouping by the level and aggregating is the supported form.
df.groupby(level='two').std()
Out[193]:
In [194]:
# Row-wise sums across the columns that share the same label in column level 'la'.
# `sum(level=..., axis=1)` was removed in pandas 2.0 and column-wise groupby is
# deprecated, so transpose, group the (now row) level, aggregate, and transpose back.
df.T.groupby(level='la').sum().T
Out[194]:
In [195]:
# Show the current state of the series.
data
Out[195]:
In [196]:
# Group the series by the index level named 'one'.
# The bare name below displays the GroupBy object itself, not an aggregated result.
grouped = data.groupby(level='one')
grouped
Out[196]:
In [197]:
# Walk the groups: each iteration yields the group label and the sub-series for it.
# The loop body must be indented; without indentation the cell fails to parse.
for name, group in grouped:
    print(name)
    print('---------------------------')
    print(group)
In [198]:
# Number of elements in each group.
grouped.size()
Out[198]:
In [199]:
# Sum of the values in each group.
grouped.sum()
Out[199]:
In [200]:
# The same grouping and aggregation written as one chained expression.
data.groupby(level='one').sum()
Out[200]:
In [201]:
# Group by an external list of keys: the i-th key is the group of the i-th element,
# so the list has one entry per element of `data` (ten here).
key = [1,1,1,1,1,2,2,2,2,2]
# The loop body must be indented; without indentation the cell fails to parse.
for name, group in data.groupby(key):
    print(name)
    print('-------------------')
    print(group)
In [202]:
# Show the current state of the frame.
df
Out[202]:
In [226]:
# Group the rows by the index level named 'one' and count the rows in each group.
df_grouped = df.groupby(level = 'one')
df_grouped.size()
Out[226]:
In [204]:
# Walk the groups: each iteration yields the group label and the sub-frame for it.
# The loop body must be indented; without indentation the cell fails to parse.
for name, group in df_grouped:
    print(name)
    print('---------------------')
    print(group)
In [205]:
# Column sums within each group.
df_grouped.sum()
Out[205]:
In [206]:
# Sum across the columns that share the same label in column level 'la'.
# `groupby(..., axis=1)` is deprecated in pandas 2.1+, so transpose, group the
# (now row) level, aggregate, and transpose back.
df.T.groupby(level='la').sum().T
Out[206]:
In [208]:
# Attach a categorical column (one label per row) to use as a grouping key.
# NOTE(review): 'famale' looks like a misspelling of 'female' — confirm before changing,
# since the label shows up as a group key in later cells.
df['key1'] = ['male', 'famale', 'male', 'famale','male', 'male']
df
Out[208]:
In [209]:
# Group rows by the values in column 'key1' and average the other columns per group.
df.groupby('key1').mean()
Out[209]:
In [211]:
# Group by an external list of labels (one per row) and sum within each group.
key2 = ['bat','abc','bat','abc','abc','abc']
# `df` also carries the string column 'key1'. On pandas >= 2.0 a plain sum() would
# concatenate those strings, so restrict the aggregation to numeric columns.
df.groupby(key2).sum(numeric_only=True)
Out[211]:
In [212]:
# Show the current state of the frame.
df
Out[212]:
In [222]:
# Store a second set of grouping labels as a column so it can be grouped on by name.
df['key2'] = ['bat','abc','bat','abc', 'abc', 'abc']
df
Out[222]:
In [223]:
# Group by both key columns and average within each (key1, key2) combination.
df.groupby(['key1', 'key2']).mean()
Out[223]:
In [234]:
# Build a fresh 12-row frame for the pivot-table example:
# three categorical columns (A, B, C) and two random numeric columns (D, E).
# Note that this rebinds `df`, replacing the multi-indexed frame used above.
column_data = {
    'A': ['one', 'one', 'two', 'three'] * 3,
    'B': ['A', 'B', 'C'] * 4,
    'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
    'D': np.random.randn(12),
    'E': np.random.randn(12),
}
df = pd.DataFrame(column_data)
df
Out[234]:
In [239]:
# Pivot: rows keyed by (A, B), one column per distinct value of C,
# cells hold the aggregate of D (pivot_table's default aggregation is the mean).
pivot_df = pd.pivot_table(
    df,
    values='D',
    index=['A', 'B'],
    columns=['C'],
)
pivot_df
Out[239]:
In [242]:
# Select the pivot-table column for C == 'bar'.
pivot_df['bar']
Out[242]: