In [10]:
#第5章 pandas入门
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
In [11]:
##pandas的数据结构介绍
In [12]:
###Series
In [13]:
obj = Series([4,7,-5,3])
In [14]:
obj
Out[14]:
In [15]:
obj.values
Out[15]:
In [16]:
obj.index
Out[16]:
In [17]:
obj2 = Series([4,7,-5,3], index=['d','b','a','c'])
In [18]:
obj2
Out[18]:
In [19]:
obj2.index
Out[19]:
In [20]:
obj2['d'] = 6
In [21]:
obj2
Out[21]:
In [22]:
obj2[['c','a','d']]
Out[22]:
In [23]:
obj2[obj2>0]
Out[23]:
In [24]:
obj2*2
Out[24]:
In [25]:
np.exp(obj2)
Out[25]:
In [26]:
'b' in obj2
Out[26]:
In [27]:
'e' in obj2
Out[27]:
In [28]:
sdata = {'Ohio':35000, 'Texas':71000, 'Oregon':16000, 'Utah':5000}
In [29]:
obj3 = Series(sdata)
In [30]:
obj3
Out[30]:
In [31]:
obj3.index
Out[31]:
In [32]:
obj3.values
Out[32]:
In [33]:
states = ['California','Ohio','Oregon','Texas']
In [34]:
obj4 = Series(sdata, index=states)
In [35]:
obj4
Out[35]:
In [36]:
pd.isnull(obj4)
Out[36]:
In [37]:
pd.notnull(obj4)
Out[37]:
In [38]:
obj4.isnull()
Out[38]:
In [39]:
obj3 + obj4
Out[39]:
In [40]:
obj4.name = 'population'
In [41]:
obj4.index.name = 'state'
In [42]:
obj4
Out[42]:
In [43]:
###Series的索引可以通过赋值的方式就地修改
In [44]:
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
In [45]:
obj
Out[45]:
In [46]:
#DataFrame
In [47]:
data = {'state':['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
'year':[2000, 2001, 2002, 2001, 2002],
'pop':[1.5, 1.7, 3.6, 2.4, 2.9]}
In [48]:
frame = DataFrame(data)
In [49]:
frame
Out[49]:
In [50]:
DataFrame(data, columns=['year', 'state', 'pop'])
Out[50]:
In [51]:
frame2 = DataFrame(data, columns=['year', 'state', 'pop', 'debt'],
index=['one', 'two', 'three', 'four', 'five'])
In [52]:
frame2
Out[52]:
In [53]:
frame2.columns
Out[53]:
In [54]:
frame2['state']
Out[54]:
In [55]:
frame2.year
Out[55]:
In [56]:
frame2.ix['three']
Out[56]:
In [57]:
frame2.debt = 16.5
In [58]:
frame2
Out[58]:
In [59]:
frame2['debt'] = np.arange(5.)
In [60]:
frame2
Out[60]:
In [61]:
val = Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
In [62]:
frame2.debt = val
In [63]:
frame2
Out[63]:
In [64]:
frame2['eastern'] = frame2.state == 'Ohio'
In [65]:
frame2
Out[65]:
In [66]:
frame2.columns
Out[66]:
In [67]:
del frame2['eastern']
In [68]:
frame2.columns
Out[68]:
In [69]:
pop = {'Nevada':{2001:2.4, 2002:2.9},
'Ohio':{2000:1.5, 2001:1.7, 2002:3.6}}
In [70]:
frame3 = DataFrame(pop)
In [71]:
frame3
Out[71]:
In [72]:
frame3.T
Out[72]:
In [73]:
DataFrame(pop, index=[2001, 2002, 2003])
Out[73]:
In [74]:
pdata = {'Ohio':frame3['Ohio'][:-1],
'Nevada':frame3['Nevada'][:2]}
In [75]:
DataFrame(pdata)
Out[75]:
In [76]:
frame3.index.name = 'year';frame3.columns.name = 'state'
In [77]:
frame3
Out[77]:
In [78]:
frame3.values
Out[78]:
In [79]:
frame2.values
Out[79]:
In [80]:
frame2
Out[80]:
In [81]:
#索引对象
In [82]:
obj = Series(range(3), index=['a', 'b','c'])
In [83]:
obj
Out[83]:
In [84]:
index = obj.index
In [85]:
index
Out[85]:
In [88]:
index[1:]
Out[88]:
In [89]:
index = pd.Index(np.arange(3))
In [90]:
index
Out[90]:
In [91]:
obj2 = Series([1.2, -2.5, 0], index = index)
In [92]:
obj2
Out[92]:
In [93]:
frame3
Out[93]:
In [94]:
'Ohio' in frame3.columns
Out[94]:
In [95]:
2003 in frame3.index
Out[95]:
In [96]:
##基本功能
In [97]:
###重新索引
In [98]:
obj = Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b','a','c'])
In [99]:
obj
Out[99]:
In [100]:
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
In [101]:
obj2
Out[101]:
In [102]:
obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)
Out[102]:
In [103]:
obj3 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
In [104]:
obj3
Out[104]:
In [105]:
obj3.reindex(range(6), method='ffill')
Out[105]:
In [106]:
frame = DataFrame(np.arange(9).reshape((3,3)), index=['a','b', 'd'],
columns=['Ohio', 'Texas', 'California'])
In [107]:
frame
Out[107]:
In [108]:
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
In [109]:
frame2
Out[109]:
In [110]:
states = ['Texas', 'Utah', 'California']
In [111]:
frame.reindex(columns=states)
Out[111]:
In [112]:
frame.reindex(index=['a', 'b', 'c', 'd'],method='ffill',
columns = states)
Out[112]:
In [113]:
frame.ix[['a', 'b', 'c', 'd'], states]
Out[113]:
In [114]:
#丢弃指定轴上的项
In [115]:
obj = Series(np.arange(5), index=['a', 'b','c', 'd', 'e'])
In [116]:
obj
Out[116]:
In [117]:
new_obj = obj.drop('c')
In [118]:
new_obj
Out[118]:
In [119]:
data = DataFrame(np.arange(16).reshape(4,4),
index=['Ohio', 'Colorado', 'Utah', 'New York'],
columns=['one', 'two', 'three', 'four'])
In [120]:
data
Out[120]:
In [121]:
data.drop(['Colorado', 'Ohio'])
Out[121]:
In [122]:
data.drop('two', axis=1)
Out[122]:
In [123]:
data.drop(['two', 'four'], axis=1)
Out[123]:
In [127]:
#索引、选取和过滤
In [130]:
data
Out[130]:
In [ ]: