In [148]:
import pandas as pd
import numpy as np
In [4]:
# Integer Series 0..4 with explicit string labels. `arange` has to be reached
# through the `np` alias: a bare `arange` is not defined by the notebook's
# imports and raises NameError on a fresh kernel.
obj2 = pd.Series(np.arange(5), index=['a','b','c','d','e'])
In [5]:
# Show the Series' data as a plain array, without the labels.
obj2.values
Out[5]:
In [6]:
# Show the label axis ('a'..'e') that was passed as `index=`.
obj2.index
Out[6]:
In [7]:
obj2
Out[7]:
In [8]:
# Pick several entries at once by passing a list of labels.
obj2[['a','c']]
Out[8]:
In [10]:
# Boolean filtering: keep only the entries whose value is below 2.
obj2[obj2<2]
Out[10]:
In [11]:
# Figures keyed by state name; the Series constructed in later cells are built
# from this mapping.
sdata = {
    'Ohio': 35000,
    'Texas': 71000,
    'Oregon': 16000,
    'Utah': 5000,
}
In [20]:
# Build a Series straight from the dict: keys become the index, values the data.
obj3=pd.Series(sdata)
In [21]:
# Index labels for the next Series. 'California' is not a key of sdata and
# 'Utah' (which is a key) is deliberately absent.
states = [
    'California',
    'Ohio',
    'Oregon',
    'Texas',
]
In [22]:
# Same dict, but with an explicit index: 'California' has no entry in sdata and
# comes out as NaN, while 'Utah' is left out because it is not in states.
obj4 = pd.Series(sdata, index=states)
In [15]:
obj4
Out[15]:
In [16]:
# Boolean mask, True where obj4 holds a missing value (method form).
obj4.isnull()
Out[16]:
In [17]:
# Inverse mask: True where obj4 holds an actual value (top-level function form).
pd.notnull(obj4)
Out[17]:
In [18]:
# Top-level function form of the missing-value mask computed by obj4.isnull().
pd.isnull(obj4)
Out[18]:
In [24]:
obj4
Out[24]:
In [25]:
# Addition aligns the two Series on their labels; a label that is missing (or
# NaN) on either side yields NaN in the result.
obj3+ obj4
Out[25]:
In [26]:
obj3
Out[26]:
In [27]:
obj4
Out[27]:
In [28]:
# Fill in the California entry, for which sdata supplied no value.
# Note: this mutates obj4 in place, so earlier displays of obj4 are now stale.
obj4['California']=40000
In [29]:
# Repeat the sum after the update: 'California' (only in obj4) and 'Utah'
# (only in obj3) still have no partner on the other side.
obj3+obj4
Out[29]:
In [31]:
# Attach a name to the Series itself and to its index.
obj4.name = 'population'
obj4.index.name='state'
In [32]:
obj4
Out[32]:
In [40]:
# Column-oriented records: each key is a column name and each list holds that
# column's five values.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
In [41]:
data
Out[41]:
In [42]:
# Turn the dict of equal-length lists into a DataFrame (one column per key,
# default integer row index).
frame = pd.DataFrame(data)
In [36]:
frame
Out[36]:
In [50]:
# Build the frame with an explicit column order. 'debt' is not a key of data,
# so that column is created with missing values only.
pd.DataFrame(data, columns=['year', 'state', 'pop', 'debt'])
Out[50]:
In [51]:
# Retrieve one column as a Series using dict-style access.
frame['year']
Out[51]:
In [52]:
# Same column via attribute access.
frame.year
Out[52]:
In [53]:
# Fetch the row labelled 3 as a Series. The old `.ix` accessor has been removed
# from pandas; frame has an integer index, so `.ix[3]` was a label lookup and
# `.loc` is its direct replacement.
frame.loc[3]
Out[53]:
In [56]:
# Membership test against the column labels.
'pop' in frame.columns
Out[56]:
In [59]:
# Membership test against the row labels.
3 in frame.index
Out[59]:
In [64]:
# Sparse integer index (0, 2, 4) so the reindex example has gaps to fill.
# Rebinds obj3, which previously held the Series built from sdata.
obj3 = pd.Series(
    data=['blue', 'purple', 'yellow'],
    index=[0, 2, 4],
)
In [63]:
# Re-align onto labels 0..5; method='ffill' carries the last known value
# forward into the new labels (1, 3, 5). Returns a new Series, obj3 is unchanged.
obj3.reindex(range(6), method='ffill')
Out[63]:
In [65]:
obj3
Out[65]:
In [67]:
# Float Series 0.0..4.0 labelled 'a'..'e', used for the drop example.
obj = pd.Series(
    np.arange(5, dtype=float),
    index=['a', 'b', 'c', 'd', 'e'],
)
In [69]:
# Return a copy without the entry labelled 'b'; obj itself is not modified.
obj.drop('b')
Out[69]:
In [71]:
# 4x4 frame holding 0..15 row by row, with named rows and columns.
# Rebinds `data`, which was a plain dict in earlier cells.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
In [72]:
data
Out[72]:
In [75]:
# axis=1 targets the columns: return a copy of data without column 'one'.
data.drop(['one'], axis=1)
Out[75]:
In [77]:
# Import only the two constructors that later cells call unqualified. A star
# import would dump every public pandas name into the notebook namespace and
# hide where names come from.
from pandas import DataFrame, Series

# Float Series 0.0..3.0 labelled 'a'..'d' for the indexing examples.
obj = Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
In [81]:
# Select the entries labelled 'a' and 'c'.
obj[['a','c']]
Out[81]:
In [82]:
# Integer slice on a string-labelled Series is positional: positions 2 and 3
# (end exclusive), i.e. the entries labelled 'c' and 'd'.
obj[2:4]
Out[82]:
In [83]:
obj
Out[83]:
In [84]:
# Keep only the entries whose value is below 2.
obj[obj<2]
Out[84]:
In [85]:
# Recreate the 4x4 frame of 0..15 (rows are states, columns 'one'..'four')
# for the DataFrame indexing examples.
data = DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
In [86]:
data
Out[86]:
In [88]:
# A list of labels selects several columns, in the order given.
data[['two','four']]
Out[88]:
In [90]:
# A boolean Series selects rows: keep those whose 'three' value exceeds 5.
data[data['three']>5]
Out[90]:
In [91]:
# Element-wise comparison; the result is a boolean frame of the same shape.
data <5
Out[91]:
In [92]:
# Indexing with a boolean frame keeps the values where the mask is True and
# puts NaN everywhere else.
data [ data <5 ]
Out[92]:
In [93]:
# Rows by label, columns by position ([3, 0, 1] -> 'four', 'one', 'two').
# The removed `.ix` accessor used to mix both kinds of lookup; translating the
# positions to labels through data.columns lets a single label-based `.loc`
# call make the same selection.
data.loc[['Colorado', 'Utah'], data.columns[[3, 0, 1]]]
Out[93]:
In [94]:
data
Out[94]:
In [95]:
# Column 'two' for every row up to and including 'Utah' (label slices include
# the end label). `.ix` no longer exists in pandas; this lookup is purely
# label-based, so `.loc` is the drop-in replacement.
data.loc[:'Utah', 'two']
Out[95]:
In [96]:
# Rows where 'three' exceeds 5, restricted to the first three columns.
# `.ix` has been removed from pandas; the positional column slice is turned
# into labels via data.columns[:3] so `.loc` can take both selectors.
data.loc[data.three > 5, data.columns[:3]]
Out[96]:
In [97]:
# First operand for the alignment example; it has no 'f' or 'g' label.
s1 = Series(
    data=[7.3, -2.5, 3.4, 1.5],
    index=['a', 'c', 'd', 'e'],
)
In [98]:
# Second operand for the alignment example; it has no 'd' label.
s2 = Series(
    data=[-2.1, 3.6, -1.5, 4, 3.1],
    index=['a', 'c', 'e', 'f', 'g'],
)
In [101]:
# Label-aligned addition: only 'a', 'c' and 'e' exist in both Series; 'd', 'f'
# and 'g' appear in just one of them and therefore come out as NaN.
s2+s1
Out[101]:
In [102]:
# 3x3 float frame (0.0..8.0) with columns b, c, d.
df1 = DataFrame(
    np.arange(9.).reshape(3, 3),
    columns=list('bcd'),
    index=['Ohio', 'Texas', 'Colorado'],
)
In [103]:
# 4x3 float frame (0.0..11.0) with columns b, d, e.
df2 = DataFrame(
    np.arange(12.).reshape(4, 3),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
In [104]:
# Addition aligns on both rows and columns; the result covers the union of the
# labels, with NaN wherever a cell exists in only one of the frames.
df1+df2
Out[104]:
In [105]:
# Two frames with default integer rows and different shapes for the fill_value
# examples: df1 is 3x4 (columns a-d), df2 is 4x5 (columns a-e).
df1 = DataFrame(
    np.arange(12.).reshape(3, 4),
    columns=list('abcd'),
)
df2 = DataFrame(
    np.arange(20.).reshape(4, 5),
    columns=list('abcde'),
)
In [106]:
df1
Out[106]:
In [107]:
df2
Out[107]:
In [108]:
# Plain addition: row 3 and column 'e' exist only in df2, so they are NaN.
df1+df2
Out[108]:
In [109]:
# Method form of +. With fill_value=0 a cell present in only one frame is
# paired with 0 instead of producing NaN.
df1.add(df2, fill_value=0)
Out[109]:
In [112]:
# Re-align df1 to df2's columns; the column df1 lacks ('e') is created and
# filled with 0. Returns a new frame, df1 is unchanged.
df1.reindex(columns=df2.columns, fill_value=0)
Out[112]:
In [111]:
df1
Out[111]:
In [113]:
# 3x4 float array holding 0.0..11.0 row by row, for the broadcasting example.
arr = np.arange(12, dtype=float).reshape(3, 4)
In [114]:
# First row of the array.
arr[0]
Out[114]:
In [115]:
# Broadcasting: the first row is subtracted from every row of arr.
arr - arr[0]
Out[115]:
In [116]:
arr
Out[116]:
In [117]:
# 4x3 float frame (0.0..11.0), rows labelled by state, columns b, d, e.
# Rebinds `frame`, which held the state/year/pop table earlier.
frame = DataFrame(
    np.arange(12.).reshape(4, 3),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
In [118]:
# First row ('Utah') as a Series. `.ix` has been removed from pandas; frame's
# index holds strings, so the integer 0 was a positional lookup and `.iloc`
# is the accessor that expresses it.
frame.iloc[0]
Out[118]:
In [119]:
frame
Out[119]:
In [124]:
# 4x3 frame of standard-normal draws. A locally seeded RandomState makes the
# values identical on every run (Restart & Run All reproduces the outputs)
# without touching NumPy's global random state.
frame = DataFrame(np.random.RandomState(0).randn(4, 3),
                  columns=list('bde'),
                  index=['Utah', 'Ohio', 'Texas', 'Oregon'])
In [126]:
# NumPy ufuncs work element-wise on a DataFrame; labels are preserved.
np.abs(frame)
Out[126]:
In [127]:
def f(x):
    """Return the spread of x: its maximum minus its minimum."""
    return x.max() - x.min()
In [128]:
# Call f once per column (apply's default axis), giving one max-min spread
# per column.
frame.apply(f)
Out[128]:
In [129]:
frame
Out[129]:
In [130]:
def format(x):
    """Render x as a string with two decimal places.

    Note: the name shadows Python's built-in format() for the rest of the
    notebook session.
    """
    return '%.2f' % x
In [131]:
# Apply the two-decimal formatter to every individual cell; the result is a
# frame of strings.
frame.applymap(format)
Out[131]:
In [132]:
# 2x4 integer frame whose row and column labels are deliberately out of order,
# so the sorting examples have something to rearrange.
frame = DataFrame(
    np.arange(8).reshape(2, 4),
    index=['three', 'one'],
    columns=['d', 'a', 'b', 'c'],
)
In [137]:
# Sort the columns by label (axis=1): d, a, b, c -> a, b, c, d.
frame.sort_index(axis=1)
Out[137]:
In [134]:
frame
Out[134]:
In [138]:
# Same column sort in descending label order: d, c, b, a.
frame.sort_index(axis=1, ascending=False)
Out[138]:
In [139]:
# Integer Series containing repeated values (7 and 4 each occur twice), so the
# ranking example has ties to resolve.
obj = Series(
    data=[7, -5, 7, 4, 2, 0, 4],
)
In [140]:
# Rank the values from smallest to largest; with the default method, tied
# values share the average of the ranks they span.
obj.rank()
Out[140]:
In [141]:
# Small frame with missing values: row 'c' is entirely NaN and row 'a' has a
# NaN in column 'two'.
df = DataFrame(
    data=[
        [1.4, np.nan],
        [7.1, -4.5],
        [np.nan, np.nan],
        [0.75, -1.3],
    ],
    index=['a', 'b', 'c', 'd'],
    columns=['one', 'two'],
)
In [144]:
# Order the columns by label. `DataFrame.sort` has been removed from pandas;
# called without a `columns` argument it sorted by the axis labels, which is
# what `sort_index` does.
df.sort_index(axis=1)
Out[144]:
In [147]:
# Row-wise totals (axis=1 sums across the columns); NaN entries are skipped by
# default.
df.sum(axis=1)
Out[147]:
In [149]:
# Three columns of small integer answers (values 1..5), five rows each.
# Rebinds `data` once more, this time for the value-count examples.
data = DataFrame(
    {
        'Qu1': [1, 3, 4, 3, 4],
        'Qu2': [2, 3, 1, 2, 3],
        'Qu3': [1, 5, 2, 4, 4],
    }
)
In [150]:
data
Out[150]:
In [153]:
# Count how often each value occurs in every column; values a column never
# contains come back as NaN and are replaced by 0.
# The top-level pd.value_counts function is deprecated, so the Series method
# is passed to apply instead (apply hands each column over as a Series).
data.apply(pd.Series.value_counts).fillna(0)
Out[153]:
In [155]:
# Same tally taken per row (axis=1): for every row, how many of its cells hold
# each value, with 0 where a value does not occur.
# Uses the Series method because the top-level pd.value_counts is deprecated.
data.apply(pd.Series.value_counts, axis=1).fillna(0)
Out[155]:
In [156]:
# Series of strings with one missing entry (position 2).
string_data = Series(
    data=['aardvark', 'artichoke', np.nan, 'avocado'],
)
In [157]:
# Boolean mask that is True only at the missing entry.
string_data.isnull()
Out[157]:
In [158]:
# Return a copy in which the missing entry is replaced by the string 'none'.
string_data.fillna('none')
Out[158]:
In [159]:
# NA is a short alias for NumPy's NaN, used as the missing-value marker in the
# cells that follow.
from numpy import nan as NA

# Numeric Series with missing values at positions 1 and 3.
data = Series(
    data=[1, NA, 3.5, NA, 7],
)
In [160]:
# Return a copy holding only the non-missing entries; their labels are kept.
data.dropna()
Out[160]:
In [162]:
# Same result as dropna(), spelled out as boolean filtering.
data[data.notnull()]
Out[162]:
In [163]:
# 4x3 frame with a mix of complete, partially missing and fully missing rows:
# row 0 is complete, row 2 is entirely NA, rows 1 and 3 are partly NA.
data = DataFrame(
    data=[
        [1., 6.5, 3.],
        [1., NA, NA],
        [NA, NA, NA],
        [NA, 6.5, 3.],
    ]
)
In [164]:
data
Out[164]:
In [165]:
# Default behaviour: drop every row that contains at least one missing value,
# which leaves only row 0 here.
data.dropna()
Out[165]:
In [166]:
# how='all' drops only the rows in which every value is missing (row 2).
data.dropna(how='all')
Out[166]:
In [169]:
# Same rule applied to columns (axis=1). Every column of this frame has at
# least one real value, so nothing is removed.
data.dropna(axis=1, how='all')
Out[169]:
In [170]:
# 7x3 frame of standard-normal draws with default integer labels. A locally
# seeded RandomState keeps the values the same on every run without touching
# NumPy's global random state.
df = DataFrame(np.random.RandomState(0).randn(7, 3))
In [176]:
# Punch holes into the frame for the dropna examples. `.ix` has been removed
# from pandas; df has integer labels on both axes, so these were label-based
# assignments and `.loc` (whose slices include the end label) is equivalent.
df.loc[:4, 1] = NA  # rows 0-4 of column 1
df.loc[:2, 2] = NA  # rows 0-2 of column 2
df.loc[0, 0] = NA   # single cell, which leaves row 0 entirely missing
In [177]:
df
Out[177]:
In [179]:
# Drop the rows in which every value is missing; after the assignments above
# that is row 0 only.
df.dropna(axis=0, how='all')
Out[179]:
In [182]:
# thresh=1 keeps every row that has at least one non-missing value.
df.dropna(thresh=1)
Out[182]:
In [ ]: