In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
In [3]:
# Build a Series from a plain list; no index is passed.
obj = Series([4, 7, -5, 3])
obj
Out[3]:
In [4]:
# The values of the Series, without the index.
obj.values
Out[4]:
In [5]:
# The index object that labels the values.
obj.index
Out[5]:
In [6]:
# Same values, this time with explicit string labels.
obj2 = Series([4, 7, -5, 3], index = ['d', 'b', 'a', 'c'])
obj2
Out[6]:
In [7]:
obj2.index
Out[7]:
In [8]:
# Look up single values by label.
obj2['a']
Out[8]:
In [9]:
obj2['d']
Out[9]:
In [10]:
# A list of labels selects several entries at once.
obj2[['c', 'a', 'd']]
Out[10]:
In [11]:
# Boolean mask: keep only the entries greater than zero.
obj2[obj2 > 0]
Out[11]:
In [12]:
# Element-wise arithmetic with a scalar.
obj2 * 2
Out[12]:
In [2]:
# Apply a NumPy ufunc to the Series.  numpy is imported in the notebook's
# first cell together with pandas, so this cell no longer carries its own
# import.
np.exp(obj2)
In [14]:
# Membership test with a label.
'b' in obj2
Out[14]:
In [15]:
# A dict can be passed directly to the Series constructor.
sdata = {'Ohio' : 35000, 'Texas' : 71000, 'Oregon' : 16000, 'Utah' : 5000}
obj3 = Series(sdata)
obj3
Out[15]:
In [16]:
# Pass an explicit index alongside the dict: 'California' is not a key of
# sdata, and 'Utah' is not listed in states.
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = Series(sdata, index = states)
obj4
Out[16]:
In [17]:
# Flag missing entries ...
pd.isnull(obj4)
Out[17]:
In [18]:
# ... and the complement, non-missing entries.
pd.notnull(obj4)
Out[18]:
In [19]:
# Show both operands again before adding them.
obj3
Out[19]:
In [20]:
obj4
Out[20]:
In [21]:
# Add two Series whose indexes differ ('Utah' appears only in obj3,
# 'California' only in obj4).
obj3 + obj4
Out[21]:
In [22]:
# Name the Series itself and its index.
obj4.name = 'population'
obj4.index.name = 'state'
obj4
Out[22]:
In [23]:
obj
Out[23]:
In [25]:
# Replace the index of obj by assigning a new list of labels.
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
obj
Out[25]:
In [2]:
# Column-oriented input: each key is a column name mapped to a list of five
# values.
data = {
    'state': ['Ohio'] * 3 + ['Nevada'] * 2,
    'year': [2000 + offset for offset in (0, 1, 2, 1, 2)],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
frame = DataFrame(data)
frame
Out[2]:
In [5]:
# Request an explicit column order and row labels; 'debt' is not a key of
# data.
frame2 = DataFrame(data, columns = ['year', 'state', 'pop', 'debt'],
index = ['one', 'two', 'three', 'four', 'five'])
frame2
Out[5]:
In [6]:
frame2.columns
Out[6]:
In [9]:
# Column access by key ...
frame2['year']
Out[9]:
In [10]:
# ... and by attribute.
frame2.state
Out[10]:
In [13]:
# Assign the column through [] rather than attribute syntax: attribute
# assignment only updates a column that already exists and would silently
# create a plain Python attribute otherwise.
frame2['debt'] = np.arange(5.)
frame2
Out[13]:
In [14]:
# Assign a Series to a column; val only carries the labels 'two', 'four'
# and 'five'.
val = Series([-1.2, -1.5, -1.7], index = ['two', 'four', 'five'])
frame2['debt'] = val
frame2
Out[14]:
In [15]:
# Add a boolean column derived from a comparison on 'state'.
frame2['eastern'] = frame2.state == 'Ohio'
frame2
Out[15]:
In [16]:
frame2.columns
Out[16]:
In [17]:
# Remove the column again.
del frame2['eastern']
In [18]:
frame2.columns
Out[18]:
In [19]:
# Nested dict: outer keys are 'Nevada' and 'Ohio', inner keys are years.
pop = {'Nevada' : {2001 : 2.4, 2002 : 2.9},
'Ohio' : {2000 : 1.5, 2001 : 1.7, 2002 : 3.6}}
frame3 = DataFrame(pop)
frame3
Out[19]:
In [20]:
# Transpose of frame3.
frame3.T
Out[20]:
In [22]:
# Same nested dict with an explicit row index (2000 left out, 2003 added).
DataFrame(pop, index = [2001, 2002, 2003])
Out[22]:
In [25]:
# Build a frame from a dict of Series sliced out of frame3.
pdata = {'Ohio' : frame3['Ohio'][:-1],
'Nevada': frame3['Nevada'][:2]}
DataFrame(pdata)
Out[25]:
In [28]:
# Name the row index and the column index.
frame3.index.name = 'year'
frame3.columns.name = 'state'
frame3
Out[28]:
In [29]:
# The data held by each frame, via .values.
frame3.values
Out[29]:
In [30]:
frame2.values
Out[30]:
In [31]:
# obj is rebound to a new Series here; keep a reference to its index.
obj = Series(range(3), index = ['a', 'b', 'c'])
index = obj.index
index
Out[31]:
In [32]:
# Slice the index object.
index[1:]
Out[32]:
In [33]:
# A pandas Index is immutable, so item assignment is rejected.  Catch the
# expected TypeError and show it, so the demonstration no longer aborts a
# "Restart & Run All".
try:
    index[1] = 'd'
except TypeError as err:
    print('TypeError:', err)
In [34]:
# Build an Index explicitly, hand it to a Series, and test whether the
# Series' index is that very same object.
index = pd.Index(np.arange(3))
obj2 = Series([1.5, -2.5, 0], index = index)
obj2.index is index
Out[34]:
In [3]:
# obj is rebound to a new Series with string labels.
obj = Series([4.5, 7.2, -5.3, 3.6], index = ['d', 'b', 'a', 'c'])
obj
Out[3]:
In [4]:
# Reindex onto a new list of labels; 'e' is not in obj's index.
obj2 = obj.reindex(['a', 'b','c', 'd', 'e'])
obj2
Out[4]:
In [5]:
# Display obj again after the reindex call.
obj
Out[5]:
In [6]:
# Same reindex, this time passing fill_value = 0.
obj.reindex(['a', 'b','c', 'd', 'e'], fill_value = 0)
Out[6]:
In [7]:
# Series of strings labelled 0, 2, 4.
obj3 = Series(['blue', 'purple', 'yellow'], index = [0, 2, 4])
obj3
Out[7]:
In [8]:
# Reindex onto 0..5 using the forward-fill method.
obj3.reindex(np.arange(6), method = 'ffill')
Out[8]:
In [9]:
# 3x3 integer frame with rows 'a', 'c', 'd'.
frame = DataFrame(np.arange(9).reshape((3, 3)), index = ['a', 'c', 'd'],
columns = ['Ohio', 'Texas', 'California'])
frame
Out[9]:
In [10]:
# Reindex the rows; 'b' is not among frame's row labels.
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
frame2
Out[10]:
In [11]:
# Reindex the columns; 'Utah' is not among frame's columns and 'Ohio' is
# dropped from the selection.
states = ['Texas', 'Utah', 'California']
frame.reindex(columns = states)
Out[11]:
In [12]:
# Forward-fill the new row labels first, then select the columns in a second
# reindex.  Passing method = 'ffill' together with columns = ... applies the
# fill to the column axis as well, and that requires sorted (monotonic)
# column labels, which this frame does not have.
frame.reindex(index = ['a', 'b', 'c', 'd'], method = 'ffill').reindex(columns = states)
Out[12]:
In [13]:
# Label-based reindexing of rows and columns in one call.  The old .ix
# indexer no longer exists in pandas, and .loc rejects label lists containing
# labels that are missing from the axis ('b' and 'Utah' here), so reindex is
# the supported way to get the same result.
frame.reindex(index = ['a', 'b', 'c', 'd'], columns = states)
Out[13]:
In [14]:
# drop returns the result as a new object, bound here to new_obj.
obj = Series(np.arange(5.), index = ['a', 'b', 'c', 'd', 'e'])
new_obj = obj.drop('c')
new_obj
Out[14]:
In [15]:
# Drop several labels at once.
obj.drop(['d', 'c'])
Out[15]:
In [16]:
# Display obj again after the drop calls.
obj
Out[16]:
In [17]:
# 4x4 integer frame; note that the name data is rebound here (it was a dict
# in an earlier cell).
data = DataFrame(np.arange(16).reshape((4, 4)),
index = ['Ohio', 'Colorado', 'Utah', 'New York'],
columns = ['one', 'two', 'three', 'four'])
data
Out[17]:
In [18]:
# Drop by row label (no axis argument given).
data.drop(['Colorado', 'Ohio'])
Out[18]:
In [19]:
# Drop by column label with axis = 1.
data.drop('two', axis = 1)
Out[19]:
In [20]:
data.drop(['two', 'four'], axis = 1)
Out[20]:
In [21]:
obj = Series(np.arange(4), index = ['a', 'b', 'c', 'd'])
# Look the same element up twice: once by label with [], once by position
# with .iloc.  Using a bare integer inside [] on a string-labelled Series
# relies on a deprecated positional fallback.
print(obj['b'], obj.iloc[1])
In [23]:
# Slice with integers, then select with a list of labels.
print(obj[2:], '\n', obj[['c', 'd']])
In [24]:
# Slice with labels as the endpoints.
obj['a':'d']
Out[24]:
In [25]:
data
Out[25]:
In [26]:
# A single key selects one column ...
data['two']
Out[26]:
In [27]:
# ... and a list of keys selects several columns.
data[['two', 'three']]
Out[27]:
In [28]:
# An integer slice inside [] on the frame.
data[:2]
Out[28]:
In [29]:
# Filter with a boolean Series built from column 'three'.
data[data['three'] > 5]
Out[29]:
In [31]:
# Element-wise comparison of the whole frame against a scalar.
data < 5
Out[31]:
In [34]:
# Assign 0 through the boolean frame.  This modifies data in place, so
# every later cell sees the changed values.
data[data < 5] = 0
data
Out[34]:
In [35]:
# One row label and a list of column labels; .loc is the label-based indexer
# that replaces the removed .ix.
data.loc['Colorado', ['two', 'three']]
Out[35]:
In [36]:
# Rows by label, columns by position: translate the column positions into
# labels via data.columns so that a single .loc call can be used in place of
# the removed .ix indexer.
data.loc[['Colorado', 'Utah'], data.columns[[3, 0, 1]]]
Out[36]:
In [37]:
# Label slice of the rows up to 'Utah', restricted to column 'two'; .loc
# replaces the removed .ix indexer.
data.loc[:'Utah', 'two']
Out[37]:
In [41]:
# Boolean row mask combined with the first three columns.  The positional
# column slice is turned into labels via data.columns so that .loc can be
# used in place of the removed .ix indexer.
data.loc[data.three > 5, data.columns[:3]]
Out[41]:
In [43]:
# Two Series whose indexes only partly overlap ('a', 'c', 'e' are shared;
# 'd' appears only in s1, 'f' and 'g' only in s2).
s1 = Series([7.3, -2.5, 3.4, 1.5], index = ['a', 'c', 'd', 'e'])
s2 = Series([-2.1, 3.6, -1.5, 4., 3.1], index = ['a', 'c', 'e', 'f', 'g'])
s1 + s2
Out[43]:
In [46]:
# Two float frames whose row labels and column labels only partly overlap.
df1 = DataFrame(
    data = np.arange(9, dtype = float).reshape(3, 3),
    index = ['Ohio', 'Texas', 'Colorado'],
    columns = ['b', 'c', 'd'],
)
df2 = DataFrame(
    data = np.arange(12, dtype = float).reshape(4, 3),
    index = ['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns = ['b', 'd', 'e'],
)
In [47]:
# Show both operands before adding them.
df1
Out[47]:
In [48]:
df2
Out[48]:
In [49]:
# Add two frames whose row and column labels only partly overlap.
df1 + df2
Out[49]:
In [50]:
# Rebind df1 and df2 to frames of different shapes: 3x4 with columns a-d
# and 4x5 with columns a-e.
df1 = DataFrame(np.arange(12.).reshape((3, 4)), columns = list('abcd'))
df2 = DataFrame(np.arange(20.).reshape((4, 5)), columns = list('abcde'))
In [51]:
df1
Out[51]:
In [52]:
df2
Out[52]:
In [53]:
# Plain operator addition ...
df1 + df2
Out[53]:
In [54]:
# ... versus the add method with fill_value = 0.0.
df1.add(df2, fill_value = 0.0)
Out[54]:
In [57]:
# Reindex df1 onto df2's columns, passing fill_value = 0.
df1.reindex(columns = df2.columns, fill_value = 0.)
Out[57]:
In [58]:
# Plain NumPy 3x4 array for comparison.
arr = np.arange(12).reshape((3, 4))
arr
Out[58]:
In [59]:
# Subtract the first row of arr from arr.
arr - arr[0]
Out[59]:
In [61]:
frame = DataFrame(np.arange(12).reshape((4, 3)), columns = list('bde'),
                  index = ['Utah', 'Ohio', 'Texas', 'Oregon'])
# Take the first row by position.  .iloc is the positional indexer that
# replaces the removed .ix.
series = frame.iloc[0]
In [62]:
# Show both operands before subtracting.
frame
Out[62]:
In [63]:
series
Out[63]:
In [64]:
# Subtract the row Series from the frame.
frame - series
Out[64]:
In [65]:
sereis2 = Series(range(3), index = list('bef'))
sereis2
Out[65]:
In [66]:
frame + sereis2
Out[66]:
In [67]:
# Take column 'd' of frame as a Series.
series3 = frame['d']
In [68]:
frame
Out[68]:
In [69]:
series3
Out[69]:
In [70]:
# Plain operator subtraction of the column Series ...
frame - series3
Out[70]:
In [71]:
# ... versus the sub method with axis = 0.
frame.sub(series3, axis = 0)
Out[71]:
In [74]:
# Seed NumPy's global generator so that this random frame, and every result
# derived from it in the following cells, is the same on each run.
np.random.seed(0)
frame = DataFrame(np.random.randn(4, 3), columns = list('bde'),
                  index = ['Utah', 'Ohio', 'Texas', 'Oregon'])
frame
Out[74]:
In [75]:
# Apply a NumPy ufunc to the whole frame.
np.abs(frame)
Out[75]:
In [76]:
# f computes max minus min of whatever it is given; apply it with the
# default axis.
f = lambda x: x.max() - x.min()
frame.apply(f)
Out[76]:
In [77]:
# Same function applied with axis = 1.
frame.apply(f, axis = 1)
Out[77]:
In [78]:
# f is redefined: it now returns a Series holding the min and the max.
f = lambda x: Series([x.min(), x.max()], index = ['min', 'max'])
frame.apply(f)
Out[78]:
In [79]:
frame.apply(f, axis = 1)
Out[79]:
In [80]:
# Format each element as a string with two decimals.
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in
# favour of DataFrame.map -- confirm against the pandas version in use.
my_format = lambda x: '%.2f' % x
frame.applymap(my_format)
Out[80]:
In [81]:
# The same formatter applied to column 'e' with Series.map.
frame['e'].map(my_format)
Out[81]:
In [82]:
# Sort a Series by its index labels.
obj = Series(np.arange(4), index = list('dabc'))
obj.sort_index()
Out[82]:
In [83]:
# Sort a frame by index: default axis first ...
frame = DataFrame(np.arange(8).reshape((2, 4)),
index = ['three', 'one'],
columns = list('dabc'))
frame.sort_index()
Out[83]:
In [84]:
# ... then along axis = 1 ...
frame.sort_index(axis = 1)
Out[84]:
In [85]:
# ... and along axis = 1 with ascending = False.
frame.sort_index(axis = 1, ascending = False)
Out[85]:
In [88]:
obj = Series([4, 7, -3, 2])
obj
Out[88]:
In [90]:
# Sort a Series by its values.
obj.sort_values()
Out[90]:
In [91]:
# Sort by values when the Series contains NaN entries.
obj = Series([4, np.nan, 7, np.nan, -3, 2])
obj.sort_values()
Out[91]:
In [92]:
frame = DataFrame({'b' : [4, 7, -3, 2], 'a' : [0, 1, 0, 1]})
frame
Out[92]:
In [94]:
# Sort the frame by one column ...
frame.sort_values(by = 'b')
Out[94]:
In [95]:
# ... and by several columns.
frame.sort_values(by = ['a', 'b'])
Out[95]:
In [96]:
# Rank a Series that contains repeated values (7 and 4 each appear twice),
# using the default method.
obj = Series([7, -5, 7, 4, 2, 0, 4])
obj.rank()
Out[96]:
In [97]:
# Rank with method = 'first'.
obj.rank(method = 'first')
Out[97]:
In [100]:
# Rank with ascending = False and method = 'max'.
obj.rank(ascending = False, method = 'max')
Out[100]:
In [101]:
frame
Out[101]:
In [102]:
# Rank the frame with axis = 1.
frame.rank(axis = 1)
Out[102]:
In [3]:
# Small float frame with missing values: row 'b' is entirely NaN and row 'a'
# has NaN in column 'two'.
df = DataFrame(
    {
        'one': [1.4, np.nan, 7.1, 0.75],
        'two': [np.nan, np.nan, -4.5, -1.3],
    },
    index = ['a', 'b', 'c', 'd'],
)
df
Out[3]:
In [4]:
# Sum with the default axis ...
df.sum()
Out[4]:
In [5]:
# ... and with axis = 1.
df.sum(axis = 1)
Out[5]:
In [7]:
# Same sum with skipna = False.
df.sum(axis = 1, skipna = False)
Out[7]:
In [8]:
# Index label of the maximum, per column.
df.idxmax()
Out[8]:
In [9]:
# Cumulative sum.
df.cumsum()
Out[9]:
In [10]:
# Summary statistics for the numeric frame ...
df.describe()
Out[10]:
In [11]:
# ... and for a Series of single-character strings ('aabc' repeated 4 times).
obj = Series(list('aabc' * 4))
obj.describe()
Out[11]:
In [ ]: