In [1]:
from pandas import Series, DataFrame

In [42]:
import pandas as pd
import numpy as np

In [3]:
# Build a Series from a plain list; no index is supplied here.
obj = Series(data=[4, 7, -5, 3])

In [4]:
obj


Out[4]:
0    4
1    7
2   -5
3    3
dtype: int64

In [5]:
# The underlying data, returned as a NumPy array (see the array(...) output).
obj.values


Out[5]:
array([ 4,  7, -5,  3], dtype=int64)

In [6]:
obj.index


Out[6]:
Int64Index([0, 1, 2, 3], dtype='int64')

In [7]:
# Same values as `obj`, but each one is given an explicit string label.
obj2 = Series(
    data=[4, 7, -5, 3],
    index=['d', 'b', 'a', 'c'],
)

In [8]:
obj2


Out[8]:
d    4
b    7
a   -5
c    3
dtype: int64

In [9]:
obj2.index


Out[9]:
Index([u'd', u'b', u'a', u'c'], dtype='object')

In [10]:
# Look up a single value by its index label.
obj2['a']


Out[10]:
-5

In [11]:
obj2


Out[11]:
d    4
b    7
a   -5
c    3
dtype: int64

In [12]:
# Boolean-mask selection: only the positive values are kept, and each
# retained value keeps its original index label.
obj2[obj2 > 0]


Out[12]:
d    4
b    7
c    3
dtype: int64

In [13]:
# Scalar multiplication is applied element-wise; the index labels are unchanged.
obj2 * 2


Out[13]:
d     8
b    14
a   -10
c     6
dtype: int64

In [14]:
# Mapping of state name -> value, used below to build Series from a dict.
sdata = {
    'Ohio': 35000,
    'Texas': 71000,
    'Oregon': 16000,
    'Utah': 5000,
}

In [15]:
# Build a Series from the dict: keys become index labels, values become data.
# NOTE(review): the recorded output lists the labels alphabetically rather than
# in the order the dict was written -- presumably version-dependent; confirm
# against the pandas/Python version in use.
obj3 = Series(sdata)

In [16]:
obj3


Out[16]:
Ohio      35000
Oregon    16000
Texas     71000
Utah       5000
dtype: int64

In [17]:
# Labels to use as an explicit index; 'California' has no entry in sdata
# and 'Utah' is intentionally not listed.
states = [
    'California',
    'Ohio',
    'Oregon',
    'Texas',
]

In [18]:
# Passing an explicit index selects and orders the dict entries by label.
# Per the recorded output, 'California' (absent from sdata) becomes NaN,
# 'Utah' (not in states) is left out, and the dtype becomes float64.
obj4 = Series(sdata, index=states)

In [19]:
obj4


Out[19]:
California      NaN
Ohio          35000
Oregon        16000
Texas         71000
dtype: float64

In [20]:
# Flag missing entries: True where the value is NaN, False otherwise.
pd.isnull(obj4)


Out[20]:
California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [21]:
# Inverse check: True where a value is present, False where it is NaN.
pd.notnull(obj4)


Out[21]:
California    False
Ohio           True
Oregon         True
Texas          True
dtype: bool

In [22]:
# Method form of the missing-value check; the recorded output matches
# that of pd.isnull(obj4).
obj4.isnull()


Out[22]:
California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [23]:
# Arithmetic aligns the operands on index label. Labels that appear in only
# one operand ('California', 'Utah') come out as NaN in the result.
obj3 + obj4


Out[23]:
California       NaN
Ohio           70000
Oregon         32000
Texas         142000
Utah             NaN
dtype: float64

In [24]:
# Name the Series itself; the name shows up in the repr footer ("Name: population").
obj4.name = 'population'

In [25]:
# Name the index; it is printed as a header line above the labels.
obj4.index.name = 'state'

In [26]:
obj4


Out[26]:
state
California      NaN
Ohio          35000
Oregon        16000
Texas         71000
Name: population, dtype: float64

In [27]:
# Column-oriented input: each key maps to a list of five values.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}

In [28]:
# Build a DataFrame from the dict of lists: each key becomes a column and
# the rows get a default 0..4 integer index (see the output below).
frame = DataFrame(data)

In [29]:
frame


Out[29]:
pop state year
0 1.5 Ohio 2000
1 1.7 Ohio 2001
2 3.6 Ohio 2002
3 2.4 Nevada 2001
4 2.9 Nevada 2002

In [30]:
# `columns=` fixes the column order explicitly (year, state, pop).
DataFrame(data, columns=['year', 'state', 'pop'])


Out[30]:
year state pop
0 2000 Ohio 1.5
1 2001 Ohio 1.7
2 2002 Ohio 3.6
3 2001 Nevada 2.4
4 2002 Nevada 2.9

In [31]:
# Request a column ('debt') that has no entry in `data` and give the rows
# string labels instead of the default integer index.
frame2 = DataFrame(
    data,
    index=['one', 'two', 'three', 'four', 'five'],
    columns=['year', 'state', 'pop', 'debt'],
)

In [32]:
frame2


Out[32]:
year state pop debt
one 2000 Ohio 1.5 NaN
two 2001 Ohio 1.7 NaN
three 2002 Ohio 3.6 NaN
four 2001 Nevada 2.4 NaN
five 2002 Nevada 2.9 NaN

In [33]:
frame2.columns


Out[33]:
Index([u'year', u'state', u'pop', u'debt'], dtype='object')

In [34]:
# Dict-style column access: returns the column as a Series that carries the
# frame's row labels and is named after the column.
frame2['state']


Out[34]:
one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
Name: state, dtype: object

In [36]:
# Attribute-style column access; the recorded output is identical to
# that of frame2['state'].
frame2.state


Out[36]:
one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
Name: state, dtype: object

In [37]:
# Select the row labelled 'three' by index label. `.loc` is the explicit
# label-based indexer and returns the same row as before; the `.ix` indexer
# used previously is deprecated and no longer exists in current pandas
# releases, where it raises AttributeError.
frame2.loc['three']


Out[37]:
year     2002
state    Ohio
pop       3.6
debt      NaN
Name: three, dtype: object

In [38]:
# Assigning a scalar to a column broadcasts it to every row.
frame2['debt'] = 16.5

In [39]:
frame2


Out[39]:
year state pop debt
one 2000 Ohio 1.5 16.5
two 2001 Ohio 1.7 16.5
three 2002 Ohio 3.6 16.5
four 2001 Nevada 2.4 16.5
five 2002 Nevada 2.9 16.5

In [43]:
# Overwrite the column with one float per row: 0.0, 1.0, 2.0, 3.0, 4.0.
frame2['debt'] = np.arange(5, dtype=float)

In [44]:
frame2


Out[44]:
year state pop debt
one 2000 Ohio 1.5 0
two 2001 Ohio 1.7 1
three 2002 Ohio 3.6 2
four 2001 Nevada 2.4 3
five 2002 Nevada 2.9 4

In [45]:
# Assigning to a column name that does not exist yet creates it; the values
# are a boolean mask marking the rows whose state is 'Ohio'.
frame2['eastern'] = (frame2['state'] == 'Ohio')

In [46]:
frame2


Out[46]:
year state pop debt eastern
one 2000 Ohio 1.5 0 True
two 2001 Ohio 1.7 1 True
three 2002 Ohio 3.6 2 True
four 2001 Nevada 2.4 3 False
five 2002 Nevada 2.9 4 False

In [47]:
# Remove the column from frame2 in place; the columns listing that follows
# no longer includes 'eastern'.
del frame2['eastern']

In [48]:
frame2.columns


Out[48]:
Index([u'year', u'state', u'pop', u'debt'], dtype='object')

In [49]:
# Nested dict: outer keys are state names, inner keys are years.
# Note that 'Nevada' has no entry for 2000.
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}

In [50]:
# Build a DataFrame from the nested dict: outer keys become columns and inner
# keys become the row index; a missing inner key (Nevada/2000) shows as NaN.
frame3 = DataFrame(pop)

In [51]:
pop


Out[51]:
{'Nevada': {2001: 2.4, 2002: 2.9}, 'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}

In [52]:
frame3


Out[52]:
Nevada Ohio
2000 NaN 1.5
2001 2.4 1.7
2002 2.9 3.6

In [53]:
# Transpose: the years become columns and the states become rows.
frame3.T


Out[53]:
2000 2001 2002
Nevada NaN 2.4 2.9
Ohio 1.5 1.7 3.6

In [54]:
# An explicit index selects which inner keys are used as rows: per the output,
# 2000 is left out and 2003 (absent from both inner dicts) is an all-NaN row.
DataFrame(pop, index=[2001, 2002, 2003])


Out[54]:
Nevada Ohio
2001 2.4 1.7
2002 2.9 3.6
2003 NaN NaN

In [55]:
# The frame's data as a 2-D NumPy array (rows x columns).
frame3.values


Out[55]:
array([[ nan,  1.5],
       [ 2.4,  1.7],
       [ 2.9,  3.6]])

In [56]:
# frame2 mixes integer, string and float columns, so the resulting array
# is reported with dtype=object (see output).
frame2.values


Out[56]:
array([[2000L, 'Ohio', 1.5, 0.0],
       [2001L, 'Ohio', 1.7, 1.0],
       [2002L, 'Ohio', 3.6, 2.0],
       [2001L, 'Nevada', 2.4, 3.0],
       [2002L, 'Nevada', 2.9, 4.0]], dtype=object)

In [ ]: