Chapter_05_Part_01



In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [3]:
obj = Series([4, 7, -5, 3])
obj


Out[3]:
0    4
1    7
2   -5
3    3
dtype: int64

In [4]:
obj.values


Out[4]:
array([ 4,  7, -5,  3])

In [5]:
obj.index


Out[5]:
RangeIndex(start=0, stop=4, step=1)

In [6]:
# Build a Series with explicit string labels instead of the default 0..n-1 index.
obj2 = Series([4, 7, -5, 3], index=list('dbac'))
obj2


Out[6]:
d    4
b    7
a   -5
c    3
dtype: int64

In [7]:
obj2.index


Out[7]:
Index(['d', 'b', 'a', 'c'], dtype='object')

In [8]:
obj2['a']


Out[8]:
-5

In [9]:
obj2['d']


Out[9]:
4

In [10]:
obj2[['c', 'a', 'd']]


Out[10]:
c    3
a   -5
d    4
dtype: int64

In [11]:
obj2[obj2 > 0]


Out[11]:
d    4
b    7
c    3
dtype: int64

In [12]:
obj2 * 2


Out[12]:
d     8
b    14
a   -10
c     6
dtype: int64

In [2]:
# NumPy ufuncs applied to a Series operate element-wise and keep the index labels.
# numpy is imported once in the notebook's first cell rather than here, so the
# dependency is visible up front and later cells do not rely on this one.
np.exp(obj2)


Out[2]:
d      54.598150
b    1096.633158
a       0.006738
c      20.085537
dtype: float64

In [14]:
'b' in obj2


Out[14]:
True

In [15]:
# A dict maps directly onto a Series: keys become the index, values the data.
sdata = dict(Ohio=35000, Texas=71000, Oregon=16000, Utah=5000)
obj3 = Series(sdata)
obj3


Out[15]:
Ohio      35000
Oregon    16000
Texas     71000
Utah       5000
dtype: int64

In [16]:
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = Series(sdata, index = states)
obj4


Out[16]:
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [17]:
pd.isnull(obj4)


Out[17]:
California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [18]:
pd.notnull(obj4)


Out[18]:
California    False
Ohio           True
Oregon         True
Texas          True
dtype: bool

In [19]:
obj3


Out[19]:
Ohio      35000
Oregon    16000
Texas     71000
Utah       5000
dtype: int64

In [20]:
obj4


Out[20]:
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [21]:
obj3 + obj4


Out[21]:
California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

In [22]:
obj4.name = 'population'
obj4.index.name = 'state'
obj4


Out[22]:
state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64

In [23]:
obj


Out[23]:
0    4
1    7
2   -5
3    3
dtype: int64

In [25]:
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
obj


Out[25]:
Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64

In [2]:
# Column-oriented input: every key is a column name, every list is that column's values.
data = {
    'state': ['Ohio'] * 3 + ['Nevada'] * 2,
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
frame = DataFrame(data)
frame


Out[2]:
pop state year
0 1.5 Ohio 2000
1 1.7 Ohio 2001
2 3.6 Ohio 2002
3 2.4 Nevada 2001
4 2.9 Nevada 2002

In [5]:
frame2 = DataFrame(data, columns = ['year', 'state', 'pop', 'debt'],
                   index = ['one', 'two', 'three', 'four', 'five'])
frame2


Out[5]:
year state pop debt
one 2000 Ohio 1.5 NaN
two 2001 Ohio 1.7 NaN
three 2002 Ohio 3.6 NaN
four 2001 Nevada 2.4 NaN
five 2002 Nevada 2.9 NaN

In [6]:
frame2.columns


Out[6]:
Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [9]:
frame2['year']


Out[9]:
one      2000
two      2001
three    2002
four     2001
five     2002
Name: year, dtype: int64

In [10]:
frame2.state


Out[10]:
one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
Name: state, dtype: object

In [13]:
# Assign through [] rather than attribute access: `frame2.debt = ...` only
# updates a column because 'debt' already exists; on a new name it would set a
# plain Python attribute instead of creating a column.
frame2['debt'] = np.arange(5.)
frame2


Out[13]:
year state pop debt
one 2000 Ohio 1.5 0.0
two 2001 Ohio 1.7 1.0
three 2002 Ohio 3.6 2.0
four 2001 Nevada 2.4 3.0
five 2002 Nevada 2.9 4.0

In [14]:
val = Series([-1.2, -1.5, -1.7], index = ['two', 'four', 'five'])
frame2['debt'] = val
frame2


Out[14]:
year state pop debt
one 2000 Ohio 1.5 NaN
two 2001 Ohio 1.7 -1.2
three 2002 Ohio 3.6 NaN
four 2001 Nevada 2.4 -1.5
five 2002 Nevada 2.9 -1.7

In [15]:
frame2['eastern'] = frame2.state == 'Ohio'
frame2


Out[15]:
year state pop debt eastern
one 2000 Ohio 1.5 NaN True
two 2001 Ohio 1.7 -1.2 True
three 2002 Ohio 3.6 NaN True
four 2001 Nevada 2.4 -1.5 False
five 2002 Nevada 2.9 -1.7 False

In [16]:
frame2.columns


Out[16]:
Index(['year', 'state', 'pop', 'debt', 'eastern'], dtype='object')

In [17]:
del frame2['eastern']

In [18]:
frame2.columns


Out[18]:
Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [19]:
# Nested dicts: outer keys become columns, inner keys become the row index.
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
frame3 = DataFrame(pop)
frame3


Out[19]:
Nevada Ohio
2000 NaN 1.5
2001 2.4 1.7
2002 2.9 3.6

In [20]:
frame3.T


Out[20]:
2000 2001 2002
Nevada NaN 2.4 2.9
Ohio 1.5 1.7 3.6

In [22]:
DataFrame(pop, index = [2001, 2002, 2003])


Out[22]:
Nevada Ohio
2001 2.4 1.7
2002 2.9 3.6
2003 NaN NaN

In [25]:
# Build a frame from a dict of Series. The slices are positional, so spell that
# out with .iloc: frame3's index holds integer years, and a bare [:2] on an
# integer-labelled Series is easy to misread as a label slice.
pdata = {'Ohio' : frame3['Ohio'].iloc[:-1],
         'Nevada': frame3['Nevada'].iloc[:2]}
DataFrame(pdata)


Out[25]:
Nevada Ohio
2000 NaN 1.5
2001 2.4 1.7

In [28]:
frame3.index.name = 'year'
frame3.columns.name = 'state'
frame3


Out[28]:
state Nevada Ohio
year
2000 NaN 1.5
2001 2.4 1.7
2002 2.9 3.6

In [29]:
frame3.values


Out[29]:
array([[ nan,  1.5],
       [ 2.4,  1.7],
       [ 2.9,  3.6]])

In [30]:
frame2.values


Out[30]:
array([[2000, 'Ohio', 1.5, nan],
       [2001, 'Ohio', 1.7, -1.2],
       [2002, 'Ohio', 3.6, nan],
       [2001, 'Nevada', 2.4, -1.5],
       [2002, 'Nevada', 2.9, -1.7]], dtype=object)

In [31]:
obj = Series(range(3), index = ['a', 'b', 'c'])
index = obj.index
index


Out[31]:
Index(['a', 'b', 'c'], dtype='object')

In [32]:
index[1:]


Out[32]:
Index(['b', 'c'], dtype='object')

In [33]:
# Index objects are immutable, so item assignment raises TypeError (see the traceback below).
index[1] = 'd'


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-33-676fdeb26a68> in <module>()
----> 1 index[1] = 'd'

C:\Users\user\Anaconda3\lib\site-packages\pandas\indexes\base.py in __setitem__(self, key, value)
   1402 
   1403     def __setitem__(self, key, value):
-> 1404         raise TypeError("Index does not support mutable operations")
   1405 
   1406     def __getitem__(self, key):

TypeError: Index does not support mutable operations

In [34]:
index = pd.Index(np.arange(3))
obj2 = Series([1.5, -2.5, 0], index = index)
obj2.index is index


Out[34]:
True

In [3]:
obj = Series([4.5, 7.2, -5.3, 3.6], index = ['d', 'b', 'a', 'c'])
obj


Out[3]:
d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [4]:
obj2 = obj.reindex(['a', 'b','c', 'd', 'e'])
obj2


Out[4]:
a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [5]:
obj


Out[5]:
d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [6]:
obj.reindex(['a', 'b','c', 'd', 'e'], fill_value = 0)


Out[6]:
a   -5.3
b    7.2
c    3.6
d    4.5
e    0.0
dtype: float64

In [7]:
obj3 = Series(['blue', 'purple', 'yellow'], index = [0, 2, 4])
obj3


Out[7]:
0      blue
2    purple
4    yellow
dtype: object

In [8]:
obj3.reindex(np.arange(6), method = 'ffill')


Out[8]:
0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [9]:
frame = DataFrame(np.arange(9).reshape((3, 3)), index = ['a', 'c', 'd'], 
                  columns = ['Ohio', 'Texas', 'California'])
frame


Out[9]:
Ohio Texas California
a 0 1 2
c 3 4 5
d 6 7 8

In [10]:
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
frame2


Out[10]:
Ohio Texas California
a 0.0 1.0 2.0
b NaN NaN NaN
c 3.0 4.0 5.0
d 6.0 7.0 8.0

In [11]:
states = ['Texas', 'Utah', 'California']
frame.reindex(columns = states)


Out[11]:
Texas Utah California
a 1 NaN 2
c 4 NaN 5
d 7 NaN 8

In [12]:
# Forward-fill the new row 'b' first, then select/insert the columns.
# Passing method='ffill' together with columns= applies the fill method to the
# column axis as well, and frame's columns (Ohio, Texas, California) are not
# sorted, which pandas rejects for fill methods. Two steps give the same table.
frame.reindex(index = ['a', 'b', 'c', 'd'], method = 'ffill').reindex(columns = states)


Out[12]:
Texas Utah California
a 1 NaN 2
b 1 NaN 2
c 4 NaN 5
d 7 NaN 8

In [13]:
# Label-based selection that inserts NaN for the missing row 'b' and the missing
# column 'Utah'. The .ix indexer has been removed from pandas; reindex produces
# the same result and tolerates labels that are not present.
frame.reindex(index = ['a', 'b', 'c', 'd'], columns = states)


Out[13]:
Texas Utah California
a 1.0 NaN 2.0
b NaN NaN NaN
c 4.0 NaN 5.0
d 7.0 NaN 8.0

In [14]:
# drop returns a new Series without the given label; obj itself is left untouched.
obj = Series(np.arange(5.), index=list('abcde'))
new_obj = obj.drop('c')
new_obj


Out[14]:
a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [15]:
obj.drop(['d', 'c'])


Out[15]:
a    0.0
b    1.0
e    4.0
dtype: float64

In [16]:
obj


Out[16]:
a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [17]:
# 4x4 frame of the integers 0..15, labelled by state (rows) and ordinal name (columns).
data = DataFrame(
    np.arange(16).reshape(4, 4),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
)
data


Out[17]:
one two three four
Ohio 0 1 2 3
Colorado 4 5 6 7
Utah 8 9 10 11
New York 12 13 14 15

In [18]:
data.drop(['Colorado', 'Ohio'])


Out[18]:
one two three four
Utah 8 9 10 11
New York 12 13 14 15

In [19]:
data.drop('two', axis = 1)


Out[19]:
one three four
Ohio 0 2 3
Colorado 4 6 7
Utah 8 10 11
New York 12 14 15

In [20]:
data.drop(['two', 'four'], axis = 1)


Out[20]:
one three
Ohio 0 2
Colorado 4 6
Utah 8 10
New York 12 14

In [21]:
# Look up the same element once by label and once by position.
# Positional access goes through .iloc: obj[1] on a string-labelled Series
# relies on a deprecated fallback from labels to positions.
obj = Series(np.arange(4), index = ['a', 'b', 'c', 'd'])
print(obj['b'], obj.iloc[1])


1 1

In [23]:
# A positional slice (.iloc makes the intent explicit) and a list of labels
# select the same two rows.
print(obj.iloc[2:], '\n', obj[['c', 'd']])


c    2
d    3
dtype: int64 
 c    2
d    3
dtype: int64

In [24]:
obj['a':'d']


Out[24]:
a    0
b    1
c    2
d    3
dtype: int64

In [25]:
data


Out[25]:
one two three four
Ohio 0 1 2 3
Colorado 4 5 6 7
Utah 8 9 10 11
New York 12 13 14 15

In [26]:
data['two']


Out[26]:
Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int64

In [27]:
data[['two', 'three']]


Out[27]:
two three
Ohio 1 2
Colorado 5 6
Utah 9 10
New York 13 14

In [28]:
data[:2]


Out[28]:
one two three four
Ohio 0 1 2 3
Colorado 4 5 6 7

In [29]:
data[data['three'] > 5]


Out[29]:
one two three four
Colorado 4 5 6 7
Utah 8 9 10 11
New York 12 13 14 15

In [31]:
data < 5


Out[31]:
one two three four
Ohio True True True True
Colorado True False False False
Utah False False False False
New York False False False False

In [34]:
data[data < 5] = 0
data


Out[34]:
one two three four
Ohio 0 0 0 0
Colorado 0 5 6 7
Utah 8 9 10 11
New York 12 13 14 15

In [35]:
# Select one row and two columns by label; .loc replaces the removed .ix indexer.
data.loc['Colorado', ['two', 'three']]


Out[35]:
two      5
three    6
Name: Colorado, dtype: int64

In [36]:
# The removed .ix indexer mixed row labels with column positions. .loc is purely
# label-based, so translate the positions [3, 0, 1] into column labels first.
data.loc[['Colorado', 'Utah'], data.columns[[3, 0, 1]]]


Out[36]:
four one two
Colorado 7 0 5
Utah 11 8 9

In [37]:
# Label slices include the end point, so this returns rows up to and including
# 'Utah'; .loc replaces the removed .ix indexer.
data.loc[:'Utah', 'two']


Out[37]:
Ohio        0
Colorado    5
Utah        9
Name: two, dtype: int64

In [41]:
# Boolean row filter combined with the first three columns. .loc replaces the
# removed .ix indexer; the positional column slice becomes a slice of
# data.columns so the selection stays label-based.
data.loc[data.three > 5, data.columns[:3]]


Out[41]:
one two three
Colorado 0 5 6
Utah 8 9 10
New York 12 13 14

In [43]:
# Arithmetic aligns on index labels; labels present on only one side yield NaN.
s1 = Series([7.3, -2.5, 3.4, 1.5], index=list('acde'))
s2 = Series([-2.1, 3.6, -1.5, 4., 3.1], index=list('acefg'))
s1 + s2


Out[43]:
a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [46]:
# Two frames whose row and column labels only partly overlap, used to show
# how arithmetic aligns on both axes.
df1 = DataFrame(
    np.arange(9.).reshape(3, 3),
    index=['Ohio', 'Texas', 'Colorado'],
    columns=['b', 'c', 'd'],
)
df2 = DataFrame(
    np.arange(12.).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)

In [47]:
df1


Out[47]:
b c d
Ohio 0.0 1.0 2.0
Texas 3.0 4.0 5.0
Colorado 6.0 7.0 8.0

In [48]:
df2


Out[48]:
b d e
Utah 0.0 1.0 2.0
Ohio 3.0 4.0 5.0
Texas 6.0 7.0 8.0
Oregon 9.0 10.0 11.0

In [49]:
df1 + df2


Out[49]:
b c d e
Colorado NaN NaN NaN NaN
Ohio 3.0 NaN 6.0 NaN
Oregon NaN NaN NaN NaN
Texas 9.0 NaN 12.0 NaN
Utah NaN NaN NaN NaN

In [50]:
df1 = DataFrame(np.arange(12.).reshape((3, 4)), columns = list('abcd'))
df2 = DataFrame(np.arange(20.).reshape((4, 5)), columns = list('abcde'))

In [51]:
df1


Out[51]:
a b c d
0 0.0 1.0 2.0 3.0
1 4.0 5.0 6.0 7.0
2 8.0 9.0 10.0 11.0

In [52]:
df2


Out[52]:
a b c d e
0 0.0 1.0 2.0 3.0 4.0
1 5.0 6.0 7.0 8.0 9.0
2 10.0 11.0 12.0 13.0 14.0
3 15.0 16.0 17.0 18.0 19.0

In [53]:
df1 + df2


Out[53]:
a b c d e
0 0.0 2.0 4.0 6.0 NaN
1 9.0 11.0 13.0 15.0 NaN
2 18.0 20.0 22.0 24.0 NaN
3 NaN NaN NaN NaN NaN

In [54]:
df1.add(df2, fill_value = 0.0)


Out[54]:
a b c d e
0 0.0 2.0 4.0 6.0 4.0
1 9.0 11.0 13.0 15.0 9.0
2 18.0 20.0 22.0 24.0 14.0
3 15.0 16.0 17.0 18.0 19.0

In [57]:
df1.reindex(columns = df2.columns, fill_value = 0.)


Out[57]:
a b c d e
0 0.0 1.0 2.0 3.0 0.0
1 4.0 5.0 6.0 7.0 0.0
2 8.0 9.0 10.0 11.0 0.0

In [58]:
arr = np.arange(12).reshape((3, 4))
arr


Out[58]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [59]:
arr - arr[0]


Out[59]:
array([[0, 0, 0, 0],
       [4, 4, 4, 4],
       [8, 8, 8, 8]])

In [61]:
# Frame plus its first row, used to show how DataFrame/Series arithmetic
# broadcasts the Series across the rows.
frame = DataFrame(np.arange(12).reshape((4, 3)), columns = list('bde'),
                  index = ['Utah', 'Ohio', 'Texas', 'Oregon'])
# First row by position; .iloc replaces the removed .ix indexer.
series = frame.iloc[0]

In [62]:
frame


Out[62]:
b d e
Utah 0 1 2
Ohio 3 4 5
Texas 6 7 8
Oregon 9 10 11

In [63]:
series


Out[63]:
b    0
d    1
e    2
Name: Utah, dtype: int64

In [64]:
frame - series


Out[64]:
b d e
Utah 0 0 0
Ohio 3 3 3
Texas 6 6 6
Oregon 9 9 9

In [65]:
sereis2 = Series(range(3), index = list('bef'))
sereis2


Out[65]:
b    0
e    1
f    2
dtype: int64

In [66]:
frame + sereis2


Out[66]:
b d e f
Utah 0.0 NaN 3.0 NaN
Ohio 3.0 NaN 6.0 NaN
Texas 6.0 NaN 9.0 NaN
Oregon 9.0 NaN 12.0 NaN

In [67]:
series3 = frame['d']

In [68]:
frame


Out[68]:
b d e
Utah 0 1 2
Ohio 3 4 5
Texas 6 7 8
Oregon 9 10 11

In [69]:
series3


Out[69]:
Utah       1
Ohio       4
Texas      7
Oregon    10
Name: d, dtype: int64

In [70]:
frame - series3


Out[70]:
Ohio Oregon Texas Utah b d e
Utah NaN NaN NaN NaN NaN NaN NaN
Ohio NaN NaN NaN NaN NaN NaN NaN
Texas NaN NaN NaN NaN NaN NaN NaN
Oregon NaN NaN NaN NaN NaN NaN NaN

In [71]:
frame.sub(series3, axis = 0)


Out[71]:
b d e
Utah -1 0 1
Ohio -1 0 1
Texas -1 0 1
Oregon -1 0 1

In [74]:
# 4x3 frame of draws from np.random.randn, labelled by state.
# NOTE(review): no seed is set in the visible cells, so the values displayed
# below will not be reproduced when this cell is re-run.
frame = DataFrame(np.random.randn(4, 3), columns = list('bde'),
                  index = ['Utah', 'Ohio', 'Texas', 'Oregon'])
frame


Out[74]:
b d e
Utah 1.092852 0.499297 -0.638768
Ohio -1.704845 -0.272309 0.086455
Texas 1.290658 -0.925376 1.098472
Oregon 0.322673 0.777713 -0.556797

In [75]:
np.abs(frame)


Out[75]:
b d e
Utah 1.092852 0.499297 0.638768
Ohio 1.704845 0.272309 0.086455
Texas 1.290658 0.925376 1.098472
Oregon 0.322673 0.777713 0.556797

In [76]:
def f(x):
    """Return the spread of x: its maximum minus its minimum."""
    return x.max() - x.min()

# apply calls f once per column by default.
frame.apply(f)


Out[76]:
b    2.995503
d    1.703088
e    1.737239
dtype: float64

In [77]:
frame.apply(f, axis = 1)


Out[77]:
Utah      1.731620
Ohio      1.791300
Texas     2.216034
Oregon    1.334510
dtype: float64

In [78]:
def f(x):
    """Return the minimum and maximum of x as a Series labelled 'min' and 'max'."""
    return Series([x.min(), x.max()], index=['min', 'max'])

# Because f returns a Series, apply assembles the per-column results into a DataFrame.
frame.apply(f)


Out[78]:
b d e
min -1.704845 -0.925376 -0.638768
max 1.290658 0.777713 1.098472

In [79]:
frame.apply(f, axis = 1)


Out[79]:
min max
Utah -0.638768 1.092852
Ohio -1.704845 0.086455
Texas -0.925376 1.290658
Oregon -0.556797 0.777713

In [80]:
# Format every element of the frame as a string with two decimal places.
# NOTE(review): newer pandas releases deprecate DataFrame.applymap in favour of
# DataFrame.map — confirm against the installed version before switching.
my_format = lambda x: '%.2f' % x
frame.applymap(my_format)


Out[80]:
b d e
Utah 1.09 0.50 -0.64
Ohio -1.70 -0.27 0.09
Texas 1.29 -0.93 1.10
Oregon 0.32 0.78 -0.56

In [81]:
frame['e'].map(my_format)


Out[81]:
Utah      -0.64
Ohio       0.09
Texas      1.10
Oregon    -0.56
Name: e, dtype: object

In [82]:
# sort_index returns a new Series ordered by label; obj keeps its original order.
obj = Series(np.arange(4), index=['d', 'a', 'b', 'c'])
obj.sort_index()


Out[82]:
a    1
b    2
c    3
d    0
dtype: int64

In [83]:
# 2x4 frame with deliberately unsorted row and column labels.
frame = DataFrame(
    np.arange(8).reshape(2, 4),
    columns=['d', 'a', 'b', 'c'],
    index=['three', 'one'],
)
# With no axis argument, sort_index orders the rows by their labels.
frame.sort_index()


Out[83]:
d a b c
one 4 5 6 7
three 0 1 2 3

In [84]:
frame.sort_index(axis = 1)


Out[84]:
a b c d
three 1 2 3 0
one 5 6 7 4

In [85]:
frame.sort_index(axis = 1, ascending = False)


Out[85]:
d c b a
three 0 3 2 1
one 4 7 6 5

In [88]:
obj = Series([4, 7, -3, 2])
obj


Out[88]:
0    4
1    7
2   -3
3    2
dtype: int64

In [90]:
obj.sort_values()


Out[90]:
2   -3
3    2
0    4
1    7
dtype: int64

In [91]:
obj = Series([4, np.nan, 7, np.nan, -3, 2])
obj.sort_values()


Out[91]:
4   -3.0
5    2.0
0    4.0
2    7.0
1    NaN
3    NaN
dtype: float64

In [92]:
# Small two-column frame used for the sort_values and rank examples.
frame = DataFrame(dict(b=[4, 7, -3, 2], a=[0, 1, 0, 1]))
frame


Out[92]:
a b
0 0 4
1 1 7
2 0 -3
3 1 2

In [94]:
frame.sort_values(by = 'b')


Out[94]:
a b
2 0 -3
3 1 2
0 0 4
1 1 7

In [95]:
frame.sort_values(by = ['a', 'b'])


Out[95]:
a b
2 0 -3
0 0 4
3 1 2
1 1 7

In [96]:
obj = Series([7, -5, 7, 4, 2, 0, 4])
obj.rank()


Out[96]:
0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [97]:
obj.rank(method = 'first')


Out[97]:
0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

In [100]:
obj.rank(ascending = False, method = 'max')


Out[100]:
0    2.0
1    7.0
2    2.0
3    4.0
4    5.0
5    6.0
6    4.0
dtype: float64

In [101]:
frame


Out[101]:
a b
0 0 4
1 1 7
2 0 -3
3 1 2

In [102]:
frame.rank(axis = 1)


Out[102]:
a b
0 1.0 2.0
1 1.0 2.0
2 2.0 1.0
3 1.0 2.0

In [3]:
# Small frame with missing values, used to show how reductions treat NaN.
df = DataFrame(
    [[1.4, np.nan], [np.nan, np.nan], [7.1, -4.5], [0.75, -1.3]],
    columns=['one', 'two'],
    index=['a', 'b', 'c', 'd'],
)
df


Out[3]:
one two
a 1.40 NaN
b NaN NaN
c 7.10 -4.5
d 0.75 -1.3

In [4]:
df.sum()


Out[4]:
one    9.25
two   -5.80
dtype: float64

In [5]:
df.sum(axis = 1)


Out[5]:
a    1.40
b    0.00
c    2.60
d   -0.55
dtype: float64

In [7]:
df.sum(axis = 1, skipna = False)


Out[7]:
a     NaN
b     NaN
c    2.60
d   -0.55
dtype: float64

In [8]:
df.idxmax()


Out[8]:
one    c
two    d
dtype: object

In [9]:
df.cumsum()


Out[9]:
one two
a 1.40 NaN
b NaN NaN
c 8.50 -4.5
d 9.25 -5.8

In [10]:
df.describe()


Out[10]:
one two
count 3.000000 2.000000
mean 3.083333 -2.900000
std 3.493685 2.262742
min 0.750000 -4.500000
25% 1.075000 -3.700000
50% 1.400000 -2.900000
75% 4.250000 -2.100000
max 7.100000 -1.300000

In [11]:
obj = Series(list('aabc' * 4))
obj.describe()


Out[11]:
count     16
unique     3
top        a
freq       8
dtype: object

In [ ]: