In [2]:
import pandas as pd
import numpy as np
from IPython.display import display

# Small integer frame used by the sorting examples below: three columns,
# four rows, default RangeIndex.
df1 = pd.DataFrame(
    {
        'one': [2, 1, 1, 1],
        'two': [1, 3, 2, 4],
        'three': [5, 4, 3, 2],
    }
)
df1


Out[2]:
one three two
0 2 5 1
1 1 4 3
2 1 3 2
3 1 2 4

In [2]:
# Order the rows by ascending values of column 'two'. A sorted copy is
# returned; df1 itself is not modified.
df1.sort_values(by=['two'], ascending=True)


Out[2]:
one three two
0 2 5 1
2 1 3 2
1 1 4 3
3 1 2 4

In [3]:
# Pick the columns in an explicit order first, then sort the rows by 'one'
# and break ties using 'two'.
(
    df1[['one', 'two', 'three']]
    .sort_values(by=['one', 'two'])
)


Out[3]:
one two three
2 1 2 3
1 1 3 4
3 1 4 2
0 2 1 5

In [4]:
# Sort by 'one', then by 'two' within equal 'one' values; the columns keep
# whatever order df1 already has.
df1.sort_values(['one', 'two'])


Out[4]:
one three two
2 1 3 2
1 1 4 3
3 1 2 4
0 2 5 1

In [5]:
# 5x3 frame of standard-normal draws (unseeded, so values differ per run).
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
)

# Snapshot of the frame before any values are blanked out.
display(df)

# Plain assignment binds a second name to the SAME object -- no data is
# copied -- so the write through df2 below is visible through df as well.
# Binding df2 to df.copy() instead would leave df untouched.
df2 = df
df2.loc[['a', 'c', 'h'], ['one']] = np.nan

# df now shows NaN in column 'one' for rows a, c and h.
df


one two three
a -0.688905 1.559642 -0.399407
c 0.368577 -0.901106 -0.227452
e 1.183389 -1.842312 1.757657
f 0.965310 -1.736156 0.003234
h 2.954985 -1.490437 0.529445
Out[5]:
one two three
a NaN 1.559642 -0.399407
c NaN -0.901106 -0.227452
e 1.183389 -1.842312 1.757657
f 0.965310 -1.736156 0.003234
h NaN -1.490437 0.529445

In [6]:
# Total of column 'one'; missing entries are skipped (skipna is the default,
# spelled out here for the reader).
df['one'].sum(skipna=True)


Out[6]:
2.1486985947566177

In [7]:
# Mean down the rows: one value per column, NaN entries ignored.
df.mean(axis=0)


Out[7]:
one      1.074349
two     -0.882074
three    0.332695
dtype: float64

In [8]:
# Mean across the columns: one value per row label, NaN entries ignored.
df.mean(axis=1)


Out[8]:
a    0.580118
c   -0.564279
e    0.366245
f   -0.255871
h   -0.480496
dtype: float64

In [9]:
# Show the input, then its running total down each column. NaN cells stay
# NaN in the result while the running sum carries on past them.
display(df)
df.cumsum(axis=0)


one two three
a NaN 1.559642 -0.399407
c NaN -0.901106 -0.227452
e 1.183389 -1.842312 1.757657
f 0.965310 -1.736156 0.003234
h NaN -1.490437 0.529445
Out[9]:
one two three
a NaN 1.559642 -0.399407
c NaN 0.658536 -0.626859
e 1.183389 -1.183776 1.130798
f 2.148699 -2.919932 1.134032
h NaN -4.410370 1.663477

In [10]:
display(df)
# Group on the values of 'one' and average the remaining columns. Rows whose
# key is NaN do not form a group, so only the non-NaN keys show up.
df.groupby(by='one').mean()


one two three
a NaN 1.559642 -0.399407
c NaN -0.901106 -0.227452
e 1.183389 -1.842312 1.757657
f 0.965310 -1.736156 0.003234
h NaN -1.490437 0.529445
Out[10]:
two three
one
0.965310 -1.736156 0.003234
1.183389 -1.842312 1.757657

In [11]:
# Fresh random frame (rebinding df2), extended with a string, a boolean and
# a datetime column so the fill examples can show per-dtype behaviour.
df2 = pd.DataFrame(
    np.random.randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
)
df2['four'] = 'bar'
# Built from df (not df2): comparing df's NaN entries with 0 yields False.
df2['five'] = df['one'].gt(0)
df2['timestamp'] = pd.Timestamp('20120101')
display(df2)

# Blank out a float column and the datetime column for rows a, c and h;
# the datetime cells are shown as NaT.
df2.loc[['a', 'c', 'h'], ['one', 'timestamp']] = np.nan
df2


one two three four five timestamp
a -1.001734 -0.519590 -0.363891 bar False 2012-01-01
c -1.179448 -0.409359 1.697587 bar False 2012-01-01
e 0.762478 -1.229147 0.826560 bar True 2012-01-01
f 0.367050 -0.346026 -0.531973 bar True 2012-01-01
h 0.675752 -0.251475 -0.275303 bar False 2012-01-01
Out[11]:
one two three four five timestamp
a NaN -0.519590 -0.363891 bar False NaT
c NaN -0.409359 1.697587 bar False NaT
e 0.762478 -1.229147 0.826560 bar True 2012-01-01
f 0.367050 -0.346026 -0.531973 bar True 2012-01-01
h NaN -0.251475 -0.275303 bar False NaT

In [12]:
# Replace every missing entry with the constant 0 (returns a new frame).
# In the recorded output the datetime column's NaT becomes 1970-01-01.
df2.fillna(value=0)


Out[12]:
one two three four five timestamp
a 0.000000 -0.519590 -0.363891 bar False 1970-01-01
c 0.000000 -0.409359 1.697587 bar False 1970-01-01
e 0.762478 -1.229147 0.826560 bar True 2012-01-01
f 0.367050 -0.346026 -0.531973 bar True 2012-01-01
h 0.000000 -0.251475 -0.275303 bar False 1970-01-01

In [13]:
# Forward fill: each missing entry takes the last valid value above it in
# the same column; leading gaps (rows a and c here) have nothing to copy
# from and stay missing.
# DataFrame.ffill() is the direct equivalent of fillna(method='pad'), whose
# `method` argument is deprecated in current pandas.
df2.ffill()


Out[13]:
one two three four five timestamp
a NaN -0.519590 -0.363891 bar False NaT
c NaN -0.409359 1.697587 bar False NaT
e 0.762478 -1.229147 0.826560 bar True 2012-01-01
f 0.367050 -0.346026 -0.531973 bar True 2012-01-01
h 0.367050 -0.251475 -0.275303 bar False 2012-01-01

In [14]:
# Backward fill: each missing entry takes the next valid value below it in
# the same column; trailing gaps (row h here) have nothing to copy from and
# stay missing.
# DataFrame.bfill() is the direct equivalent of fillna(method='bfill'),
# whose `method` argument is deprecated in current pandas.
df2.bfill()


Out[14]:
one two three four five timestamp
a 0.762478 -0.519590 -0.363891 bar False 2012-01-01
c 0.762478 -0.409359 1.697587 bar False 2012-01-01
e 0.762478 -1.229147 0.826560 bar True 2012-01-01
f 0.367050 -0.346026 -0.531973 bar True 2012-01-01
h NaN -0.251475 -0.275303 bar False NaT

In [15]:
# Fill each numeric column's gaps with that column's mean.
# numeric_only=True keeps the string column 'four' and the datetime column
# out of the mean; without it, current pandas raises TypeError on the string
# column instead of silently dropping it. Columns absent from the mean
# Series (e.g. 'timestamp') are left unfilled.
df2.fillna(df2.mean(numeric_only=True))


Out[15]:
one two three four five timestamp
a 0.564764 -0.519590 -0.363891 bar False NaT
c 0.564764 -0.409359 1.697587 bar False NaT
e 0.762478 -1.229147 0.826560 bar True 2012-01-01
f 0.367050 -0.346026 -0.531973 bar True 2012-01-01
h 0.564764 -0.251475 -0.275303 bar False NaT

In [16]:
# Two float columns with interior gaps (one in 'A', two consecutive in 'B').
df3 = pd.DataFrame(
    {
        'A': [1, 2.1, np.nan, 4.7, 5.6, 6.8],
        'B': [0.25, np.nan, np.nan, 4, 12.2, 14.4],
    }
)
# Default interpolation is linear: gaps are filled with evenly spaced values
# between the neighbouring valid entries.
df3.interpolate()


Out[16]:
A B
0 1.0 0.25
1 2.1 1.50
2 3.4 2.75
3 4.7 4.00
4 5.6 12.20
5 6.8 14.40

In [7]:
sample = pd.DataFrame({'PR': [10, 100, 40]})
# Series.mask swaps in `other` wherever the condition holds, so every value
# below 90 becomes NaN (the column is float in the result).
sample['PR'] = sample['PR'].mask(cond=sample['PR'].lt(90), other=np.nan)
sample


Out[7]:
PR
0 NaN
1 100.0
2 NaN

In [6]:
# Build the column as float64 up front: NaN cannot be stored in an int64
# column, and writing it via .loc would rely on silent upcasting, which
# current pandas warns about and plans to turn into an error.
sample = pd.DataFrame({'PR': [10, 100, 40]}, dtype='float64')
# Boolean-mask assignment: every value below 90 is replaced by NaN in place.
sample.loc[sample['PR'] < 90, 'PR'] = np.nan
sample


Out[6]:
PR
0 NaN
1 100.0
2 NaN

In [ ]: