In [2]:
import pandas as pd
import numpy as np
from IPython.display import display
# Small integer frame for the sorting examples. Column 'one' contains repeated
# values, so a sort on 'one' alone leaves ties for a second key to break.
df1 = pd.DataFrame(
    {
        'one': [2, 1, 1, 1],
        'two': [1, 3, 2, 4],
        'three': [5, 4, 3, 2],
    }
)
df1
Out[2]:
In [2]:
# Rows ordered by ascending 'two'. sort_values returns a new frame; df1 itself
# keeps its original row order.
df1.sort_values('two')
Out[2]:
In [3]:
# Pick the three columns explicitly, then sort by 'one' and break ties in
# 'one' using 'two'.
(
    df1[['one', 'two', 'three']]
    .sort_values(['one', 'two'])
)
Out[3]:
In [4]:
# Multi-key sort on the whole frame: primary key 'one', secondary key 'two'.
df1.sort_values(['one', 'two'])
Out[4]:
In [5]:
# A locally seeded generator makes the frame identical on every
# Restart & Run All without touching NumPy's global random state.
df = pd.DataFrame(
    np.random.RandomState(0).randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
)
display(df)  # the frame BEFORE any values are blanked out

# NOTE: plain assignment binds a second name to the SAME object; nothing is
# copied. The .loc write through `df2` below therefore also changes `df`,
# which is the frame the final line of this cell displays.
# Use `df2 = df.copy()` instead when an independent frame is wanted.
df2 = df
df2.loc[['a', 'c', 'h'], ['one']] = np.nan
df
Out[5]:
In [6]:
# Total of column 'one'. skipna=True is the pandas default, spelled out here
# to make explicit that missing values are ignored rather than propagated.
df['one'].sum(skipna=True)
Out[6]:
In [7]:
# axis=0: collapse the rows, giving one mean per column.
df.mean(axis=0)
Out[7]:
In [8]:
# axis=1: collapse the columns, giving one mean per row label.
df.mean(axis=1)
Out[8]:
In [9]:
# Show the input first, then its running column-wise totals (axis=0 is the
# default direction for cumsum, written out here for clarity).
display(df)
df.cumsum(axis=0)
Out[9]:
In [10]:
# Show the input first, then the per-group means using the values of column
# 'one' as group keys.
# NOTE(review): with groupby's default dropna=True, rows whose key is NaN are
# left out of the result - confirm that is the behaviour being demonstrated.
display(df)
df.groupby(by='one').mean()
Out[10]:
In [11]:
# Mixed-dtype frame (float, str, bool, datetime) for the missing-data examples.
# A locally seeded generator keeps the values identical on every
# Restart & Run All without touching NumPy's global random state.
df2 = pd.DataFrame(
    np.random.RandomState(1).randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
)
df2['four'] = 'bar'
# The flag is derived from this frame's own 'one' column, so the cell does not
# depend on the separate frame `df` (whose 'one' column holds different data).
df2['five'] = df2['one'] > 0
df2['timestamp'] = pd.Timestamp('20120101')
display(df2)  # the complete frame, before values are blanked out

# Blank out three rows of a float column and of the datetime column.
df2.loc[['a', 'c', 'h'], ['one', 'timestamp']] = np.nan
df2
Out[11]:
In [12]:
# Replace every missing entry with the constant 0; returns a new frame.
df2.fillna(value=0)
Out[12]:
In [13]:
# Forward fill: each missing entry takes the last valid value above it in the
# same column (leading gaps stay missing). ffill() replaces
# fillna(method='pad'), whose `method` argument is deprecated since pandas 2.1.
df2.ffill()
Out[13]:
In [14]:
# Backward fill: each missing entry takes the next valid value below it in the
# same column (trailing gaps stay missing). bfill() replaces
# fillna(method='bfill'), whose `method` argument is deprecated since pandas 2.1.
df2.bfill()
Out[14]:
In [15]:
# Fill gaps with the mean of their own column. numeric_only=True restricts the
# means to numeric/bool columns: df2 also holds a string column ('four'), and
# since pandas 2.0 mean() raises TypeError on it instead of silently skipping.
# Columns without a computed mean (e.g. 'timestamp') are left unfilled.
df2.fillna(df2.mean(numeric_only=True))
Out[15]:
In [16]:
# Two float columns with interior gaps (one gap in 'A', two consecutive gaps
# in 'B'); interpolate() with its default settings fills them linearly.
df3 = pd.DataFrame(
    {
        'A': [1, 2.1, np.nan, 4.7, 5.6, 6.8],
        'B': [0.25, np.nan, np.nan, 4, 12.2, 14.4],
    }
)
df3.interpolate()
Out[16]:
In [7]:
sample = pd.DataFrame({'PR': [10, 100, 40]})
# mask() swaps the entries where the condition holds for NaN (its default
# replacement), so only values of at least 90 survive.
sample['PR'] = sample['PR'].mask(sample['PR'].lt(90))
sample
Out[7]:
In [6]:
# Same result as the mask() variant, written as a boolean-indexed assignment.
# The column is created as float64 up front: NaN cannot live in an int64
# column, and relying on .loc to upcast silently is deprecated in pandas 2.1+
# (incompatible-dtype setitem) and slated to become an error.
sample = pd.DataFrame({'PR': [10, 100, 40]}, dtype='float64')
sample.loc[sample['PR'] < 90, 'PR'] = np.nan
sample
Out[6]:
In [ ]: