In [52]:
import pandas as pd
# Small demo frame: two integer columns (col2 repeats the value 444)
# and one string column.
df = pd.DataFrame(
    dict(
        col1=[1, 2, 3, 4],
        col2=[444, 555, 666, 444],
        col3=['abc', 'def', 'ghi', 'xyz'],
    )
)
df.head()
Out[52]:
In [53]:
# Distinct values found in col2
df.loc[:, 'col2'].unique()
Out[53]:
In [54]:
# Number of distinct values in col2
df.loc[:, 'col2'].nunique()
Out[54]:
In [55]:
# How many times each distinct col2 value occurs
df.loc[:, 'col2'].value_counts()
Out[55]:
In [56]:
# Boolean-mask selection across two columns:
# keep only the rows where col1 > 2 AND col2 == 444.
newdf = df[df['col1'].gt(2) & df['col2'].eq(444)]
In [57]:
# Show the rows that satisfied both conditions
newdf
Out[57]:
In [58]:
def times2(x):
    """Return ``x`` multiplied by 2.

    Works with any value that supports ``*`` with an int: numbers are
    doubled, while sequences such as strings are repeated twice.
    """
    return x * 2
In [59]:
# Call times2 on every element of col1; the result is a new Series
df.loc[:, 'col1'].apply(times2)
Out[59]:
In [60]:
# apply also accepts built-ins: here, the length of each string in col3
df.loc[:, 'col3'].apply(len)
Out[60]:
In [61]:
# Total of all values in col1
df.loc[:, 'col1'].sum()
Out[61]:
Permanently Removing a Column
In [62]:
# Removes col1 from df in place, so no copy of the original frame remains.
# Running this cell a second time raises KeyError because the column is gone.
del df['col1']
In [63]:
# Confirm that col1 is no longer part of df
df
Out[63]:
Get column and index names:
In [64]:
# Column labels of df
df.columns
Out[64]:
In [65]:
# Row labels of df
df.index
Out[65]:
Sorting and Ordering a DataFrame:
In [66]:
# Frame in its current (unsorted) row order, for comparison with the sorted result
df
Out[66]:
In [67]:
# sort_values returns a new frame ordered by col2 (ascending);
# df keeps its original row order because inplace defaults to False.
df.sort_values(by='col2', ascending=True)
Out[67]:
Find or Check for Null Values:
In [68]:
# Boolean frame of the same shape as df: True wherever a value is missing
# (isna and isnull are aliases of each other)
df.isna()
Out[68]:
In [69]:
# Return a copy of df without any row that contains at least one NaN.
# The defaults are spelled out here; df itself is left unchanged.
df.dropna(axis='index', how='any')
Out[69]:
Filling in NaN values with something else:
In [71]:
import numpy as np
In [72]:
# Rebuild df with one missing value in col1 (last row) and one in col2
# (first row) so the NaN-handling calls have something to act on.
df = pd.DataFrame(
    {
        'col1': [1, 2, 3, np.nan],
        'col2': [np.nan, 555, 666, 444],
        'col3': ['abc', 'def', 'ghi', 'xyz'],
    }
)
df.head()
Out[72]:
In [75]:
# Return a copy of df with every NaN replaced by the string 'FILL';
# df itself is not modified.
df.fillna(value='FILL')
Out[75]:
In [89]:
# Sample data for the pivot-table example: A, B and C are categorical
# keys, D holds the numeric values to aggregate.
data = {
    'A': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'],
    'B': ['one', 'one', 'two', 'two', 'one', 'one'],
    'C': ['x', 'y', 'x', 'y', 'x', 'y'],
    'D': [1, 3, 2, 5, 4, 1],
}
df = pd.DataFrame(data=data)
In [90]:
# Show the long-format frame built from data
df
Out[90]:
In [91]:
# Reshape to a table with (A, B) pairs as rows and the values of C as
# columns. Each cell is the mean of D for that combination (mean is the
# default aggregation, spelled out here); combinations that never occur
# in the data come out as NaN.
df.pivot_table(
    values='D',
    index=['A', 'B'],
    columns=['C'],
    aggfunc='mean',
)
Out[91]: