In [1]:
import numpy as np
import pandas as pd
In [2]:
# Small demo frame: two integer columns and one string column.
df = pd.DataFrame(
    {
        'col1': [1, 2, 3, 4],
        'col2': [444, 555, 666, 444],
        'col3': ['abc', 'def', 'ghi', 'xyz'],
    }
)
df.head()
Out[2]:
In [4]:
# Distinct values of col2, returned as an array in order of first appearance.
df['col2'].unique()
Out[4]:
In [6]:
# Number of distinct values in col2: the length of the array that unique() returns.
len(df['col2'].unique())
Out[6]:
In [8]:
# nunique() gives the same count as len(df['col2'].unique()) for this column.
# The two can differ when values are missing: nunique() skips NaN by default
# (dropna=True), whereas unique() includes NaN in the array it returns.
df['col2'].nunique()
Out[8]:
In [9]:
# How many times each distinct value occurs in col2, most frequent first.
df['col2'].value_counts()
Out[9]:
In [11]:
# Keep only the rows whose col1 value is greater than 2 (boolean-mask selection).
df.loc[df['col1'] > 2]
Out[11]:
In [12]:
# Rows where col1 > 2 AND col2 == 444. The comparison methods avoid the extra
# parentheses that the > / == operators need around each side of `&`.
df[df['col1'].gt(2) & df['col2'].eq(444)]
Out[12]:
In [13]:
def times2(x):
    """Return ``x`` multiplied by 2.

    Parameters
    ----------
    x : any object supporting ``x * 2``
        Numbers are doubled; sequences such as ``str`` are repeated twice.

    Returns
    -------
    The result of ``x * 2``.
    """
    return x * 2
In [14]:
# Call times2 on every element of col1; apply() returns a new Series and leaves df unchanged.
df['col1'].apply(times2)
Out[14]:
In [15]:
# Built-ins work with apply() too: len gives the character count of each string in col3.
df['col3'].apply(len)
Out[15]:
In [17]:
# Double each element of col2 using an inline lambda instead of a named function.
df['col2'].apply(lambda x: x*2)
Out[17]:
In [20]:
# Column labels of df, as an Index object.
df.columns
Out[20]:
In [22]:
# Row labels of df. No index was passed when df was built, so this is the default RangeIndex.
df.index
Out[22]:
In [24]:
# Rows ordered by ascending col2. This returns a new frame; df itself is not reordered.
df.sort_values(by='col2')
Out[24]:
In [25]:
# Boolean frame of the same shape as df: True wherever a value is missing.
# isna() is the method that isnull() aliases.
df.isna()
Out[25]:
In [28]:
# Long-format sample data for the pivot-table example: A, B and C are categorical
# keys, D is the numeric value.
data = {
    'A': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'],
    'B': ['one', 'one', 'two', 'two', 'one', 'one'],
    'C': ['x', 'y', 'x', 'y', 'x', 'y'],
    'D': [1, 3, 2, 5, 4, 1],
}
# NOTE: this rebinds `df`, replacing the col1/col2/col3 frame used in the cells above.
df = pd.DataFrame(data)
df
Out[28]:
In [29]:
# Reshape to one row per (A, B) pair and one column per value of C, filled from D.
# aggfunc='mean' is the pandas default, spelled out here for the reader.
# (A, B, C) combinations that never occur in the data appear as NaN.
df.pivot_table(
    values='D',
    index=['A', 'B'],
    columns='C',
    aggfunc='mean',
)
Out[29]:
In [ ]: