6.32 - Pandas operations



In [1]:

    
import numpy as np
import pandas as pd



In [2]:

    
# Small demo frame: two integer columns and one string column.
# col2 deliberately repeats the value 444 so the uniqueness helpers
# further down have a duplicate to report.
df = pd.DataFrame(
    {
        "col1": [1, 2, 3, 4],
        "col2": [444, 555, 666, 444],
        "col3": ["abc", "def", "ghi", "xyz"],
    }
)
df.head()

Finding unique values in a DataFrame

.unique(), .nunique(), and .value_counts()



In [4]:

    
# Distinct values of col2, returned as a NumPy array.
df['col2'].unique()









    Out[4]:





array([444, 555, 666])



In [6]:

    
len(df['col2'].unique()) # Number of distinct values = length of the array returned by .unique()









    Out[6]:





3



In [8]:

    
# .nunique() gives the same count as len(df['col2'].unique()) here.
# Caveat: nunique() skips NaN by default (dropna=True), while unique()
# keeps NaN in its result, so the two differ on columns with missing values.
df['col2'].nunique()









    Out[8]:





3



In [9]:

    
# Number of occurrences of each distinct col2 value, most frequent first.
df['col2'].value_counts()









    Out[9]:





444    2
555    1
666    1
Name: col2, dtype: int64

Conditional selection again



In [11]:

    
# Boolean-mask selection: keep only the rows whose col1 value exceeds 2.
df.loc[df["col1"] > 2]



In [12]:

    
# Two row conditions combined with element-wise AND (&).
# Each comparison needs its own parentheses because & binds tighter
# than > and == in Python.
df[
    (df["col1"] > 2)
    & (df["col2"] == 444)
]

The .apply() method



In [13]:

    
def times2(x):
    """Return ``x * 2``.

    Works for anything that supports multiplication by an int: numbers
    are doubled, sequences such as strings are repeated twice.
    """
    doubled = x * 2
    return doubled



In [14]:

    
df['col1'].apply(times2) # call times2 on every element of col1; returns a new Series









    Out[14]:





0    2
1    4
2    6
3    8
Name: col1, dtype: int64



In [15]:

    
# Built-in functions work with .apply() too: length of each string in col3.
df['col3'].apply(len)









    Out[15]:





0    3
1    3
2    3
3    3
Name: col3, dtype: int64



In [17]:

    
# Double every col2 value using an inline lambda instead of a named function.
df["col2"].apply(lambda value: value * 2)









    Out[17]:





0     888
1    1110
2    1332
3     888
Name: col2, dtype: int64

DataFrame attributes



In [20]:

    
# Column labels of the DataFrame, as an Index object.
df.columns









    Out[20]:





Index([u'col1', u'col2', u'col3'], dtype='object')



In [22]:

    
# Row labels; no index was supplied at construction, so this is a default RangeIndex.
df.index









    Out[22]:





RangeIndex(start=0, stop=4, step=1)

Sorting and ordering a DataFrame



In [24]:

    
# Rows ordered by col2, ascending. sort_values returns a new DataFrame
# (inplace defaults to False), so df itself keeps its original order.
df.sort_values(by="col2")



In [25]:

    
# Boolean frame with the same shape as df: True where a value is missing.
# Every entry is False here because df has no missing values.
df.isnull()









    Out[25]:






  
    
      
      col1
      col2
      col3
    
  
  
    
      0
      False
      False
      False
    
    
      1
      False
      False
      False
    
    
      2
      False
      False
      False
    
    
      3
      False
      False
      False

PivotTables



In [28]:

    
# Long-format records for the pivot-table demo: A, B and C are
# categorical keys, D is the numeric value to be aggregated.
data = dict(
    A=["foo", "foo", "foo", "bar", "bar", "bar"],
    B=["one", "one", "two", "two", "one", "one"],
    C=["x", "y", "x", "y", "x", "y"],
    D=[1, 3, 2, 5, 4, 1],
)

# NOTE: this rebinds `df`; the frame from the earlier sections is replaced.
df = pd.DataFrame(data)
df



In [29]:

    
# Reshape: (A, B) pairs become a row MultiIndex, the distinct C values
# become columns, and each cell holds D aggregated with the default
# aggfunc (mean). (A, B, C) combinations absent from the data are NaN.
df.pivot_table(
    values="D",
    index=["A", "B"],
    columns="C",
)



In [ ]:

	col1	col2	col3
0	False	False	False
1	False	False	False
2	False	False	False
3	False	False	False