In [1]:
import pandas as pd
# Small demo frame: an integer column, a numeric column in which 444 appears
# twice, and a column of three-letter strings.
df = pd.DataFrame(
    {
        'col1': [1, 2, 3, 4],
        'col2': [444, 555, 666, 444],
        'col3': ['abc', 'def', 'ghi', 'xyz'],
    }
)
df.head()
Out[1]:
In [2]:
# Distinct values that occur in col2 (duplicates collapsed).
df['col2'].unique()
Out[2]:
In [3]:
# Count of distinct values in col2.
df['col2'].nunique()
Out[3]:
In [4]:
# How many times each distinct col2 value occurs.
df['col2'].value_counts()
Out[4]:
In [5]:
# Row selection with criteria on two columns: keep a row only when
# col1 is greater than 2 AND col2 equals 444.
newdf = df[df['col1'].gt(2) & df['col2'].eq(444)]
In [6]:
# Display the filtered frame.
newdf
Out[6]:
In [7]:
def times2(x):
    """Return ``x`` multiplied by 2.

    Works for any operand that supports ``* 2``: numbers are doubled,
    sequences such as strings or lists are repeated twice.
    """
    return x * 2
In [8]:
# Call times2 on every element of col1 and collect the results.
df['col1'].apply(times2)
Out[8]:
In [9]:
# apply also accepts built-ins: here, len of each entry in col3.
df['col3'].apply(len)
Out[9]:
In [10]:
# Sum of the values in col1.
df['col1'].sum()
Out[10]:
Permanently Removing a Column
In [11]:
# Remove col1 from df in place; df is modified, no new frame is created.
# NOTE: running this cell a second time on the same kernel raises KeyError,
# because col1 no longer exists.
del df['col1']
In [12]:
# Display df to confirm which columns remain.
df
Out[12]:
Get column and index names:
In [13]:
# Column labels of df.
df.columns
Out[13]:
In [14]:
# Row index of df.
df.index
Out[14]:
Sorting and Ordering a DataFrame:
In [15]:
# Display df in its current row order, for comparison with the sorted result.
df
Out[15]:
In [16]:
# Rows ordered by col2. inplace=False is the default, so a new sorted
# frame is returned and df itself keeps its original order.
df.sort_values(by='col2')
Out[16]:
Find Null Values or Check for Null Values
In [17]:
# Boolean frame shaped like df: True wherever a value is missing.
df.isnull()
Out[17]:
In [18]:
# Drop rows that contain NaN values. The result is not assigned, so df
# itself is left unchanged.
# NOTE(review): df appears to hold no NaN at this point in the notebook,
# so no rows would actually be dropped here — confirm.
df.dropna()
Out[18]:
Filling in NaN values with something else:
In [19]:
import numpy as np
In [20]:
# Rebuild df with one missing value in col1 (last row) and one in col2
# (first row) so the NaN-handling calls have something to act on.
df = pd.DataFrame(
    {
        'col1': [1, 2, 3, np.nan],
        'col2': [np.nan, 555, 666, 444],
        'col3': ['abc', 'def', 'ghi', 'xyz'],
    }
)
df.head()
Out[20]:
In [21]:
# Replace every missing value with the string 'FILL'. The result is a new
# frame; df is not modified because the return value is not assigned.
df.fillna(value='FILL')
Out[21]:
In [22]:
# Source records for the pivot-table example: A, B and C are categorical
# keys, D is the numeric value to be summarised.
data = {
    'A': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'],
    'B': ['one', 'one', 'two', 'two', 'one', 'one'],
    'C': ['x', 'y', 'x', 'y', 'x', 'y'],
    'D': [1, 3, 2, 5, 4, 1],
}
df = pd.DataFrame(data)
In [23]:
# Display the frame that the pivot table is built from.
df
Out[23]:
In [24]:
# Reshape df: one row per (A, B) pair, one column per value of C, cells
# taken from D. No aggfunc is given, so pivot_table's default aggregation
# applies; (A, B, C) combinations absent from the data appear as NaN.
df.pivot_table(values='D', index=['A', 'B'], columns=['C'])
Out[24]: