Title: Find Unique Values In Pandas Dataframes
Slug: pandas_find_unique_values
Summary: Find Unique Values In Pandas Dataframes
Date: 2016-05-01 12:00
Category: Python
Tags: Data Wrangling
Authors: Chris Albon


In [1]:
import pandas as pd
import numpy as np

In [2]:
# Example data: one equal-length list per column (12 regiments).
# The 'trucks' column deliberately contains one missing value (np.nan).
raw_data = {
    'regiment': [
        '51st', '29th', '2nd', '19th', '12th', '101st',
        '90th', '30th', '193th', '1st', '94th', '91th',
    ],
    'trucks': [
        'MAZ-7310', np.nan, 'MAZ-7310', 'MAZ-7310',
        'Tatra 810', 'Tatra 810', 'Tatra 810', 'Tatra 810',
        'ZIS-150', 'Tatra 810', 'ZIS-150', 'ZIS-150',
    ],
    'tanks': [
        'Merkava Mark 4', 'Merkava Mark 4', 'Merkava Mark 4',
        'Leopard 2A6M', 'Leopard 2A6M', 'Leopard 2A6M',
        'Arjun MBT', 'Leopard 2A6M', 'Arjun MBT',
        'Arjun MBT', 'Arjun MBT', 'Arjun MBT',
    ],
    'aircraft': [
        'none', 'none', 'none',
        'Harbin Z-9', 'Harbin Z-9', 'none', 'Harbin Z-9',
        'SH-60B Seahawk', 'SH-60B Seahawk', 'SH-60B Seahawk',
        'SH-60B Seahawk', 'SH-60B Seahawk',
    ],
}

# Build the frame, stating the column order explicitly.
df = pd.DataFrame(
    data=raw_data,
    columns=['regiment', 'trucks', 'tanks', 'aircraft'],
)

In [3]:
# Preview the first five rows of the frame
df.head(n=5)


Out[3]:
regiment trucks tanks aircraft
0 51st MAZ-7310 Merkava Mark 4 none
1 29th NaN Merkava Mark 4 none
2 2nd MAZ-7310 Merkava Mark 4 none
3 19th MAZ-7310 Leopard 2A6M Harbin Z-9
4 12th Tatra 810 Leopard 2A6M Harbin Z-9

In [4]:
# Collapse the 'trucks' column to its distinct values by passing it
# through a set, then convert back to a list. A set is unordered, so
# the order of the resulting list is arbitrary.
list(set(df['trucks']))


Out[4]:
[nan, 'Tatra 810', 'MAZ-7310', 'ZIS-150']

In [5]:
# Distinct values of the 'trucks' column via Series.unique(),
# listed in order of first appearance (NaN included)
df['trucks'].unique().tolist()


Out[5]:
['MAZ-7310', nan, 'Tatra 810', 'ZIS-150']