Title: Delete Duplicates In Pandas
Slug: pandas_delete_duplicates
Summary: Delete Duplicates In Pandas
Date: 2016-05-01 12:00
Category: Python
Tags: Data Wrangling
Authors: Chris Albon
In [1]:
import pandas as pd
In [2]:
# Six sample records. Rows 0 and 1 are exact copies of each other; row 2 repeats
# the same name and test scores but carries a different age, so it is NOT a
# full-row duplicate.
raw_data = {
    'first_name':    ['Jason', 'Jason', 'Jason', 'Tina', 'Jake', 'Amy'],
    'last_name':     ['Miller', 'Miller', 'Miller', 'Ali', 'Milner', 'Cooze'],
    'age':           [42, 42, 1111111, 36, 24, 73],
    'preTestScore':  [4, 4, 4, 31, 2, 3],
    'postTestScore': [25, 25, 25, 57, 62, 70],
}

# The dict's insertion order is the desired column order, so reuse its keys
# rather than spelling the column list out a second time.
df = pd.DataFrame(data=raw_data, columns=list(raw_data))
df
Out[2]:
In [3]:
# Boolean mask over the rows: True where every column matches an earlier row.
# keep='first' (the default, spelled out here) leaves the first occurrence False.
df.duplicated(keep='first')
Out[3]:
In [4]:
# Return a new frame without the full-row duplicates, retaining the first
# occurrence of each (the default, spelled out here). `df` itself is not modified.
df.drop_duplicates(keep='first')
Out[4]:
In [5]:
# Compare rows on `first_name` only, and keep the LAST occurrence of each name
# instead of the first.
df.drop_duplicates(subset=['first_name'], keep='last')
Out[5]: