In [1]:
import pandas as pd
In [2]:
# Read the sample CSV into a DataFrame and show its contents.
csv_path = 'data/src/sample_pandas_normal.csv'
df = pd.read_csv(csv_path)
print(df)
In [3]:
# Add one more row to the end of the frame and renumber the index from 0.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# row is wrapped in a one-row DataFrame and joined with pd.concat instead.
new_row = pd.DataFrame([{'name': 'Dave', 'age': 68, 'state': 'TX', 'point': 70}])
df = pd.concat([df, new_row], ignore_index=True)
print(df)
In [4]:
# Boolean Series: True for every row that repeats an earlier row
# (default keep='first', all columns compared).
dup_mask = df.duplicated()
print(dup_mask)
In [5]:
# Select only the rows flagged as duplicates via boolean indexing.
print(df.loc[df.duplicated()])
In [6]:
# With keep='last' the final occurrence is treated as the original, so the
# earlier occurrences are the ones marked True.
dup_mask_last = df.duplicated(keep='last')
print(dup_mask_last)
In [7]:
# With keep=False every member of a duplicate group is marked True.
dup_mask_all = df.duplicated(keep=False)
print(dup_mask_all)
In [8]:
# Compare rows on the 'state' column only when looking for duplicates.
dup_by_state = df.duplicated(subset='state')
print(dup_by_state)
In [9]:
# Rows count as duplicates only when both 'state' and 'point' match.
key_columns = ['state', 'point']
dup_by_keys = df.duplicated(subset=key_columns)
print(dup_by_keys)
In [10]:
# Tally how many rows are flagged True (duplicate) versus False.
dup_flags = df.duplicated()
flag_counts = dup_flags.value_counts()
print(flag_counts)
In [11]:
# Number of rows flagged as duplicates.
# Summing the boolean mask counts the True entries directly; unlike
# value_counts()[True] it yields 0 instead of raising KeyError when the
# frame contains no duplicates.
print(df.duplicated().sum())
In [12]:
# Number of rows that belong to any duplicate group (keep=False flags every
# member). Summing the boolean mask counts the True entries and yields 0,
# rather than a KeyError from value_counts()[True], when nothing is duplicated.
print(df.duplicated(keep=False).sum())
In [13]:
# Invert the duplicate mask to keep only rows that are not flagged.
not_duplicate = ~df.duplicated()
print(df[not_duplicate])
In [14]:
# drop_duplicates() returns a new frame without the repeated rows;
# df itself is left unchanged here.
deduplicated = df.drop_duplicates()
print(deduplicated)
In [15]:
# keep=False discards every row that has a duplicate, originals included.
without_any_duplicates = df.drop_duplicates(keep=False)
print(without_any_duplicates)
In [16]:
# Keep the first row seen for each distinct 'state' value.
first_per_state = df.drop_duplicates(subset='state')
print(first_per_state)
In [17]:
# Reduce df to the last row seen for each distinct 'state' value and show
# the result; the de-duplicated frame is bound back to df.
df = df.drop_duplicates(subset='state', keep='last')
print(df)
In [18]:
# Reload the CSV so df is rebuilt from the file before the groupby examples.
source_csv = 'data/src/sample_pandas_normal.csv'
df = pd.read_csv(source_csv)
print(df)
In [19]:
# Per-state mean of the numeric columns.
# numeric_only=True is required on pandas >= 2.0, where mean() no longer
# silently drops non-numeric columns (the agg calls below treat 'name' as
# strings) and raises TypeError instead.
print(df.groupby('state').mean(numeric_only=True))
In [20]:
# Aggregate each state group: concatenate the names with commas and take
# the mean of 'age' and 'point'.
join_agg_spec = {
    'name': lambda names: ','.join(names),
    'age': 'mean',
    'point': 'mean',
}
print(df.groupby('state').agg(join_agg_spec))
In [21]:
# Aggregate each state group: collect the names into a Python list and take
# the mean of 'age' and 'point'.
list_agg_spec = {
    'name': list,
    'age': 'mean',
    'point': 'mean',
}
print(df.groupby('state').agg(list_agg_spec))