In [55]:
import pandas as pd
import numpy as np
In [68]:
# Print NumPy floating-point values with 2 digits of precision (NumPy output only).
np.set_printoptions(precision=2)
In [57]:
# Load the semicolon-delimited CSV; the path is relative to the current working directory.
data = pd.read_csv("../data/beauty.csv", sep=";")
In [58]:
# Preview the first rows of the frame (head() defaults to 5 rows).
data.head()
Out[58]:
In [59]:
# Print a concise summary: column dtypes, non-null counts and memory usage.
data.info()
In [60]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()
Out[60]:
In [61]:
# Select a single column by label with [] and preview its first rows.
data['exper'].head()
Out[61]:
In [62]:
# Inspect the type returned by single-column selection (a pandas Series for a DataFrame).
type(data['exper'])
Out[62]:
In [63]:
# Label-based selection: a .loc slice includes BOTH endpoints, so 0:5 selects labels 0 through 5.
data.loc[0:5, ['wage', 'female']]
Out[63]:
In [64]:
# Position-based selection: all rows, columns at positions 2 and 3 (an .iloc slice excludes its end).
data.iloc[:,2:4].head()
Out[64]:
In [65]:
# Median wage for rows with female == 1, then for rows with female == 0.
(
    data.loc[data['female'] == 1, 'wage'].median(),
    data.loc[data['female'] == 0, 'wage'].median(),
)
Out[65]:
In [72]:
# Median wage among rows with female == 0, split into married == 1 versus married != 1.
(
    data.loc[(data['female'] == 0) & (data['married'] == 1), 'wage'].median(),
    data.loc[(data['female'] == 0) & (data['married'] != 1), 'wage'].median(),
)
Out[72]:
In [82]:
# Print the mean of the 'female' indicator within each 'looks' group.
# Group by the scalar label rather than a one-element list: with ['looks'],
# pandas >= 2.0 yields 1-tuple keys such as (1,) instead of the plain value.
for look, sub_df in data.groupby('looks'):
    print(look, sub_df['female'].mean())
In [83]:
# Mean wage within each 'looks' group, returned as a Series indexed by the group value.
data.groupby('looks')['wage'].mean()
Out[83]:
In [84]:
# Print the mean wage for every (looks, female) combination; each key is a 2-tuple.
for group_key, group_rows in data.groupby(['looks', 'female']):
    mean_wage = group_rows['wage'].mean()
    print(group_key, mean_wage)
In [90]:
# Median wage and experience within each 'looks' group.
# Use the string alias: passing np.median to groupby.agg is deprecated in
# pandas >= 2.1 (FutureWarning) and its behaviour is scheduled to change.
data.groupby('looks')[['wage', 'exper']].agg('median')
Out[90]:
In [94]:
# Contingency table: number of rows for each (female, married) combination.
pd.crosstab(data['female'], data['married'])
Out[94]:
In [97]:
# Contingency table: number of rows for each (female, looks) combination.
pd.crosstab(data['female'], data['looks'])
Out[97]:
In [111]:
# Flag rows whose wage is strictly above the 75th percentile of all wages (1 = above, 0 = not).
data['is_rich'] = (data['wage'] > data['wage'].quantile(0.75)).astype('int64')
# End the cell with the bare expression so the notebook renders a rich table
# instead of the plain-text dump produced by print().
data.head()
In [112]:
# apply() with the default axis=0 calls np.mean once per column, yielding each column's mean.
data.apply(np.mean)
Out[112]:
In [114]:
def string_gender(female):
    """Return the gender label for a ``female`` indicator value.

    Args:
        female: Truthy (e.g. ``1``) for female, falsy (e.g. ``0``) for male.

    Returns:
        ``'female'`` when ``female`` is truthy, otherwise ``'male'``.
    """
    return 'female' if female else 'male'
In [118]:
# Label the first ten rows; reuse the string_gender helper defined earlier in
# the notebook instead of repeating its logic in an inline lambda.
data['female'].apply(string_gender).head(10)
Out[118]:
In [120]:
# Lookup table translating the codes 1 and 0 into text labels.
d = {1: 'union', 0: 'nonunion'}
In [121]:
# Replace each 'union' value with its label from `d` (values absent from `d` become NaN)
# and show the first five results.
data['union'].map(d).head(5)
Out[121]: