In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
In [2]:
# Load the tips data set (a CSV in the working directory) into a DataFrame.
dat = pd.read_csv(filepath_or_buffer='tips.csv')
In [3]:
# Preview the first five rows to sanity-check the load.
dat.head(n=5)
Out[3]:
In [4]:
# Contingency table: number of rows for each (sex, smoker) combination.
# pd.crosstab counts co-occurrences directly. The previous pivot_table call
# selected only the two key columns, leaving no value column for `len` to
# aggregate, so it could not produce the counts.
pd.crosstab(dat['sex'], dat['smoker'])
Out[4]:
In [22]:
# Grouped bar chart of the (sex, smoker) row counts: one group per sex,
# one bar per smoker value. pd.crosstab supplies the counts directly, since a
# pivot_table over only the two key columns has no value column to aggregate.
# The former `color=None` argument was the default and has been dropped.
pd.crosstab(dat['sex'], dat['smoker']).plot.bar()
Out[22]:
In [4]:
# Display pandas' descriptive-statistics summary for `dat`.
dat.describe()
Out[4]:
In [5]:
# Frequency of each value in `sex`; dropna=False keeps missing values as
# their own category so gaps in the data are visible.
dat['sex'].value_counts(dropna=False)
Out[5]:
In [6]:
# Frequency of each value in `smoker`, counting missing values too.
dat['smoker'].value_counts(dropna=False)
Out[6]:
In [7]:
# Frequency of each value in `day`, counting missing values too.
dat['day'].value_counts(dropna=False)
Out[7]:
In [8]:
# Frequency of each value in `time`, counting missing values too.
dat['time'].value_counts(dropna=False)
Out[8]:
In [9]:
# Show the dtype pandas assigned to each column of `dat`.
dat.dtypes
Out[9]:
In [12]:
# Keep only the floating-point columns of `dat`.
# `np.float` was merely an alias for the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError. The builtin
# is the drop-in replacement.
dat_float = dat.select_dtypes(include=[float])
In [13]:
# Preview the first five rows of the float-only frame.
dat_float.head(n=5)
Out[13]:
In [15]:
# Column-wise median of the float columns (one value per column).
dat_float_median = dat_float.median(axis=0)
In [17]:
# Display the result of `dat_float.median()` computed in the previous cell.
dat_float_median
Out[17]:
In [18]:
# Select from `dat` the columns named by the median Series' index (i.e. the
# columns a median was computed for) and preview the first five rows.
dat[dat_float_median.index].head(n=5)
Out[18]:
In [9]:
# One-hot encode the four listed columns and preview the first five rows.
# drop_first=True omits the first level of each column to avoid redundant
# indicator columns.
pd.get_dummies(
    dat[['sex', 'smoker', 'day', 'time']],
    drop_first=True,
).head(n=5)
Out[9]:
In [ ]: