In [3]:
import pandas as pd
import numpy as np
%matplotlib inline
In [40]:
# Load titanic.csv into a DataFrame and preview its first five rows.
df = pd.read_csv('titanic.csv')
df.head(5)
Out[40]:
In [41]:
def calc_shares(target_filter, category_filter):
    """Compare the target share overall, inside a category, and in a blend of both.

    Parameters
    ----------
    target_filter
        Boolean mask (must support ``.sum()``, ``len()`` and ``&``, e.g. a
        ``pd.Series``); True marks rows that belong to the target group.
    category_filter
        Boolean mask aligned with ``target_filter``; True marks rows that
        belong to the category being examined.

    Returns
    -------
    tuple
        ``(global_share, share_mix, cat_share)`` where

        * ``global_share`` = target rows / all rows
        * ``share_mix`` = (target rows + target rows in category)
          / (all rows + category rows), i.e. the share obtained when the
          category rows are counted a second time
        * ``cat_share`` = target rows in category / category rows

        Any share whose denominator is zero (empty input or empty category)
        is returned as NaN.
    """
    def _ratio(numerator, denominator):
        # Guard explicitly: a zero denominator would otherwise emit a numpy
        # RuntimeWarning (0/0) or raise ZeroDivisionError for plain ints.
        if denominator == 0:
            return float('nan')
        return numerator / denominator

    target_count = target_filter.sum()
    total_count = len(target_filter)
    target_cat_count = (category_filter & target_filter).sum()
    cat_count = category_filter.sum()

    global_share = _ratio(target_count, total_count)
    cat_share = _ratio(target_cat_count, cat_count)
    share_mix = _ratio(target_count + target_cat_count, total_count + cat_count)
    return global_share, share_mix, cat_share
In [46]:
# One row per category: the survivor share overall, in the blended
# population, and inside the category itself.
pd.DataFrame(
    [
        (label,) + calc_shares(df.survived == 1, category_mask)
        for label, category_mask in (
            ('class=Third', df['class'] == 'Third'),
            ('class=Second', df['class'] == 'Second'),
            ('class=First', df['class'] == 'First'),
            ('who=child', df['who'] == 'child'),
        )
    ],
    columns=['category', 'global_share', 'mixed_share', 'in_category_share'],
)
Out[46]:
In [17]:
# Starting row counts for groups A and B (both begin at the same size).
base_a = base_b = 500
# Basic category count: one step of rows added per unit passed to calc_mix.
atom = 50
def calc_mix(share_a, share_b):
    """Return group A's fraction of the combined population after additions.

    ``share_a`` and ``share_b`` are each multiplied by the module-level
    ``atom`` to get the number of rows added on top of ``base_a`` and
    ``base_b``. The result is ``(base_a + added_a)`` divided by the sum of
    both bases and both additions.
    """
    added_a, added_b = share_a * atom, share_b * atom
    numerator = base_a + added_a
    denominator = base_a + base_b + added_a + added_b
    return numerator / denominator
# Grid of calc_mix values: rows step A's multiplier 1..10, columns step B's.
slist = [
    [calc_mix(mult_a, mult_b) for mult_b in range(1, 11)]
    for mult_a in range(1, 11)
]
shares = np.array(slist)

# Label each axis with the added rows expressed as a fraction of the base.
columns = ['b:{:.2f}'.format(step * atom / base_b) for step in range(1, 11)]
index = ['a:{:.2f}'.format(step * atom / base_a) for step in range(1, 11)]

mixes = pd.DataFrame(data=shares, index=index, columns=columns)
mixes
Out[17]:
In [18]:
# Transpose so each `a:*` row of `mixes` becomes one line drawn across the `b:*` labels.
mixes.transpose().plot(kind='line', figsize=(12, 8))
Out[18]:
In [ ]: