In [25]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [29]:
# Apply seaborn's plotting defaults, scaling every font element by FONT_SCALE.
FONT_SCALE = 1.8
sns.set(font_scale=FONT_SCALE)

In [5]:
# Read the CSV into a DataFrame; the path is resolved against the kernel's
# current working directory.
DATA_PATH = 'test_data.csv'
df = pd.read_csv(DATA_PATH)

In [6]:
# Preview the first five rows as the cell's rich output.
df.head(n=5)


Out[6]:
entity_id true_label score age gender_female incident_rate random_feature_1 random_feature_2 random_feature_3 random_feature_4 ... random_feature_80 random_feature_81 random_feature_82 random_feature_83 random_feature_84 random_feature_85 random_feature_86 random_feature_87 random_feature_88 random_feature_89
0 310 True 0.6625 90.689034 0 0.418747 1.0 0.0 1.0 1.0 ... -1.063749 5.917597 1.436115 -1.912858 -0.569543 2.965550 -2.513481 -0.340876 -2.947102 3.077114
1 275 True 0.6255 58.470392 0 0.832463 1.0 0.0 1.0 1.0 ... -11.846448 -6.032877 1.128381 -1.377320 -0.008848 14.424553 -8.934291 0.814477 -1.491280 -0.161036
2 297 True 0.6120 50.942681 0 0.616363 0.0 1.0 1.0 0.0 ... -7.282588 -2.098788 -2.724240 -0.235813 -0.278502 -0.138277 1.596197 0.782371 -1.799455 -0.158378
3 413 True 0.6095 66.473848 0 0.880119 1.0 0.0 1.0 0.0 ... -3.701282 1.775662 0.749186 2.046784 -1.079093 2.349472 9.136439 -0.040463 -0.113097 4.016155
4 80 True 0.6040 62.094553 0 0.778579 0.0 1.0 1.0 0.0 ... -0.280419 -0.240055 0.794865 0.577040 -1.748218 3.642191 0.448044 -1.863053 -1.924604 3.214991

5 rows × 92 columns


In [39]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title('truly positive group')
ax = sns.stripplot(x="gender_female", y="score", data=df[df.true_label==True], jitter=True, ax=ax, size=5)



In [40]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title('truly negative group')
ax = sns.stripplot(x="gender_female", y="score", data=df[df.true_label==False], jitter=True, ax=ax, size=5)