av_student_fest_sol_ensembles



In [9]:
import pandas as pd
import os
# Base directory for every file read or written below: the kernel's current
# working directory at the time this cell runs.
PATH = os.getcwd()
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [10]:
# Load the two prediction files to be blended (columns: id, is_pass).
# Paths are built with os.path.join instead of hard-coded '\\' separators so
# the notebook resolves the same files on Windows and on POSIX systems.
df1 = pd.read_csv(os.path.join(PATH, 'AV_Stud', 'xgb_0.7.csv'))
df2 = pd.read_csv(os.path.join(PATH, 'AV_Stud', 'xgb_0.4&xgb_0.3.csv'))

In [11]:
# Peek at the first five rows of the first prediction file.
df1.head(n=5)


Out[11]:
id is_pass
0 1626_45 0.507105
1 11020_130 0.995746
2 12652_146 0.540914
3 7038_72 0.006350
4 888_71 0.212951

In [12]:
# Peek at the first five rows of the second prediction file.
df2.head(n=5)


Out[12]:
id is_pass
0 1626_45 0.488407
1 11020_130 0.994501
2 12652_146 0.595252
3 7038_72 0.010464
4 888_71 0.346809

In [17]:
# Weighted blend of the two submissions: 85% of df1's score plus 15% of df2's.
# pandas adds Series by row label, not by `id`, so first verify that both
# files list the same ids in the same row order; otherwise scores belonging to
# different ids would be averaged together without any error.
assert df1['id'].equals(df2['id']), 'df1 and df2 rows are not aligned on id'
# NOTE: this overwrites df2['is_pass'] in place, so re-running the cell without
# reloading df2 blends the already-blended values a second time.
df2['is_pass'] = df1['is_pass'] * .85 + df2['is_pass'] * .15

In [18]:
# Persist the blended predictions (no index column in the output).
# os.path.join keeps the target path valid on both Windows and POSIX systems.
df2.to_csv(os.path.join(PATH, 'AV_Stud', 'xgb_0.4&xgb_0.3&xgb_0.7.csv'), index = False)

In [ ]:
# Scratch note kept as a comment: the original cell held only this fragment,
# which is an unterminated string literal and raises SyntaxError when run.
# xgb_0.4&xgb_0.3&xgb_0.7.csv'

In [ ]:


In [26]:
# Reload two blended prediction files for a second round of blending.
# NOTE: df1 and df2 are rebound here, replacing the frames loaded earlier.
# Paths are built with os.path.join instead of hard-coded '\\' separators so
# the notebook resolves the same files on Windows and on POSIX systems.
df1 = pd.read_csv(os.path.join(PATH, 'AV_Stud', 'xgb_0.4&xgb_0.3&xgb_0.7.csv'))
df2 = pd.read_csv(os.path.join(PATH, 'AV_Stud', 'xgb_0.4&xgb_0.3&xgb_0.7(&xgb_0.4&xgb_0.3).csv'))

In [24]:
# Second blend: 85% of df2's score plus 15% of df1's.
# The Series are combined by row label, so a mismatch in row labels would
# silently yield NaN scores; fail loudly instead.
# NOTE(review): this only checks row labels — it assumes both files list the
# same ids in the same order; confirm against the input files.
assert df1.index.equals(df2.index), 'df1 and df2 do not share the same row index'
# NOTE: overwrites df2['is_pass'] in place, so re-running this cell without
# reloading df2 applies the blend again.
df2['is_pass'] = df2['is_pass'] * .85 + .15 * df1['is_pass']

In [25]:
# Persist the second-round blend (no index column in the output).
# os.path.join keeps the target path valid on both Windows and POSIX systems.
df2.to_csv(os.path.join(PATH, 'AV_Stud', 'xgb_0.4&xgb_0.3&xgb_0.7(&xgb_0.4&xgb_0.3)_v1.csv'), index = False)

In [29]:
# Show the 10%, 20%, ..., 90% quantiles of the current scores.
decile_levels = [.1,.2,.3,.4,.5,.6,.7,.8,.9]
df2['is_pass'].quantile(q=decile_levels)


Out[29]:
0.1    0.177586
0.2    0.342803
0.3    0.494333
0.4    0.633314
0.5    0.747091
0.6    0.834378
0.7    0.898935
0.8    0.945139
0.9    0.977810
Name: is_pass, dtype: float64

In [30]:
# Largest score in the column. Series.max() is vectorised and skips NaN,
# whereas the builtin max() iterates element by element and gives an
# order-dependent result when NaN is present.
df2['is_pass'].max()


Out[30]:
0.99963813462108364

In [31]:
import numpy as np

In [34]:
# Replace every score at or above the 0.977 cut-off with 0.999; scores below
# the cut-off pass through unchanged. The result is a NumPy array.
# NOTE(review): scores already above 0.999 are lowered to 0.999 as well —
# confirm that this is intended.
at_or_above_cutoff = df2['is_pass'] >= .977
preds = np.where(at_or_above_cutoff, .999, df2['is_pass'])

In [35]:
# Overwrite the score column in place with the adjusted predictions.
df2['is_pass'] = preds

In [36]:
# Persist the adjusted predictions (no index column in the output).
# os.path.join keeps the target path valid on both Windows and POSIX systems.
df2.to_csv(os.path.join(PATH, 'AV_Stud', 'mixed_.977_to_.999.csv'),index=False)

In [ ]: