The final submission is a simple (unweighted) average of the per-fold prediction columns from the two submission files.
In [1]:
import pandas as pd
In [3]:
# Read the two CSV files whose fold columns are blended further down.
sub1 = pd.read_csv(filepath_or_buffer='final.csv')
sub2 = pd.read_csv(filepath_or_buffer='xgb_final.csv')
In [7]:
# Display the (rows, columns) shape of both frames as a quick sanity check.
tuple(frame.shape for frame in (sub1, sub2))
Out[7]:
In [6]:
# Discard the 'test_id' column from both frames; drop() returns a new frame,
# so each name is rebound to the result.
sub1 = sub1.drop(labels=['test_id'], axis='columns')
sub2 = sub2.drop(labels=['test_id'], axis='columns')
In [18]:
# Blend the per-fold predictions with an unweighted mean.
# The columns are listed explicitly and the divisor is derived from the list,
# so adding or removing a fold cannot leave a stale hard-coded count behind.
sub1_folds = ['fold1', 'fold2', 'fold3', 'fold4', 'fold5', 'fold6', 'fold7']
sub2_folds = ['fold0', 'fold1', 'fold2', 'fold3', 'fold4', 'fold5']
fold_preds = [sub1[col] for col in sub1_folds] + [sub2[col] for col in sub2_folds]

# Seed the sum with the first column so the additions happen in the same
# left-to-right order as a hand-written `a + b + c + ...` chain.
dup = sum(fold_preds[1:], fold_preds[0]) / len(fold_preds)
In [19]:
import matplotlib.pyplot as plt
# Plot a 50-bin histogram of the blended predictions to inspect their
# distribution before writing the submission.
plt.hist(dup,bins=50)
plt.show()
In [3]:
# Load an existing submission file to reuse as the output frame.
# NOTE(review): presumably it already has the id column and an
# 'is_duplicate' column that gets overwritten below — confirm.
sub = pd.read_csv('sub_av.csv')
In [23]:
# Store the blended predictions in the 'is_duplicate' column.
# Bracket assignment is used on purpose: attribute assignment
# (`sub.is_duplicate = ...`) only updates a column that already exists and
# otherwise sets a plain Python attribute, which to_csv() would not write.
sub['is_duplicate'] = dup
In [24]:
# Preview the first five rows of the frame that is about to be written out.
sub.head(n=5)
Out[24]:
In [25]:
# Write the final submission; index=False keeps the DataFrame index out of
# the CSV.
sub.to_csv('sub_final.csv',index = False)
# IPython shell escapes: delete any previous test.zip, then archive the CSV
# written above into a new test.zip.
! rm -rf test.zip
! zip -r test.zip sub_final.csv