In [ ]:
import pandas as pd
In [ ]:
# Load the tab-separated training table, discard the 'DSSR' and 'pdb_chain'
# columns, and keep only rows that have no missing values. The unfiltered
# frame is never bound to a name, so nothing has to be deleted afterwards.
cleared_train_dataframe = (
    pd.read_table('train.csv', sep='\t')
    .drop(columns=['DSSR', 'pdb_chain'])
    .dropna()
)
In [ ]:
import numpy as np
In [ ]:
# Thin the training set to every second row (positions 0, 2, 4, ...) and
# release the full cleaned frame once the subset has been taken.
rows_count = len(cleared_train_dataframe)
train_dataframe = cleared_train_dataframe.iloc[np.arange(0, rows_count, 2)]
del cleared_train_dataframe
In [ ]:
# Split the frame into the target column 'mg' and the remaining feature
# columns, both as NumPy arrays; the frame is dropped once both exist.
train_sample_answers = train_dataframe['mg'].values
train_sample = train_dataframe.drop(columns=['mg']).values
del train_dataframe
In [ ]:
from sklearn.ensemble import AdaBoostClassifier
In [ ]:
# Fit an AdaBoost ensemble of up to 1000 boosting rounds on the training arrays.
# random_state is pinned so that re-running the notebook rebuilds the same
# model (and hence the same predictions); without it the seed handed to each
# base estimator changes from run to run.
classifier = AdaBoostClassifier(n_estimators=1000, random_state=0)
classifier.fit(train_sample, train_sample_answers)

# The training arrays are not referenced again after fitting; release them.
del train_sample
del train_sample_answers
In [ ]:
import warnings

# Load the comma-separated test table and strip the columns that are not fed
# to the classifier ('DSSR', 'pdb_chain', 'Id', 'index'), then discard rows
# that contain missing values.
raw_test_dataframe = pd.read_table('test.csv', sep=',')
cleared_test_dataframe = raw_test_dataframe.drop(
    columns=['DSSR', 'pdb_chain', 'Id', 'index']
).dropna()

# dropna() removes incomplete rows without notice, while the submission built
# further down numbers the predictions 0..n-1 by position. A removed row
# therefore shifts every following Id, so report the loss instead of hiding it.
dropped_test_rows = len(raw_test_dataframe) - len(cleared_test_dataframe)
del raw_test_dataframe
if dropped_test_rows:
    warnings.warn(
        "{} test rows with missing values were dropped; positional "
        "submission Ids will not match the original test rows".format(
            dropped_test_rows
        ),
        RuntimeWarning,
    )
del dropped_test_rows
In [ ]:
# Turn the cleaned test frame into a plain NumPy array for prediction, then
# release the frame.
test_sample = cleared_test_dataframe.to_numpy()
del cleared_test_dataframe
In [ ]:
# Predict one label per test row and lay the result out as a single column
# of shape (n, 1); the feature array is released afterwards.
test_sample_answers = np.reshape(classifier.predict(test_sample), (-1, 1))
del test_sample
In [ ]:
# Show the sum over all predicted labels as the cell output.
# NOTE(review): presumably 'mg' is a 0/1 label, making this the number of
# positive predictions - confirm against the training data.
np.sum(test_sample_answers)
In [ ]:
# Build the two-column submission array: column 0 is the 0-based position of
# each prediction, column 1 is the predicted label itself.
submission = np.column_stack(
    (np.arange(test_sample_answers.shape[0]), test_sample_answers)
)
del test_sample_answers
In [ ]:
import csv
# Write the submission as comma-separated text: a header row followed by one
# "Id,mg" row per prediction. newline='' leaves line-ending handling to the
# csv module, as its documentation recommends for files passed to csv.writer.
with open('submission.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    writer.writerow(['Id', 'mg'])
    writer.writerows(submission)

# The file is closed at this point; release the writer and the array.
del writer
del submission