In [1]:
%pylab inline
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
In [2]:
# Load the tube table from the local competition data folder (path is
# relative to the notebook's working directory).
tube = pd.read_csv('./competition_data/tube.csv')
In [3]:
# Show the dtype pandas inferred for each column. Written as a function call
# so the cell runs on Python 3 as well as Python 2.
print(tube.dtypes)
In [4]:
# Split the table into rows usable for training (no missing value in any
# column) and rows to impute (at least one missing value).
tube_isnull = tube.isnull()
tube_isnull_row = tube_isnull.any(axis=1)
# NOTE(review): the mask is computed over every column, not only
# 'material_id' -- confirm 'material_id' is the only column with missing
# values, otherwise rows lacking a feature also end up in the "test" set.

# Feature columns fed to the classifier; listed once so the train and test
# selections cannot drift apart.
feature_cols = ['diameter', 'wall', 'length', 'num_bends', 'bend_radius',
                'num_boss', 'num_bracket', 'other']

# `~` is the element-wise logical NOT for a boolean mask; unary `-` on a
# boolean Series is rejected by current NumPy/pandas.
tube_train = tube[~tube_isnull_row]
tube_train_X = tube_train[feature_cols]
tube_train_y = tube_train['material_id']

tube_test = tube[tube_isnull_row]
tube_test_X = tube_test[feature_cols]
tube_test_y = tube_test['material_id']
In [5]:
# tube_test_y
In [6]:
# Report (rows, columns) for the training and to-be-imputed subsets.
# print() with a single argument behaves the same on Python 2 and Python 3.
print(tube_train.shape)
print(tube_test.shape)
In [7]:
# k-nearest-neighbours classifier. n_neighbors=5 is scikit-learn's default,
# spelled out here so the variable name `knn5` matches the configuration.
knn5 = KNeighborsClassifier(n_neighbors=5)
In [8]:
# Fit the classifier on the training features and material_id labels; the
# value returned by fit() is kept under `knn_fit` for the later cells.
knn_fit = knn5.fit(X=tube_train_X, y=tube_train_y)
In [9]:
# Display the parameter settings of the object returned by fit().
knn_fit.get_params()
Out[9]:
In [10]:
# Predict a material_id for every row of the held-out feature table.
material_pred = knn_fit.predict(tube_test_X)
# print() call form so the cell also runs on Python 3.
print(material_pred)
In [11]:
# Write the predicted material_id values into the rows flagged by the
# missing-value mask. `.loc` replaces `.ix`, which was deprecated and later
# removed from pandas; for a boolean row mask plus a column label both
# indexers address the same cells.
tube.loc[tube_isnull_row, 'material_id'] = material_pred
In [12]:
# Save the table with the imputed material_id column to the working
# directory; index=False keeps the row index out of the CSV.
tube.to_csv('tube_material_id_imputed.csv', index=False)