In [1]:
%pylab inline
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier


Populating the interactive namespace from numpy and matplotlib

In [2]:
# Load the tube table from the competition data directory (path is relative
# to the notebook's working directory).
tube_csv_path = './competition_data/tube.csv'
tube = pd.read_csv(tube_csv_path)

In [3]:
# Show the dtype of every column to see which ones are numeric (usable as
# features) and which are strings.  The parenthesised single-argument form
# prints the same text on Python 2 and is also valid on Python 3.
print(tube.dtypes)


tube_assembly_id     object
material_id          object
diameter            float64
wall                float64
length              float64
num_bends             int64
bend_radius         float64
end_a_1x             object
end_a_2x             object
end_x_1x             object
end_x_2x             object
end_a                object
end_x                object
num_boss              int64
num_bracket           int64
other                 int64
dtype: object

In [4]:
# Split `tube` into fully populated rows (training data for the classifier)
# and rows with at least one missing value (the rows to impute).

# Numeric columns used as model inputs; `material_id` is the label to predict.
FEATURE_COLUMNS = ['diameter', 'wall', 'length', 'num_bends', 'bend_radius',
                   'num_boss', 'num_bracket', 'other']
TARGET_COLUMN = 'material_id'

# Boolean frame of missing cells, reduced to one flag per row.
tube_isnull = tube.isnull()
tube_isnull_row = tube_isnull.any(axis=1)

# `~` is the element-wise NOT for a boolean mask.  Unary `-` on a boolean
# Series is rejected by current numpy/pandas, so it must not be used here.
tube_train = tube[~tube_isnull_row]
tube_train_X = tube_train[FEATURE_COLUMNS]
tube_train_y = tube_train[TARGET_COLUMN]

tube_test = tube[tube_isnull_row]
tube_test_X = tube_test[FEATURE_COLUMNS]
tube_test_y = tube_test[TARGET_COLUMN]

# The row mask flags a missing value in *any* column, so make sure the rows
# selected for imputation still have complete features before predicting.
assert not tube_test_X.isnull().values.any(), \
    "rows to impute have missing feature values"

In [5]:
# tube_test_y

In [6]:
# Sanity-check the split: (rows, columns) of the complete rows, then of the
# rows that still need a value.  print(...) with one argument behaves the
# same on Python 2 and Python 3.
print(tube_train.shape)
print(tube_test.shape)


(20919, 16)
(279, 16)

In [7]:
# k-nearest-neighbours classifier with k = 5 (the library default, spelled
# out here so the variable name and the setting visibly agree).
knn5 = KNeighborsClassifier(n_neighbors=5)

In [8]:
# Train on the complete rows: numeric tube features -> material_id label.
knn_fit = knn5.fit(X=tube_train_X, y=tube_train_y)

In [9]:
# Display the hyper-parameters the fitted classifier is using.
knn_fit.get_params(deep=True)


Out[9]:
{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [10]:
# Predict a material_id for every row that was held out because of a
# missing value, then print the predicted labels.  print(...) with one
# argument behaves the same on Python 2 and Python 3.
material_pred = knn_fit.predict(tube_test_X)
print(material_pred)


['SP-0029' 'SP-0029' 'SP-0029' 'SP-0019' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0035' 'SP-0029' 'SP-0035' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0037' 'SP-0035' 'SP-0035' 'SP-0035' 'SP-0035'
 'SP-0035' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0028' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0008' 'SP-0029'
 'SP-0029' 'SP-0035' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0035' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0035'
 'SP-0029' 'SP-0035' 'SP-0035' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0035'
 'SP-0035' 'SP-0029' 'SP-0029' 'SP-0035' 'SP-0029' 'SP-0029' 'SP-0008'
 'SP-0029' 'SP-0029' 'SP-0044' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0035' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0035' 'SP-0035' 'SP-0029' 'SP-0035' 'SP-0035'
 'SP-0035' 'SP-0035' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0035' 'SP-0019'
 'SP-0029' 'SP-0029' 'SP-0035' 'SP-0035' 'SP-0029' 'SP-0029' 'SP-0035'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0035' 'SP-0035'
 'SP-0029' 'SP-0035' 'SP-0035' 'SP-0035' 'SP-0035' 'SP-0029' 'SP-0035'
 'SP-0029' 'SP-0035' 'SP-0029' 'SP-0035' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0035' 'SP-0035' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0035' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0035' 'SP-0029' 'SP-0035' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0046' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0046' 'SP-0029' 'SP-0029' 'SP-0048' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0048' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0046' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0048'
 'SP-0029' 'SP-0029' 'SP-0029' 'SP-0037' 'SP-0035' 'SP-0035']

In [11]:
# Write the predicted labels back into the original frame.  The boolean row
# mask selects the rows that were predicted, in their original order, which
# matches the order of `material_pred`.
# `.loc` replaces the `.ix` indexer, which is deprecated and has been removed
# from current pandas releases.
tube.loc[tube_isnull_row, 'material_id'] = material_pred

In [12]:
# Persist the table with material_id filled in; the row index is not written.
imputed_csv_path = 'tube_material_id_imputed.csv'
tube.to_csv(imputed_csv_path, index=False)