Analysis of tube.csv

Environment setup


In [1]:
%pylab inline
import pandas as pd


Populating the interactive namespace from numpy and matplotlib

Read in the data file


In [2]:
# Load the tube table into a DataFrame; the path is relative to the notebook's working directory.
tube = pd.read_csv(filepath_or_buffer='./competition_data/tube.csv')

Explore the data set


In [3]:
# Structural overview: (rows, columns) followed by the dtype of every column.
# Single-argument print() produces the same output on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(tube.shape)
print(tube.dtypes)
# Optional: peek at a reproducible random sample of rows.
# print(tube.sample(10, random_state=0))


(21198, 16)
tube_assembly_id     object
material_id          object
diameter            float64
wall                float64
length              float64
num_bends             int64
bend_radius         float64
end_a_1x             object
end_a_2x             object
end_x_1x             object
end_x_2x             object
end_a                object
end_x                object
num_boss              int64
num_bracket           int64
other                 int64
dtype: object

In [5]:
# Element-wise missing-value mask with the same shape as `tube`.
tube_isnull = tube.isnull()
# Collapse across columns: True for every row that has at least one missing value.
tube_isnull_row = tube_isnull.any(axis=1)
# print() with one argument works identically on Python 2 and Python 3.
print(tube_isnull_row.shape)
# Shape of the subset of rows that contain a missing value.
print(tube[tube_isnull_row].shape)


(21198,)
(279, 16)

In [6]:
# Keep only the rows whose material_id is missing, then report how many there are.
tube_is_null = tube[tube.material_id.isnull()]
# print() with one argument works identically on Python 2 and Python 3.
print(tube_is_null.shape)


(279, 16)

In [4]:
# Narrow the frame to the assembly identifier and its material.
id_and_material_cols = ['tube_assembly_id', 'material_id']
tube_ta_mat = tube[id_and_material_cols]

In [5]:
# Show the first five rows (five is also the default for head()).
tube_ta_mat.head(n=5)


Out[5]:
tube_assembly_id material_id
0 TA-00001 SP-0035
1 TA-00002 SP-0019
2 TA-00003 SP-0019
3 TA-00004 SP-0019
4 TA-00005 SP-0029

In [15]:
# Assembly/material pairs where the material is missing.
ta_mat_nan = tube_ta_mat[tube_ta_mat.material_id.isnull()]
# print() with one argument works identically on Python 2 and Python 3.
print(ta_mat_nan.shape)
print(ta_mat_nan.head())


(279, 2)
     tube_assembly_id material_id
1701         TA-01702         NaN
2003         TA-02004         NaN
3359         TA-03360         NaN
3895         TA-03896         NaN
4011         TA-04012         NaN

In [7]:
# One-hot encode material_id: the column is replaced by one indicator column
# per distinct material (prefixed "material_id_"). With the default
# dummy_na=False no indicator is created for NaN, so rows with a missing
# material_id end up with zeros in every indicator column.
tube_dummies = pd.get_dummies(data=tube, columns=['material_id'])

In [9]:
# Inspect the encoded frame: its shape, the resulting column names, and the first rows.
# print() with one argument works identically on Python 2 and Python 3.
print(tube_dummies.shape)
print(tube_dummies.columns)
print(tube_dummies.head())


(21198, 34)
Index([u'tube_assembly_id', u'diameter', u'wall', u'length', u'num_bends',
       u'bend_radius', u'end_a_1x', u'end_a_2x', u'end_x_1x', u'end_x_2x',
       u'end_a', u'end_x', u'num_boss', u'num_bracket', u'other',
       u'material_id_SP-0008', u'material_id_SP-0019', u'material_id_SP-0028',
       u'material_id_SP-0029', u'material_id_SP-0030', u'material_id_SP-0031',
       u'material_id_SP-0032', u'material_id_SP-0033', u'material_id_SP-0034',
       u'material_id_SP-0035', u'material_id_SP-0036', u'material_id_SP-0037',
       u'material_id_SP-0038', u'material_id_SP-0039', u'material_id_SP-0041',
       u'material_id_SP-0044', u'material_id_SP-0045', u'material_id_SP-0046',
       u'material_id_SP-0048'],
      dtype='object')
  tube_assembly_id  diameter  wall  length  num_bends  bend_radius end_a_1x  \
0         TA-00001     12.70  1.65     164          5        38.10        N   
1         TA-00002      6.35  0.71     137          8        19.05        N   
2         TA-00003      6.35  0.71     127          7        19.05        N   
3         TA-00004      6.35  0.71     137          9        19.05        N   
4         TA-00005     19.05  1.24     109          4        50.80        N   

  end_a_2x end_x_1x end_x_2x         ...          material_id_SP-0035  \
0        N        N        N         ...                            1   
1        N        N        N         ...                            0   
2        N        N        N         ...                            0   
3        N        N        N         ...                            0   
4        N        N        N         ...                            0   

  material_id_SP-0036  material_id_SP-0037  material_id_SP-0038  \
0                   0                    0                    0   
1                   0                    0                    0   
2                   0                    0                    0   
3                   0                    0                    0   
4                   0                    0                    0   

   material_id_SP-0039  material_id_SP-0041  material_id_SP-0044  \
0                    0                    0                    0   
1                    0                    0                    0   
2                    0                    0                    0   
3                    0                    0                    0   
4                    0                    0                    0   

   material_id_SP-0045  material_id_SP-0046  material_id_SP-0048  
0                    0                    0                    0  
1                    0                    0                    0  
2                    0                    0                    0  
3                    0                    0                    0  
4                    0                    0                    0  

[5 rows x 34 columns]

In [8]:
# Rows whose end_a value is the literal string 'NONE' (a string value, not a missing NaN).
tube[tube['end_a'] == 'NONE']


Out[8]:
tube_assembly_id material_id diameter wall length num_bends bend_radius end_a_1x end_a_2x end_x_1x end_x_2x end_a end_x num_boss num_bracket other
12 TA-00013 SP-0028 38.10 1.650 106 3 76.20 N N N N NONE NONE 1 0 0
19 TA-00020 SP-0041 6.35 2.375 81 9 19.05 N N N N NONE NONE 0 0 0
20 TA-00021 SP-0041 6.35 2.375 81 6 19.05 N N N N NONE NONE 0 0 0
31 TA-00032 SP-0028 12.70 0.890 55 2 76.20 N N N N NONE NONE 0 0 0
42 TA-00043 SP-0029 25.40 2.410 68 1 63.50 N N N N NONE EF-003 0 0 0
43 TA-00044 SP-0029 12.70 1.650 36 3 38.10 N N N N NONE NONE 0 0 0
50 TA-00051 SP-0029 76.20 1.650 67 1 152.40 N N Y Y NONE EF-017 0 0 0
63 TA-00064 SP-0029 25.40 1.650 89 5 50.80 N N N N NONE NONE 0 0 0
64 TA-00065 SP-0029 25.40 3.050 193 0 0.00 N N N N NONE EF-003 0 0 0
70 TA-00071 SP-0028 9.52 0.890 62 3 127.00 N N N N NONE NONE 0 0 0
98 TA-00099 SP-0029 22.22 3.050 182 5 50.80 N N N N NONE NONE 1 0 0
123 TA-00124 SP-0019 6.35 0.710 29 2 19.05 N N N N NONE EF-008 0 0 0
134 TA-00135 SP-0028 9.52 0.890 75 1 127.00 N N N N NONE NONE 0 1 0
139 TA-00140 SP-0028 57.15 1.650 89 2 101.60 N N N Y NONE NONE 0 0 0
154 TA-00155 SP-0028 9.52 0.890 85 10 31.75 N N N N NONE EF-003 0 0 0
161 TA-00162 SP-0035 12.70 0.890 91 3 19.05 N N N N NONE NONE 0 0 0
164 TA-00165 SP-0028 12.70 0.890 73 1 152.40 N N N N NONE NONE 0 1 0
165 TA-00166 SP-0028 50.80 1.650 94 3 101.60 N Y N N NONE NONE 1 0 0
192 TA-00193 SP-0028 9.52 0.890 85 10 31.75 N N N N NONE EF-003 0 0 0
228 TA-00229 SP-0028 50.80 1.650 105 3 101.60 N N N Y NONE NONE 1 0 1
277 TA-00278 SP-0028 9.52 0.890 59 2 127.00 N N N N NONE NONE 0 0 0
280 TA-00281 SP-0029 19.05 1.650 187 5 38.10 N N N N NONE NONE 0 0 0
284 TA-00285 SP-0029 19.05 1.650 106 4 38.10 N N N N NONE NONE 0 0 0
285 TA-00286 SP-0029 22.22 1.650 200 6 50.80 N N N N NONE NONE 0 0 0
286 TA-00287 SP-0029 19.05 1.650 57 2 38.10 N N N N NONE NONE 0 0 0
287 TA-00288 SP-0029 19.05 1.650 76 4 38.10 N N N N NONE NONE 0 0 0
288 TA-00289 SP-0029 19.05 1.650 38 3 38.10 N N N N NONE NONE 0 0 0
289 TA-00290 SP-0029 19.05 1.650 50 2 38.10 N N N N NONE NONE 0 0 0
290 TA-00291 SP-0029 19.05 1.650 75 4 38.10 N N N Y NONE NONE 0 0 0
304 TA-00305 SP-0028 34.92 1.650 123 5 63.50 N N N N NONE NONE 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
20419 TA-20421 SP-0029 25.40 1.650 216 6 63.50 N N N N NONE NONE 0 0 0
20432 TA-20434 SP-0029 88.90 3.050 54 1 152.40 N N N Y NONE NONE 0 0 0
20433 TA-20435 SP-0029 88.90 3.050 28 1 152.40 N Y N Y NONE EF-003 0 0 1
20470 TA-20472 SP-0029 76.20 1.650 208 6 152.40 Y Y N N NONE NONE 1 0 0
20471 TA-20473 SP-0019 3.18 0.710 133 7 12.70 N N N N NONE NONE 0 0 0
20472 TA-20474 SP-0019 3.18 0.710 37 2 12.70 N N N N NONE NONE 0 0 0
20491 TA-20493 SP-0019 3.18 0.710 57 4 12.70 N N N N NONE NONE 0 0 0
20515 TA-20517 SP-0019 3.18 0.710 50 3 12.70 N N N N NONE NONE 0 0 0
20538 TA-20540 SP-0029 19.05 1.650 62 3 38.10 N N N N NONE EF-003 0 0 0
20545 TA-20547 SP-0029 22.22 1.650 97 5 50.80 N N N N NONE EF-003 0 0 0
20555 TA-20557 SP-0029 19.05 1.650 37 2 38.10 N N N N NONE NONE 0 0 0
20556 TA-20558 SP-0029 19.05 1.650 53 2 38.10 N N N N NONE NONE 0 0 0
20619 TA-20621 SP-0029 19.05 2.110 29 2 38.10 N N N N NONE NONE 0 0 0
20700 TA-20702 SP-0028 34.92 1.650 118 4 63.50 N Y N N NONE NONE 0 0 0
20867 TA-20869 SP-0029 22.22 2.110 196 2 63.50 N N N N NONE EF-023 1 0 0
20918 TA-20920 SP-0029 19.05 1.650 39 2 50.80 N N N N NONE NONE 0 0 0
20971 TA-20973 SP-0029 63.50 3.050 77 0 0.00 N N N N NONE NONE 0 0 0
20991 TA-20993 SP-0029 12.70 1.240 61 5 38.10 N N N Y NONE NONE 0 0 0
21061 TA-21063 SP-0029 19.05 1.650 62 4 50.80 N N N N NONE NONE 0 0 0
21062 TA-21064 SP-0029 19.05 1.650 64 3 38.10 N N N N NONE NONE 0 0 0
21063 TA-21065 SP-0029 19.05 1.650 83 5 38.10 N N N N NONE NONE 0 0 0
21083 TA-21085 SP-0029 6.35 0.710 13 1 19.05 N N N N NONE NONE 0 0 0
21096 TA-21098 SP-0028 50.80 1.650 62 2 101.60 Y Y N N NONE NONE 0 0 0
21103 TA-21105 SP-0008 6.35 2.260 48 6 19.05 N N N N NONE NONE 0 0 0
21113 TA-21115 SP-0028 50.80 1.650 129 2 127.00 N N N N NONE NONE 1 0 0
21139 TA-21141 SP-0028 9.52 0.890 76 1 127.00 N N N N NONE NONE 0 0 0
21140 TA-21142 SP-0029 152.40 1.650 9 0 0.00 Y Y Y Y NONE NONE 0 0 0
21152 TA-21154 SP-0035 63.50 1.650 181 3 127.00 N N N N NONE EF-009 0 0 0
21156 TA-21158 SP-0028 9.52 0.890 18 1 127.00 N N N N NONE NONE 0 0 0
21171 TA-21173 SP-0028 9.52 0.890 58 1 127.00 N N N N NONE NONE 0 0 0

998 rows × 16 columns