In [1]:
%pylab inline
import pandas as pd
Populating the interactive namespace from numpy and matplotlib
In [2]:
# Load the tube table; the path is relative to the notebook's working directory.
tube_csv_path = './competition_data/tube.csv'
tube = pd.read_csv(tube_csv_path)
In [3]:
# Quick structural overview of the raw table: (rows, columns), then the dtype
# of every column. Single-argument print(...) calls run unchanged on both
# Python 2 and Python 3, unlike the bare print statement.
print(tube.shape)
print(tube.dtypes)
(21198, 16)
tube_assembly_id object
material_id object
diameter float64
wall float64
length float64
num_bends int64
bend_radius float64
end_a_1x object
end_a_2x object
end_x_1x object
end_x_2x object
end_a object
end_x object
num_boss int64
num_bracket int64
other int64
dtype: object
In [5]:
# Element-wise missing-value mask with the same shape as `tube`.
tube_isnull = tube.isnull()
# Collapse across columns: True for every row that has at least one missing value.
tube_isnull_row = tube_isnull.any(axis=1)
# First the length of the per-row flag vector, then the shape of the sub-frame
# that contains only the incomplete rows.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(tube_isnull_row.shape)
print(tube[tube_isnull_row].shape)
(21198,)
(279, 16)
In [6]:
# Rows whose material_id is missing. Despite the generic name, this filter
# looks at the material_id column only.
material_id_missing = tube.material_id.isnull()
tube_is_null = tube[material_id_missing]
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(tube_is_null.shape)
(279, 16)
In [4]:
# Narrow view of the table: assembly id paired with its material id.
ta_mat_columns = ['tube_assembly_id', 'material_id']
tube_ta_mat = tube[ta_mat_columns]
In [5]:
# Preview the first five (assembly id, material id) pairs.
tube_ta_mat.head(n=5)
Out[5]:
tube_assembly_id
material_id
0
TA-00001
SP-0035
1
TA-00002
SP-0019
2
TA-00003
SP-0019
3
TA-00004
SP-0019
4
TA-00005
SP-0029
In [15]:
# Assemblies that have no material id recorded.
material_missing = tube_ta_mat.material_id.isnull()
ta_mat_nan = tube_ta_mat[material_missing]
# How many such assemblies there are, followed by a small preview.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(ta_mat_nan.shape)
print(ta_mat_nan.head())
(279, 2)
tube_assembly_id material_id
1701 TA-01702 NaN
2003 TA-02004 NaN
3359 TA-03360 NaN
3895 TA-03896 NaN
4011 TA-04012 NaN
In [7]:
# One-hot encode material_id: the column is replaced by one
# `material_id_<value>` indicator column per distinct material.
# NOTE(review): dummy_na is left at its default, so no indicator column is
# created for missing material ids -- confirm that is the intended handling.
tube_dummies = pd.get_dummies(data=tube, columns=['material_id'])
In [9]:
# Inspect the encoded frame: its shape, the full column index (including the
# generated material_id_* indicator columns), and the first few rows.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(tube_dummies.shape)
print(tube_dummies.columns)
print(tube_dummies.head())
(21198, 34)
Index([u'tube_assembly_id', u'diameter', u'wall', u'length', u'num_bends',
u'bend_radius', u'end_a_1x', u'end_a_2x', u'end_x_1x', u'end_x_2x',
u'end_a', u'end_x', u'num_boss', u'num_bracket', u'other',
u'material_id_SP-0008', u'material_id_SP-0019', u'material_id_SP-0028',
u'material_id_SP-0029', u'material_id_SP-0030', u'material_id_SP-0031',
u'material_id_SP-0032', u'material_id_SP-0033', u'material_id_SP-0034',
u'material_id_SP-0035', u'material_id_SP-0036', u'material_id_SP-0037',
u'material_id_SP-0038', u'material_id_SP-0039', u'material_id_SP-0041',
u'material_id_SP-0044', u'material_id_SP-0045', u'material_id_SP-0046',
u'material_id_SP-0048'],
dtype='object')
tube_assembly_id diameter wall length num_bends bend_radius end_a_1x \
0 TA-00001 12.70 1.65 164 5 38.10 N
1 TA-00002 6.35 0.71 137 8 19.05 N
2 TA-00003 6.35 0.71 127 7 19.05 N
3 TA-00004 6.35 0.71 137 9 19.05 N
4 TA-00005 19.05 1.24 109 4 50.80 N
end_a_2x end_x_1x end_x_2x ... material_id_SP-0035 \
0 N N N ... 1
1 N N N ... 0
2 N N N ... 0
3 N N N ... 0
4 N N N ... 0
material_id_SP-0036 material_id_SP-0037 material_id_SP-0038 \
0 0 0 0
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
material_id_SP-0039 material_id_SP-0041 material_id_SP-0044 \
0 0 0 0
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
material_id_SP-0045 material_id_SP-0046 material_id_SP-0048
0 0 0 0
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
[5 rows x 34 columns]
In [8]:
# Rows whose end_a holds the literal string 'NONE'. This is an ordinary string
# comparison, so it matches that stored value and is not a missing-value test.
end_a_is_none = tube['end_a'] == 'NONE'
tube[end_a_is_none]
Out[8]:
tube_assembly_id
material_id
diameter
wall
length
num_bends
bend_radius
end_a_1x
end_a_2x
end_x_1x
end_x_2x
end_a
end_x
num_boss
num_bracket
other
12
TA-00013
SP-0028
38.10
1.650
106
3
76.20
N
N
N
N
NONE
NONE
1
0
0
19
TA-00020
SP-0041
6.35
2.375
81
9
19.05
N
N
N
N
NONE
NONE
0
0
0
20
TA-00021
SP-0041
6.35
2.375
81
6
19.05
N
N
N
N
NONE
NONE
0
0
0
31
TA-00032
SP-0028
12.70
0.890
55
2
76.20
N
N
N
N
NONE
NONE
0
0
0
42
TA-00043
SP-0029
25.40
2.410
68
1
63.50
N
N
N
N
NONE
EF-003
0
0
0
43
TA-00044
SP-0029
12.70
1.650
36
3
38.10
N
N
N
N
NONE
NONE
0
0
0
50
TA-00051
SP-0029
76.20
1.650
67
1
152.40
N
N
Y
Y
NONE
EF-017
0
0
0
63
TA-00064
SP-0029
25.40
1.650
89
5
50.80
N
N
N
N
NONE
NONE
0
0
0
64
TA-00065
SP-0029
25.40
3.050
193
0
0.00
N
N
N
N
NONE
EF-003
0
0
0
70
TA-00071
SP-0028
9.52
0.890
62
3
127.00
N
N
N
N
NONE
NONE
0
0
0
98
TA-00099
SP-0029
22.22
3.050
182
5
50.80
N
N
N
N
NONE
NONE
1
0
0
123
TA-00124
SP-0019
6.35
0.710
29
2
19.05
N
N
N
N
NONE
EF-008
0
0
0
134
TA-00135
SP-0028
9.52
0.890
75
1
127.00
N
N
N
N
NONE
NONE
0
1
0
139
TA-00140
SP-0028
57.15
1.650
89
2
101.60
N
N
N
Y
NONE
NONE
0
0
0
154
TA-00155
SP-0028
9.52
0.890
85
10
31.75
N
N
N
N
NONE
EF-003
0
0
0
161
TA-00162
SP-0035
12.70
0.890
91
3
19.05
N
N
N
N
NONE
NONE
0
0
0
164
TA-00165
SP-0028
12.70
0.890
73
1
152.40
N
N
N
N
NONE
NONE
0
1
0
165
TA-00166
SP-0028
50.80
1.650
94
3
101.60
N
Y
N
N
NONE
NONE
1
0
0
192
TA-00193
SP-0028
9.52
0.890
85
10
31.75
N
N
N
N
NONE
EF-003
0
0
0
228
TA-00229
SP-0028
50.80
1.650
105
3
101.60
N
N
N
Y
NONE
NONE
1
0
1
277
TA-00278
SP-0028
9.52
0.890
59
2
127.00
N
N
N
N
NONE
NONE
0
0
0
280
TA-00281
SP-0029
19.05
1.650
187
5
38.10
N
N
N
N
NONE
NONE
0
0
0
284
TA-00285
SP-0029
19.05
1.650
106
4
38.10
N
N
N
N
NONE
NONE
0
0
0
285
TA-00286
SP-0029
22.22
1.650
200
6
50.80
N
N
N
N
NONE
NONE
0
0
0
286
TA-00287
SP-0029
19.05
1.650
57
2
38.10
N
N
N
N
NONE
NONE
0
0
0
287
TA-00288
SP-0029
19.05
1.650
76
4
38.10
N
N
N
N
NONE
NONE
0
0
0
288
TA-00289
SP-0029
19.05
1.650
38
3
38.10
N
N
N
N
NONE
NONE
0
0
0
289
TA-00290
SP-0029
19.05
1.650
50
2
38.10
N
N
N
N
NONE
NONE
0
0
0
290
TA-00291
SP-0029
19.05
1.650
75
4
38.10
N
N
N
Y
NONE
NONE
0
0
0
304
TA-00305
SP-0028
34.92
1.650
123
5
63.50
N
N
N
N
NONE
NONE
0
0
0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
20419
TA-20421
SP-0029
25.40
1.650
216
6
63.50
N
N
N
N
NONE
NONE
0
0
0
20432
TA-20434
SP-0029
88.90
3.050
54
1
152.40
N
N
N
Y
NONE
NONE
0
0
0
20433
TA-20435
SP-0029
88.90
3.050
28
1
152.40
N
Y
N
Y
NONE
EF-003
0
0
1
20470
TA-20472
SP-0029
76.20
1.650
208
6
152.40
Y
Y
N
N
NONE
NONE
1
0
0
20471
TA-20473
SP-0019
3.18
0.710
133
7
12.70
N
N
N
N
NONE
NONE
0
0
0
20472
TA-20474
SP-0019
3.18
0.710
37
2
12.70
N
N
N
N
NONE
NONE
0
0
0
20491
TA-20493
SP-0019
3.18
0.710
57
4
12.70
N
N
N
N
NONE
NONE
0
0
0
20515
TA-20517
SP-0019
3.18
0.710
50
3
12.70
N
N
N
N
NONE
NONE
0
0
0
20538
TA-20540
SP-0029
19.05
1.650
62
3
38.10
N
N
N
N
NONE
EF-003
0
0
0
20545
TA-20547
SP-0029
22.22
1.650
97
5
50.80
N
N
N
N
NONE
EF-003
0
0
0
20555
TA-20557
SP-0029
19.05
1.650
37
2
38.10
N
N
N
N
NONE
NONE
0
0
0
20556
TA-20558
SP-0029
19.05
1.650
53
2
38.10
N
N
N
N
NONE
NONE
0
0
0
20619
TA-20621
SP-0029
19.05
2.110
29
2
38.10
N
N
N
N
NONE
NONE
0
0
0
20700
TA-20702
SP-0028
34.92
1.650
118
4
63.50
N
Y
N
N
NONE
NONE
0
0
0
20867
TA-20869
SP-0029
22.22
2.110
196
2
63.50
N
N
N
N
NONE
EF-023
1
0
0
20918
TA-20920
SP-0029
19.05
1.650
39
2
50.80
N
N
N
N
NONE
NONE
0
0
0
20971
TA-20973
SP-0029
63.50
3.050
77
0
0.00
N
N
N
N
NONE
NONE
0
0
0
20991
TA-20993
SP-0029
12.70
1.240
61
5
38.10
N
N
N
Y
NONE
NONE
0
0
0
21061
TA-21063
SP-0029
19.05
1.650
62
4
50.80
N
N
N
N
NONE
NONE
0
0
0
21062
TA-21064
SP-0029
19.05
1.650
64
3
38.10
N
N
N
N
NONE
NONE
0
0
0
21063
TA-21065
SP-0029
19.05
1.650
83
5
38.10
N
N
N
N
NONE
NONE
0
0
0
21083
TA-21085
SP-0029
6.35
0.710
13
1
19.05
N
N
N
N
NONE
NONE
0
0
0
21096
TA-21098
SP-0028
50.80
1.650
62
2
101.60
Y
Y
N
N
NONE
NONE
0
0
0
21103
TA-21105
SP-0008
6.35
2.260
48
6
19.05
N
N
N
N
NONE
NONE
0
0
0
21113
TA-21115
SP-0028
50.80
1.650
129
2
127.00
N
N
N
N
NONE
NONE
1
0
0
21139
TA-21141
SP-0028
9.52
0.890
76
1
127.00
N
N
N
N
NONE
NONE
0
0
0
21140
TA-21142
SP-0029
152.40
1.650
9
0
0.00
Y
Y
Y
Y
NONE
NONE
0
0
0
21152
TA-21154
SP-0035
63.50
1.650
181
3
127.00
N
N
N
N
NONE
EF-009
0
0
0
21156
TA-21158
SP-0028
9.52
0.890
18
1
127.00
N
N
N
N
NONE
NONE
0
0
0
21171
TA-21173
SP-0028
9.52
0.890
58
1
127.00
N
N
N
N
NONE
NONE
0
0
0
998 rows × 16 columns
Content source: timmyshen/Cat_Tube
Similar notebooks: