In [2]:
import pandas as pd
sasubset = pd.read_csv("SAsubset.csv")
In [ ]:
sasubset
In [ ]:
sasubset['SentimentText']
In [16]:
nte = pd.read_csv("nte.csv")
In [17]:
nte
Out[17]:
code
cname
ename
方國珊
林卓廷
廖添誠
陳云根
梁國雄
張超雄
楊岳橋
...
鄧家彪
范國威
陳玉娥
黃琛喻
李偲嫣
陳志全
梁頌恆
梁金成
容海恩
陳克勤
0
N0101
香海正覺蓮社佛教馬錦燦紀念英文中學
HHCKLA Buddhist Ma Kam Chan Memorial English S...
158
321
23
132
200
302
310
...
104
122
0
13
28
229
247
4
100
815
1
N0201
粉嶺公立學校
Fanling Public School
58
101
4
46
118
127
131
...
55
36
2
4
27
120
94
3
72
190
2
N0202
明愛粉嶺陳震夏中學
Caritas Fanling Chan Chun Ha Secondary School
93
185
4
101
132
222
178
...
68
90
1
6
14
147
167
2
61
283
3
N0301
祥華社區會堂
Cheung Wah Community Hall
198
971
16
249
378
436
435
...
297
180
7
19
41
442
493
4
171
1141
4
N0401
香海正覺蓮社佛教正覺蓮社學校
HHCKLA Buddhist Ching Kok Lin Association School
289
497
11
326
418
541
580
...
244
317
7
17
37
535
668
3
174
1728
5
N0501
鳳溪廖潤琛紀念學校
Fung Kai Liu Yun Sum Memorial School
185
234
24
309
481
427
519
...
266
669
9
24
29
493
815
4
138
1223
6
N0601
香海正覺蓮社佛教普光學校
HHCKLA Buddhist Po Kwong School
110
246
7
138
187
267
287
...
97
131
3
13
22
285
263
0
117
755
7
N0602
路德會賽馬會雍盛綜合服務中心
Jockey Club Yung Shing Lutheran Integrated Ser...
110
205
8
204
259
351
351
...
171
253
10
6
20
324
500
3
75
1405
8
N0701
聖公會嘉福榮真小學
SKH Ka Fuk Wing Chun Primary School
248
363
9
230
371
531
441
...
807
194
2
23
43
424
620
5
212
693
9
N0801
粉嶺官立中學
Fanling Government Secondary School
163
242
7
96
174
315
305
...
101
159
2
7
13
268
199
0
140
515
10
N0802
田家炳中學
Tin Ka Ping Secondary School
80
187
2
89
92
162
160
...
63
71
2
8
13
143
137
2
109
238
11
N0901
風采中學(教育評議會主辦)
Elegantia College (Sponsored by Education Conv...
169
309
9
215
314
299
328
...
215
105
3
15
38
296
423
4
126
1687
12
N1001
保榮路體育館
Po Wing Road Sports Centre
104
307
8
173
201
267
275
...
715
103
1
13
48
295
404
7
307
533
13
N1101
金錢村何東學校
Kam Tsin Village Ho Tung School
40
80
0
39
81
69
109
...
34
29
1
4
11
116
69
1
37
150
14
N1102
蕉徑村公所
Tsiu Keng Village Office
34
61
0
15
61
39
59
...
20
24
0
3
4
55
41
0
22
301
15
N1103
古洞公立愛華學校
Ku Tung Public Oi Wah School
59
105
3
53
99
103
119
...
27
28
3
4
9
100
63
3
36
290
16
N1104
坑頭村公所
Hang Tau Village Office
80
92
1
32
69
62
82
...
31
39
3
7
8
83
62
1
30
240
17
N1201
彩園會堂
Choi Yuen Estate Hall
209
580
7
244
388
393
383
...
300
162
3
28
36
439
487
38
167
1488
18
N1301
龍琛路體育館
Lung Sum Avenue Sports Centre
19
143
0
31
37
31
43
...
31
20
1
4
5
41
62
2
28
130
19
N1302
聖公會陳融中學
SKH Chan Young Secondary School
105
634
8
128
191
190
226
...
124
90
3
17
5
244
282
3
134
552
20
N1303
香海正覺蓮社佛教陳式宏學校
HHCKLA Buddhist Chan Shi Wan Primary School
69
390
1
70
90
124
89
...
74
42
0
6
4
122
100
4
58
214
21
N1401
石湖墟公立學校
Shek Wu Hui Public School
148
629
5
241
392
435
530
...
624
148
4
11
19
441
508
6
134
707
22
N1501
鳳溪廖萬石堂中學
Fung Kai Liu Man Shek Tong Secondary School
162
357
6
182
286
284
425
...
153
128
3
17
24
329
363
11
141
895
23
N1601
打鼓嶺嶺英公立學校
Ta Ku Ling Ling Ying Public School
12
42
0
14
32
25
33
...
6
6
2
2
2
39
26
1
8
93
24
N1602
International College Hong Kong
International College Hong Kong
21
49
0
30
42
31
60
...
8
21
1
1
5
40
34
0
18
223
25
N1604
沙頭角中心小學
Sha Tau Kok Central Primary School
118
42
3
38
76
58
99
...
22
19
1
1
3
56
93
1
31
1091
26
N1606
打鼓嶺社區會堂
Ta Kwu Ling Community Hall
23
45
1
24
41
93
70
...
23
20
1
3
14
49
44
1
26
113
27
N1701
天平體育館
Tin Ping Sports Centre
104
400
4
190
233
278
350
...
227
106
2
11
15
318
342
2
85
581
28
N1702
聯和墟社區會堂
Luen Wo Hui Community Hall
209
338
21
192
271
411
414
...
148
208
6
19
50
342
257
3
166
1019
29
N1801
從謙學校
Tsung Him School
69
37
3
40
41
39
58
...
31
19
0
2
6
50
36
0
16
120
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
113
R1602
才俊學校
Choi Jun School
67
139
2
47
68
139
148
...
35
53
1
3
6
93
62
1
155
75
114
R1701
隆亨社區中心
Lung Hang Estate Community Centre
189
258
5
225
363
402
362
...
300
218
4
10
23
370
306
3
868
106
115
R1801
東莞工商總會張煌偉小學
GCCITKD Cheong Wong Wai Primary School
293
273
8
228
386
437
373
...
344
777
10
2
19
374
325
2
541
163
116
R1901
佛教黃允畋中學
Buddhist Wong Wan Tin College
226
329
5
202
354
478
524
...
337
393
1
14
23
379
361
3
140
78
117
R1902
大圍村村公所
Tai Wai Village Office
75
53
2
56
76
96
106
...
45
51
0
3
5
104
68
0
60
33
118
R2001
樂道中學
Lock Tao Secondary School
221
317
10
166
252
429
436
...
179
223
1
14
26
309
294
2
523
105
119
R2002
基督教香港信義會沙田信義幼稚園
ELCHK Shatin Lutheran Kindergarten
18
37
1
17
27
50
55
...
9
13
0
0
1
49
27
1
53
30
120
R2101
保良局蕭漢森小學
PLK Siu Hon Sum Primary School
212
441
10
196
318
488
789
...
129
192
2
20
30
435
239
1
252
153
121
R2201
賽馬會體藝中學
Jockey Club Ti-I College
180
327
4
154
273
541
537
...
132
185
0
10
42
428
229
0
457
176
122
R2301
香港中文大學富爾敦樓
John Fulton Centre, The Chinese University of ...
39
86
0
24
71
136
81
...
34
46
0
3
1
60
48
1
121
55
123
R2302
仁愛堂香港台山商會長者活動中心
Yan Oi Tong H.K. Toi Shan Association Elderly ...
135
298
8
84
134
380
373
...
84
145
1
10
12
205
123
0
298
147
124
R2401
宣道會台山陳元喜小學
Christian Alliance Toi Shan H. C. Chan Primary...
192
442
20
253
412
1022
594
...
204
240
3
22
24
527
646
2
277
148
125
R2501
香港中文大學校友會聯會陳震夏中學
CUHKFAA Chan Chun Ha Secondary School
250
521
11
266
476
1009
680
...
236
262
6
33
23
583
539
3
207
220
126
R2601
香港道教聯合會純陽小學
Hong Kong Taoist Association Shun Yeung Primar...
262
441
18
222
357
846
704
...
215
285
5
17
27
550
317
1
352
234
127
R2801
明愛馬鞍山中學
Caritas Ma On Shan Secondary School
245
807
24
307
473
855
813
...
302
330
7
22
37
739
554
1
1178
242
128
R2901
利安社區會堂
Lee On Community Hall
313
698
13
406
584
1003
948
...
392
374
8
17
55
952
721
1
547
256
129
R3001
吳氏宗親總會泰伯紀念學校
Ng Clan's Association Tai Pak Memorial School
129
344
9
171
249
425
451
...
212
135
1
22
24
414
301
1
304
89
130
R3002
東華三院黃鳳翎中學
TWGHs Wong Fung Ling College
102
233
9
98
139
317
301
...
76
117
3
9
17
220
165
0
127
109
131
R3101
保良局莊啟程小學
PLK Chong Kee Ting Primary School
233
541
15
334
533
875
693
...
328
238
4
14
30
731
558
2
197
262
132
R3201
馬鞍山聖若瑟小學
Ma On Shan St. Joseph's Primary School
142
563
9
185
364
522
392
...
567
156
4
20
21
419
306
1
162
201
133
R3202
恒安社區中心
Heng On Estate Community Centre
105
315
2
131
262
332
321
...
377
123
2
8
15
330
190
1
120
118
134
R3301
德信中學
Tak Sun Secondary School
201
496
4
255
404
829
757
...
138
310
1
17
19
586
319
0
198
157
135
R3401
聖公會馬鞍山主風小學
SKH Ma On Shan Holy Spirit Primary School
209
462
10
293
412
699
598
...
182
238
5
23
26
454
413
1
287
116
136
R3402
基督教香港信義會馬鞍山信義學校
The ELCHK Ma On Shan Lutheran Primary School
41
77
1
43
81
116
98
...
148
54
3
9
6
87
90
1
50
21
137
R3501
林大輝中學
Lam Tai Fai College
228
396
11
283
379
685
696
...
173
293
5
23
26
544
295
1
964
212
138
R3601
路德會梁鉅鏐小學
Leung Kui Kau Lutheran Primary School
216
475
6
154
274
558
525
...
107
220
1
12
17
398
225
0
224
121
139
R3602
香港浸會大學附屬學校王錦輝中小學
Hong Kong Baptist University Affiliated School...
39
47
3
49
68
57
84
...
59
37
1
7
9
82
64
0
36
33
140
R3701
世界龍岡學校黃耀南小學
LKWFSL Wong Yiu Nam Primary School
211
466
12
282
395
814
763
...
193
231
6
20
22
461
388
2
622
154
141
R3801
廣源社區會堂
Kwong Yuen Community Hall
136
229
8
233
393
564
456
...
222
133
5
18
12
481
369
1
946
173
142
LC005
亞洲國際博覽館 (點算誤投選票)
AsiaWorld-Expo (for counting misplaced ballot ...
6
3
1
1
4
8
4
...
5
2
0
2
0
3
0
0
4
4
143 rows × 25 columns
In [18]:
nte['陳云根']
Out[18]:
0 132
1 46
2 101
3 249
4 326
5 309
6 138
7 204
8 230
9 96
10 89
11 215
12 173
13 39
14 15
15 53
16 32
17 244
18 31
19 128
20 70
21 241
22 182
23 14
24 30
25 38
26 24
27 190
28 192
29 40
...
113 47
114 225
115 228
116 202
117 56
118 166
119 17
120 196
121 154
122 24
123 84
124 253
125 266
126 222
127 307
128 406
129 171
130 98
131 334
132 185
133 131
134 255
135 293
136 43
137 283
138 154
139 49
140 282
141 233
142 1
Name: 陳云根, dtype: int64
In [19]:
nte.iloc[1]
Out[19]:
code N0201
cname 粉嶺公立學校
ename Fanling Public School
方國珊 58
林卓廷 101
廖添誠 4
陳云根 46
梁國雄 118
張超雄 127
楊岳橋 131
麥嘉晉 16
鄭家富 32
葛珮帆 79
侯志強 307
李梓敬 48
鄧家彪 55
范國威 36
陳玉娥 2
黃琛喻 4
李偲嫣 27
陳志全 120
梁頌恆 94
梁金成 3
容海恩 72
陳克勤 190
Name: 1, dtype: object
In [ ]:
nte.iloc[:,1]
In [ ]:
nte.iloc[:,3]
In [ ]:
nte.iloc[:,3:24]
In [ ]:
nte.iloc[:,3:24].sum(0)
In [ ]:
nte.iloc[:,3:24].sum(0)
In [ ]:
nte.iloc[:,3:24].sum(0).sort_values()
In [ ]:
nte.iloc[:,3:24].sum(1)
In [ ]:
nte.iloc[:,3:24].div(nte.iloc[:,3:24].sum(1), axis = 0)
In [ ]:
nte.iloc[:,3:24].div(nte.iloc[:,3:24].sum(1), axis = 0)['梁國雄']
In [ ]:
nte.iloc[:,3:24].div(nte.iloc[:,3:24].sum(1), axis = 0)['梁國雄'] * 100
In [13]:
X = nte.iloc[:,3:24].div(nte.iloc[:,3:24].sum(1), axis = 0)
X
Out[13]:
方國珊
林卓廷
廖添誠
陳云根
梁國雄
張超雄
楊岳橋
麥嘉晉
鄭家富
葛珮帆
...
李梓敬
鄧家彪
范國威
陳玉娥
黃琛喻
李偲嫣
陳志全
梁頌恆
梁金成
容海恩
0
0.060398
0.122706
0.008792
0.050459
0.076453
0.115443
0.118502
0.012615
0.030199
0.039755
...
0.031728
0.039755
0.046636
0.000000
0.004969
0.010703
0.087538
0.094419
0.001529
0.038226
1
0.039189
0.068243
0.002703
0.031081
0.079730
0.085811
0.088514
0.010811
0.021622
0.053378
...
0.032432
0.037162
0.024324
0.001351
0.002703
0.018243
0.081081
0.063514
0.002027
0.048649
2
0.052811
0.105054
0.002271
0.057354
0.074957
0.126065
0.101079
0.022147
0.027825
0.068711
...
0.042589
0.038614
0.051107
0.000568
0.003407
0.007950
0.083475
0.094832
0.001136
0.034639
3
0.040416
0.198204
0.003266
0.050827
0.077159
0.088998
0.088794
0.007553
0.024903
0.034905
...
0.024495
0.060625
0.036742
0.001429
0.003878
0.008369
0.090222
0.100633
0.000816
0.034905
4
0.055037
0.094649
0.002095
0.062083
0.079604
0.103028
0.110455
0.012188
0.030470
0.029899
...
0.027995
0.046467
0.060369
0.001333
0.003237
0.007046
0.101885
0.127214
0.000571
0.033137
5
0.036296
0.045909
0.004709
0.060624
0.094369
0.083775
0.101825
0.006082
0.020600
0.032764
...
0.022955
0.052188
0.131254
0.001766
0.004709
0.005690
0.096724
0.159898
0.000785
0.027075
6
0.045212
0.101110
0.002877
0.056720
0.076860
0.109741
0.117961
0.013152
0.031237
0.026305
...
0.032470
0.039868
0.053843
0.001233
0.005343
0.009042
0.117139
0.108097
0.000000
0.048089
7
0.035065
0.065349
0.002550
0.065030
0.082563
0.111890
0.111890
0.007332
0.022633
0.026458
...
0.024546
0.054511
0.080650
0.003188
0.001913
0.006376
0.103283
0.159388
0.000956
0.023908
8
0.048438
0.070898
0.001758
0.044922
0.072461
0.103711
0.086133
0.008398
0.028320
0.043945
...
0.028711
0.157617
0.037891
0.000391
0.004492
0.008398
0.082812
0.121094
0.000977
0.041406
9
0.063597
0.094421
0.002731
0.037456
0.067889
0.122903
0.119001
0.014046
0.034335
0.049161
...
0.040968
0.039407
0.062037
0.000780
0.002731
0.005072
0.104565
0.077643
0.000000
0.054623
10
0.046976
0.109806
0.001174
0.052261
0.054022
0.095126
0.093952
0.017616
0.036994
0.073987
...
0.054610
0.036994
0.041691
0.001174
0.004698
0.007634
0.083969
0.080446
0.001174
0.064005
11
0.051166
0.093551
0.002725
0.065092
0.095065
0.090524
0.099304
0.009083
0.023009
0.019982
...
0.023615
0.065092
0.031789
0.000908
0.004541
0.011505
0.089616
0.128065
0.001211
0.038147
12
0.027645
0.081606
0.002127
0.045986
0.053429
0.070973
0.073099
0.008772
0.019670
0.066188
...
0.028708
0.190058
0.027379
0.000266
0.003456
0.012759
0.078416
0.107390
0.001861
0.081606
13
0.032653
0.065306
0.000000
0.031837
0.066122
0.056327
0.088980
0.010612
0.026939
0.058776
...
0.046531
0.027755
0.023673
0.000816
0.003265
0.008980
0.094694
0.056327
0.000816
0.030204
14
0.043646
0.078306
0.000000
0.019255
0.078306
0.050064
0.075738
0.006418
0.024390
0.055199
...
0.048780
0.025674
0.030809
0.000000
0.003851
0.005135
0.070603
0.052632
0.000000
0.028241
15
0.039151
0.069675
0.001991
0.035169
0.065693
0.068348
0.078965
0.008626
0.026543
0.047777
...
0.023225
0.017916
0.018580
0.001991
0.002654
0.005972
0.066357
0.041805
0.001991
0.023889
16
0.081967
0.094262
0.001025
0.032787
0.070697
0.063525
0.084016
0.008197
0.019467
0.050205
...
0.050205
0.031762
0.039959
0.003074
0.007172
0.008197
0.085041
0.063525
0.001025
0.030738
17
0.047242
0.131103
0.001582
0.055154
0.087703
0.088834
0.086573
0.006781
0.024864
0.039105
...
0.029159
0.067812
0.036618
0.000678
0.006329
0.008137
0.099231
0.110081
0.008590
0.037749
18
0.032095
0.241554
0.000000
0.052365
0.062500
0.052365
0.072635
0.005068
0.027027
0.038851
...
0.037162
0.052365
0.033784
0.001689
0.006757
0.008446
0.069257
0.104730
0.003378
0.047297
19
0.038307
0.231302
0.002919
0.046698
0.069683
0.069318
0.082452
0.013864
0.016053
0.041591
...
0.040861
0.045239
0.032835
0.001094
0.006202
0.001824
0.089019
0.102882
0.001094
0.048887
20
0.049784
0.281385
0.000722
0.050505
0.064935
0.089466
0.064214
0.011544
0.023088
0.033189
...
0.027417
0.053391
0.030303
0.000000
0.004329
0.002886
0.088023
0.072150
0.002886
0.041847
21
0.030534
0.129771
0.001032
0.049721
0.080875
0.089746
0.109346
0.010109
0.021250
0.043119
...
0.027440
0.128739
0.030534
0.000825
0.002269
0.003920
0.090984
0.104807
0.001238
0.027646
22
0.043642
0.096175
0.001616
0.049030
0.077047
0.076509
0.114494
0.007543
0.025862
0.036099
...
0.028017
0.041218
0.034483
0.000808
0.004580
0.006466
0.088631
0.097791
0.002963
0.037985
23
0.025974
0.090909
0.000000
0.030303
0.069264
0.054113
0.071429
0.002165
0.008658
0.047619
...
0.017316
0.012987
0.012987
0.004329
0.004329
0.004329
0.084416
0.056277
0.002165
0.017316
24
0.033386
0.077901
0.000000
0.047695
0.066773
0.049285
0.095390
0.006359
0.022258
0.044515
...
0.023847
0.012719
0.033386
0.001590
0.001590
0.007949
0.063593
0.054054
0.000000
0.028617
25
0.143552
0.051095
0.003650
0.046229
0.092457
0.070560
0.120438
0.006083
0.013382
0.019465
...
0.031630
0.026764
0.023114
0.001217
0.001217
0.003650
0.068127
0.113139
0.001217
0.037713
26
0.035115
0.068702
0.001527
0.036641
0.062595
0.141985
0.106870
0.006107
0.019847
0.041221
...
0.016794
0.035115
0.030534
0.001527
0.004580
0.021374
0.074809
0.067176
0.001527
0.039695
27
0.034853
0.134048
0.001340
0.063673
0.078083
0.093164
0.117292
0.006702
0.020777
0.040885
...
0.023458
0.076072
0.035523
0.000670
0.003686
0.005027
0.106568
0.114611
0.000670
0.028485
28
0.057895
0.093629
0.005817
0.053186
0.075069
0.113850
0.114681
0.011634
0.035457
0.051247
...
0.044598
0.040997
0.057618
0.001662
0.005263
0.013850
0.094737
0.071191
0.000831
0.045983
29
0.122558
0.065719
0.005329
0.071048
0.072824
0.069272
0.103020
0.012433
0.024867
0.060391
...
0.047957
0.055062
0.033748
0.000000
0.003552
0.010657
0.088810
0.063943
0.000000
0.028419
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
113
0.047891
0.099357
0.001430
0.033595
0.048606
0.099357
0.105790
0.031451
0.030736
0.132952
...
0.072909
0.025018
0.037884
0.000715
0.002144
0.004289
0.066476
0.044317
0.000715
0.110793
114
0.041584
0.056766
0.001100
0.049505
0.079868
0.088449
0.079648
0.005501
0.023322
0.066887
...
0.027943
0.066007
0.047965
0.000880
0.002200
0.005061
0.081408
0.067327
0.000660
0.190979
115
0.055419
0.051636
0.001513
0.043125
0.073009
0.082656
0.070550
0.007187
0.032154
0.092491
...
0.032533
0.065065
0.146964
0.001891
0.000378
0.003594
0.070740
0.061472
0.000378
0.102326
116
0.037226
0.054192
0.000824
0.033273
0.058310
0.078735
0.086312
0.007906
0.029320
0.319881
...
0.020425
0.055510
0.064734
0.000165
0.002306
0.003789
0.062428
0.059463
0.000494
0.023060
117
0.070555
0.049859
0.001881
0.052681
0.071496
0.090310
0.099718
0.015052
0.028222
0.146754
...
0.044214
0.042333
0.047977
0.000000
0.002822
0.004704
0.097836
0.063970
0.000000
0.056444
118
0.049865
0.071525
0.002256
0.037455
0.056859
0.096796
0.098375
0.015794
0.038583
0.134025
...
0.042870
0.040388
0.050316
0.000226
0.003159
0.005866
0.069720
0.066336
0.000451
0.118005
119
0.031915
0.065603
0.001773
0.030142
0.047872
0.088652
0.097518
0.031915
0.031915
0.189716
...
0.092199
0.015957
0.023050
0.000000
0.000000
0.001773
0.086879
0.047872
0.001773
0.093972
120
0.040520
0.084289
0.001911
0.037462
0.060780
0.093272
0.150803
0.021980
0.027141
0.178708
...
0.051414
0.024656
0.036697
0.000382
0.003823
0.005734
0.083142
0.045680
0.000191
0.048165
121
0.036893
0.067022
0.000820
0.031564
0.055954
0.110883
0.110064
0.039762
0.028080
0.136093
...
0.072966
0.027055
0.037918
0.000000
0.002050
0.008608
0.087723
0.046936
0.000000
0.093667
122
0.037428
0.082534
0.000000
0.023033
0.068138
0.130518
0.077735
0.028791
0.028791
0.126679
...
0.087332
0.032630
0.044146
0.000000
0.002879
0.000960
0.057582
0.046065
0.000960
0.116123
123
0.041247
0.091048
0.002444
0.025665
0.040941
0.116101
0.113963
0.047357
0.034219
0.128628
...
0.089215
0.025665
0.044302
0.000306
0.003055
0.003666
0.062634
0.037580
0.000000
0.091048
124
0.026020
0.059900
0.002710
0.034286
0.055834
0.138501
0.080499
0.018160
0.028595
0.245968
...
0.044722
0.027646
0.032525
0.000407
0.002981
0.003252
0.071419
0.087546
0.000271
0.037539
125
0.033021
0.068815
0.001453
0.035134
0.062871
0.133272
0.089816
0.014661
0.029322
0.240391
...
0.040417
0.031172
0.034606
0.000792
0.004359
0.003038
0.077004
0.071193
0.000396
0.027341
126
0.041508
0.069867
0.002852
0.035171
0.056559
0.134030
0.111534
0.038498
0.033587
0.121831
...
0.073511
0.034062
0.045152
0.000792
0.002693
0.004278
0.087136
0.050222
0.000158
0.055767
127
0.028987
0.095480
0.002840
0.036323
0.055963
0.101159
0.096190
0.022835
0.032300
0.095244
...
0.055963
0.035731
0.039044
0.000828
0.002603
0.004378
0.087435
0.065547
0.000118
0.139375
128
0.032982
0.073551
0.001370
0.042782
0.061538
0.105690
0.099895
0.017071
0.030664
0.157323
...
0.052160
0.041307
0.039410
0.000843
0.001791
0.005796
0.100316
0.075975
0.000105
0.057640
129
0.029751
0.079336
0.002076
0.039437
0.057426
0.098017
0.104013
0.015221
0.028137
0.181273
...
0.037823
0.048893
0.031135
0.000231
0.005074
0.005535
0.095480
0.069419
0.000231
0.070111
130
0.038842
0.088728
0.003427
0.037319
0.052932
0.120716
0.114623
0.029322
0.037319
0.136329
...
0.059787
0.028941
0.044554
0.001142
0.003427
0.006474
0.083778
0.062833
0.000000
0.048363
131
0.032032
0.074374
0.002062
0.045917
0.073275
0.120291
0.095271
0.009623
0.026533
0.204839
...
0.025708
0.045092
0.032719
0.000550
0.001925
0.004124
0.100495
0.076712
0.000275
0.027083
132
0.030400
0.120531
0.001927
0.039606
0.077928
0.111753
0.083922
0.008349
0.026547
0.119461
...
0.024620
0.121387
0.033398
0.000856
0.004282
0.004496
0.089702
0.065511
0.000214
0.034682
133
0.031837
0.095512
0.000606
0.039721
0.079442
0.100667
0.097332
0.014857
0.036689
0.116434
...
0.031837
0.114312
0.037295
0.000606
0.002426
0.004548
0.100061
0.057611
0.000303
0.036386
134
0.029973
0.073964
0.000596
0.038026
0.060245
0.123621
0.112884
0.018789
0.036833
0.216224
...
0.050254
0.020579
0.046227
0.000149
0.002535
0.002833
0.087384
0.047569
0.000000
0.029526
135
0.033672
0.074432
0.001611
0.047205
0.066377
0.112615
0.096343
0.010311
0.035766
0.208958
...
0.047527
0.029322
0.038344
0.000806
0.003705
0.004189
0.073143
0.066538
0.000161
0.046238
136
0.030370
0.057037
0.000741
0.031852
0.060000
0.085926
0.072593
0.007407
0.021481
0.278519
...
0.020741
0.109630
0.040000
0.002222
0.006667
0.004444
0.064444
0.066667
0.000741
0.037037
137
0.037328
0.064833
0.001801
0.046333
0.062050
0.112148
0.113949
0.019646
0.039129
0.075966
...
0.040275
0.028324
0.047970
0.000819
0.003766
0.004257
0.089064
0.048297
0.000164
0.157826
138
0.045028
0.099020
0.001251
0.032103
0.057119
0.116323
0.109443
0.025433
0.034188
0.171982
...
0.055868
0.022306
0.045862
0.000208
0.002502
0.003544
0.082969
0.046904
0.000000
0.046696
139
0.033023
0.039797
0.002540
0.041490
0.057578
0.048264
0.071126
0.005080
0.020322
0.408975
...
0.017782
0.049958
0.031329
0.000847
0.005927
0.007621
0.069433
0.054191
0.000000
0.030483
140
0.031777
0.070181
0.001807
0.042470
0.059488
0.122590
0.114910
0.021988
0.031627
0.163404
...
0.038554
0.029066
0.034789
0.000904
0.003012
0.003313
0.069428
0.058434
0.000301
0.093675
141
0.027823
0.046849
0.001637
0.047668
0.080401
0.115385
0.093290
0.005933
0.025368
0.080606
...
0.023118
0.045417
0.027209
0.001023
0.003682
0.002455
0.098404
0.075491
0.000205
0.193535
142
0.113208
0.056604
0.018868
0.018868
0.075472
0.150943
0.075472
0.037736
0.056604
0.056604
...
0.037736
0.094340
0.037736
0.000000
0.037736
0.000000
0.056604
0.000000
0.000000
0.075472
143 rows × 21 columns
In [5]:
import numpy as np
In [ ]:
X.as_matrix
In [20]:
from sklearn.cluster import KMeans
In [21]:
hx_km = KMeans(n_clusters = 2)
In [22]:
hx_km.fit(X.values)
Out[22]:
KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
n_clusters=2, n_init=10, n_jobs=1, precompute_distances='auto',
random_state=None, tol=0.0001, verbose=0)
In [23]:
hx_km.predict(X.values)
Out[23]:
array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1], dtype=int32)
In [28]:
nte['cname'][hx_km.predict(X.values)==0] ### What so special about these stations?
Out[28]:
1 粉嶺公立學校
13 金錢村何東學校
14 蕉徑村公所
15 古洞公立愛華學校
16 坑頭村公所
22 鳳溪廖萬石堂中學
23 打鼓嶺嶺英公立學校
24 International College Hong Kong
25 沙頭角中心小學
26 打鼓嶺社區會堂
30 龍山學校
53 泰亨公立學校
Name: cname, dtype: object
In [ ]:
In [30]:
X[hx_km.predict(X.values)==0].mean(0)
Out[30]:
方國珊 0.051348
林卓廷 0.074191
廖添誠 0.001043
陳云根 0.035734
梁國雄 0.071547
張超雄 0.075257
楊岳橋 0.091888
麥嘉晉 0.008439
鄭家富 0.025493
葛珮帆 0.046062
侯志強 0.228191
李梓敬 0.034708
鄧家彪 0.029588
范國威 0.029213
陳玉娥 0.001931
黃琛喻 0.003298
李偲嫣 0.009414
陳志全 0.080019
梁頌恆 0.065988
梁金成 0.001363
容海恩 0.035285
dtype: float64
In [31]:
## Evaluation
from sklearn.metrics import silhouette_score
print silhouette_score(X.values, hx_km.predict(X.values))
0.389590606587
In [32]:
hx_km = KMeans(n_clusters = 3)
hx_km.fit(X.values)
print silhouette_score(X.values, hx_km.predict(X.values))
0.26953278068
In [33]:
X.values.shape
Out[33]:
(143, 21)
In [34]:
X.values.transpose()
Out[34]:
array([[ 0.06039755, 0.03918919, 0.0528109 , ..., 0.03177711,
0.02782324, 0.11320755],
[ 0.12270642, 0.06824324, 0.10505395, ..., 0.07018072,
0.04684943, 0.05660377],
[ 0.00879205, 0.0027027 , 0.00227144, ..., 0.00180723,
0.00163666, 0.01886792],
...,
[ 0.09441896, 0.06351351, 0.09483248, ..., 0.05843373,
0.075491 , 0. ],
[ 0.00152905, 0.00202703, 0.00113572, ..., 0.0003012 ,
0.00020458, 0. ],
[ 0.0382263 , 0.04864865, 0.03463941, ..., 0.0936747 ,
0.19353519, 0.0754717 ]])
In [35]:
X.values.transpose().shape
Out[35]:
(21, 143)
In [36]:
hx_km = KMeans(n_clusters = 2)
hx_km.fit(X.values.transpose())
hx_km.predict(X.values.transpose())
Out[36]:
array([1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1], dtype=int32)
In [ ]:
nte.columns.values[3:24]
In [37]:
for name in nte.columns.values[3:24][hx_km.predict(X.values.transpose())==0]:
print name
廖添誠
麥嘉晉
鄭家富
侯志強
李梓敬
陳玉娥
黃琛喻
李偲嫣
梁金成
In [38]:
for name in nte.columns.values[3:24][hx_km.predict(X.values.transpose())==1]:
print name
方國珊
林卓廷
陳云根
梁國雄
張超雄
楊岳橋
葛珮帆
鄧家彪
范國威
陳志全
梁頌恆
容海恩
In [ ]:
Content source: chainsawriot/pycon2016hk_sklearn
Similar notebooks: