グループ化やピボットテーブルを学びます。scikit-learnのデータセットを使用します。
In [1]:
import pandas as pd
from sklearn.datasets import load_boston
boston = load_boston()
df = pd.DataFrame(boston.data, columns=boston.feature_names)
df
Out[1]:
CRIM
ZN
INDUS
CHAS
NOX
RM
AGE
DIS
RAD
TAX
PTRATIO
B
LSTAT
0
0.00632
18.0
2.31
0.0
0.538
6.575
65.2
4.0900
1.0
296.0
15.3
396.90
4.98
1
0.02731
0.0
7.07
0.0
0.469
6.421
78.9
4.9671
2.0
242.0
17.8
396.90
9.14
2
0.02729
0.0
7.07
0.0
0.469
7.185
61.1
4.9671
2.0
242.0
17.8
392.83
4.03
3
0.03237
0.0
2.18
0.0
0.458
6.998
45.8
6.0622
3.0
222.0
18.7
394.63
2.94
4
0.06905
0.0
2.18
0.0
0.458
7.147
54.2
6.0622
3.0
222.0
18.7
396.90
5.33
5
0.02985
0.0
2.18
0.0
0.458
6.430
58.7
6.0622
3.0
222.0
18.7
394.12
5.21
6
0.08829
12.5
7.87
0.0
0.524
6.012
66.6
5.5605
5.0
311.0
15.2
395.60
12.43
7
0.14455
12.5
7.87
0.0
0.524
6.172
96.1
5.9505
5.0
311.0
15.2
396.90
19.15
8
0.21124
12.5
7.87
0.0
0.524
5.631
100.0
6.0821
5.0
311.0
15.2
386.63
29.93
9
0.17004
12.5
7.87
0.0
0.524
6.004
85.9
6.5921
5.0
311.0
15.2
386.71
17.10
10
0.22489
12.5
7.87
0.0
0.524
6.377
94.3
6.3467
5.0
311.0
15.2
392.52
20.45
11
0.11747
12.5
7.87
0.0
0.524
6.009
82.9
6.2267
5.0
311.0
15.2
396.90
13.27
12
0.09378
12.5
7.87
0.0
0.524
5.889
39.0
5.4509
5.0
311.0
15.2
390.50
15.71
13
0.62976
0.0
8.14
0.0
0.538
5.949
61.8
4.7075
4.0
307.0
21.0
396.90
8.26
14
0.63796
0.0
8.14
0.0
0.538
6.096
84.5
4.4619
4.0
307.0
21.0
380.02
10.26
15
0.62739
0.0
8.14
0.0
0.538
5.834
56.5
4.4986
4.0
307.0
21.0
395.62
8.47
16
1.05393
0.0
8.14
0.0
0.538
5.935
29.3
4.4986
4.0
307.0
21.0
386.85
6.58
17
0.78420
0.0
8.14
0.0
0.538
5.990
81.7
4.2579
4.0
307.0
21.0
386.75
14.67
18
0.80271
0.0
8.14
0.0
0.538
5.456
36.6
3.7965
4.0
307.0
21.0
288.99
11.69
19
0.72580
0.0
8.14
0.0
0.538
5.727
69.5
3.7965
4.0
307.0
21.0
390.95
11.28
20
1.25179
0.0
8.14
0.0
0.538
5.570
98.1
3.7979
4.0
307.0
21.0
376.57
21.02
21
0.85204
0.0
8.14
0.0
0.538
5.965
89.2
4.0123
4.0
307.0
21.0
392.53
13.83
22
1.23247
0.0
8.14
0.0
0.538
6.142
91.7
3.9769
4.0
307.0
21.0
396.90
18.72
23
0.98843
0.0
8.14
0.0
0.538
5.813
100.0
4.0952
4.0
307.0
21.0
394.54
19.88
24
0.75026
0.0
8.14
0.0
0.538
5.924
94.1
4.3996
4.0
307.0
21.0
394.33
16.30
25
0.84054
0.0
8.14
0.0
0.538
5.599
85.7
4.4546
4.0
307.0
21.0
303.42
16.51
26
0.67191
0.0
8.14
0.0
0.538
5.813
90.3
4.6820
4.0
307.0
21.0
376.88
14.81
27
0.95577
0.0
8.14
0.0
0.538
6.047
88.8
4.4534
4.0
307.0
21.0
306.38
17.28
28
0.77299
0.0
8.14
0.0
0.538
6.495
94.4
4.4547
4.0
307.0
21.0
387.94
12.80
29
1.00245
0.0
8.14
0.0
0.538
6.674
87.3
4.2390
4.0
307.0
21.0
380.23
11.98
...
...
...
...
...
...
...
...
...
...
...
...
...
...
476
4.87141
0.0
18.10
0.0
0.614
6.484
93.6
2.3053
24.0
666.0
20.2
396.21
18.68
477
15.02340
0.0
18.10
0.0
0.614
5.304
97.3
2.1007
24.0
666.0
20.2
349.48
24.91
478
10.23300
0.0
18.10
0.0
0.614
6.185
96.7
2.1705
24.0
666.0
20.2
379.70
18.03
479
14.33370
0.0
18.10
0.0
0.614
6.229
88.0
1.9512
24.0
666.0
20.2
383.32
13.11
480
5.82401
0.0
18.10
0.0
0.532
6.242
64.7
3.4242
24.0
666.0
20.2
396.90
10.74
481
5.70818
0.0
18.10
0.0
0.532
6.750
74.9
3.3317
24.0
666.0
20.2
393.07
7.74
482
5.73116
0.0
18.10
0.0
0.532
7.061
77.0
3.4106
24.0
666.0
20.2
395.28
7.01
483
2.81838
0.0
18.10
0.0
0.532
5.762
40.3
4.0983
24.0
666.0
20.2
392.92
10.42
484
2.37857
0.0
18.10
0.0
0.583
5.871
41.9
3.7240
24.0
666.0
20.2
370.73
13.34
485
3.67367
0.0
18.10
0.0
0.583
6.312
51.9
3.9917
24.0
666.0
20.2
388.62
10.58
486
5.69175
0.0
18.10
0.0
0.583
6.114
79.8
3.5459
24.0
666.0
20.2
392.68
14.98
487
4.83567
0.0
18.10
0.0
0.583
5.905
53.2
3.1523
24.0
666.0
20.2
388.22
11.45
488
0.15086
0.0
27.74
0.0
0.609
5.454
92.7
1.8209
4.0
711.0
20.1
395.09
18.06
489
0.18337
0.0
27.74
0.0
0.609
5.414
98.3
1.7554
4.0
711.0
20.1
344.05
23.97
490
0.20746
0.0
27.74
0.0
0.609
5.093
98.0
1.8226
4.0
711.0
20.1
318.43
29.68
491
0.10574
0.0
27.74
0.0
0.609
5.983
98.8
1.8681
4.0
711.0
20.1
390.11
18.07
492
0.11132
0.0
27.74
0.0
0.609
5.983
83.5
2.1099
4.0
711.0
20.1
396.90
13.35
493
0.17331
0.0
9.69
0.0
0.585
5.707
54.0
2.3817
6.0
391.0
19.2
396.90
12.01
494
0.27957
0.0
9.69
0.0
0.585
5.926
42.6
2.3817
6.0
391.0
19.2
396.90
13.59
495
0.17899
0.0
9.69
0.0
0.585
5.670
28.8
2.7986
6.0
391.0
19.2
393.29
17.60
496
0.28960
0.0
9.69
0.0
0.585
5.390
72.9
2.7986
6.0
391.0
19.2
396.90
21.14
497
0.26838
0.0
9.69
0.0
0.585
5.794
70.6
2.8927
6.0
391.0
19.2
396.90
14.10
498
0.23912
0.0
9.69
0.0
0.585
6.019
65.3
2.4091
6.0
391.0
19.2
396.90
12.92
499
0.17783
0.0
9.69
0.0
0.585
5.569
73.5
2.3999
6.0
391.0
19.2
395.77
15.10
500
0.22438
0.0
9.69
0.0
0.585
6.027
79.7
2.4982
6.0
391.0
19.2
396.90
14.33
501
0.06263
0.0
11.93
0.0
0.573
6.593
69.1
2.4786
1.0
273.0
21.0
391.99
9.67
502
0.04527
0.0
11.93
0.0
0.573
6.120
76.7
2.2875
1.0
273.0
21.0
396.90
9.08
503
0.06076
0.0
11.93
0.0
0.573
6.976
91.0
2.1675
1.0
273.0
21.0
396.90
5.64
504
0.10959
0.0
11.93
0.0
0.573
6.794
89.3
2.3889
1.0
273.0
21.0
393.45
6.48
505
0.04741
0.0
11.93
0.0
0.573
6.030
80.8
2.5050
1.0
273.0
21.0
396.90
7.88
506 rows × 13 columns
groupbyによるグループ化
In [2]:
grouped = df['CRIM'].groupby(df['RAD'])
print(grouped)
<pandas.core.groupby.SeriesGroupBy object at 0x1117f0898>
SeriesGroupBy objectの中身を詳しく見てみます。
In [3]:
print(list(grouped))
[(1.0, 0 0.00632
193 0.02187
194 0.01439
254 0.04819
255 0.03548
283 0.01501
284 0.00906
285 0.01096
286 0.01965
331 0.05023
332 0.03466
341 0.01301
342 0.02498
349 0.02899
350 0.06211
501 0.06263
502 0.04527
503 0.06076
504 0.10959
505 0.04741
Name: CRIM, dtype: float64), (2.0, 1 0.02731
2 0.02729
56 0.02055
88 0.05660
89 0.05302
90 0.04684
91 0.03932
95 0.12204
96 0.11504
97 0.12083
98 0.08187
99 0.06860
120 0.06899
121 0.07165
122 0.09299
123 0.15038
124 0.09849
125 0.16902
126 0.38735
196 0.04011
197 0.04666
198 0.03768
201 0.03445
202 0.02177
Name: CRIM, dtype: float64), (3.0, 3 0.03237
4 0.06905
5 0.02985
39 0.02763
40 0.03359
41 0.12744
42 0.14150
43 0.15936
44 0.12269
45 0.17142
46 0.18836
47 0.22927
48 0.25387
49 0.21977
54 0.01360
64 0.01951
84 0.05059
85 0.05735
86 0.05188
87 0.07151
179 0.05780
180 0.06588
181 0.06888
182 0.09103
183 0.10008
184 0.08308
185 0.06047
186 0.05602
199 0.03150
200 0.01778
256 0.01538
269 0.09065
270 0.29916
271 0.16211
272 0.11460
273 0.22188
345 0.03113
346 0.06162
Name: CRIM, dtype: float64), (4.0, 13 0.62976
14 0.63796
15 0.62739
16 1.05393
17 0.78420
18 0.80271
19 0.72580
20 1.25179
21 0.85204
22 1.23247
23 0.98843
24 0.75026
25 0.84054
26 0.67191
27 0.95577
28 0.77299
29 1.00245
30 1.13081
31 1.35472
32 1.38799
33 1.15172
34 1.61282
50 0.08873
51 0.04337
52 0.05360
53 0.04981
65 0.03584
66 0.04379
67 0.05789
68 0.13554
...
294 0.08199
295 0.12932
296 0.05372
297 0.14103
308 0.49298
309 0.34940
310 2.63548
311 0.79041
312 0.26169
313 0.26938
314 0.36920
315 0.25356
316 0.31827
317 0.24522
318 0.40202
319 0.47547
328 0.06617
329 0.06724
330 0.04544
347 0.01870
348 0.01501
351 0.07950
352 0.07244
354 0.04301
355 0.10659
488 0.15086
489 0.18337
490 0.20746
491 0.10574
492 0.11132
Name: CRIM, Length: 110, dtype: float64), (5.0, 6 0.08829
7 0.14455
8 0.21124
9 0.17004
10 0.22489
11 0.11747
12 0.09378
35 0.06417
36 0.09744
37 0.08014
38 0.17505
55 0.01311
57 0.01432
74 0.07896
75 0.09512
76 0.10153
77 0.08707
78 0.05646
79 0.08387
100 0.14866
101 0.11432
102 0.22876
103 0.21161
104 0.13960
105 0.13262
106 0.17120
107 0.13117
108 0.12802
109 0.26363
110 0.10793
...
265 0.76162
266 0.78570
267 0.57834
268 0.54050
279 0.21038
280 0.03578
281 0.03705
282 0.06129
298 0.06466
299 0.05561
300 0.04417
320 0.16760
321 0.18159
322 0.35114
323 0.28392
324 0.34109
325 0.19186
326 0.30347
327 0.24103
333 0.05083
334 0.03738
335 0.03961
336 0.03427
337 0.03041
338 0.03306
339 0.05497
340 0.06151
343 0.02543
344 0.03049
353 0.01709
Name: CRIM, Length: 115, dtype: float64), (6.0, 111 0.10084
112 0.12329
113 0.22212
114 0.14231
115 0.17134
116 0.13158
117 0.15098
118 0.13058
119 0.14476
238 0.08244
239 0.09252
240 0.11329
241 0.10612
242 0.10290
243 0.12757
287 0.03871
288 0.04590
289 0.04297
493 0.17331
494 0.27957
495 0.17899
496 0.28960
497 0.26838
498 0.23912
499 0.17783
500 0.22438
Name: CRIM, dtype: float64), (7.0, 244 0.20608
245 0.19133
246 0.33983
247 0.19657
248 0.16439
249 0.19073
250 0.14030
251 0.21409
252 0.08221
253 0.36894
301 0.03537
302 0.09266
303 0.10000
304 0.05515
305 0.05479
306 0.07503
307 0.04932
Name: CRIM, dtype: float64), (8.0, 58 0.15445
59 0.10328
60 0.14932
61 0.17171
62 0.11027
63 0.12650
220 0.35809
221 0.40771
222 0.62356
223 0.61470
224 0.31533
225 0.52693
226 0.38214
227 0.41238
228 0.29819
229 0.44178
230 0.53700
231 0.46296
232 0.57529
233 0.33147
234 0.44791
235 0.33045
236 0.52058
237 0.51183
Name: CRIM, dtype: float64), (24.0, 356 8.98296
357 3.84970
358 5.20177
359 4.26131
360 4.54192
361 3.83684
362 3.67822
363 4.22239
364 3.47428
365 4.55587
366 3.69695
367 13.52220
368 4.89822
369 5.66998
370 6.53876
371 9.23230
372 8.26725
373 11.10810
374 18.49820
375 19.60910
376 15.28800
377 9.82349
378 23.64820
379 17.86670
380 88.97620
381 15.87440
382 9.18702
383 7.99248
384 20.08490
385 16.81180
...
458 7.75223
459 6.80117
460 4.81213
461 3.69311
462 6.65492
463 5.82115
464 7.83932
465 3.16360
466 3.77498
467 4.42228
468 15.57570
469 13.07510
470 4.34879
471 4.03841
472 3.56868
473 4.64689
474 8.05579
475 6.39312
476 4.87141
477 15.02340
478 10.23300
479 14.33370
480 5.82401
481 5.70818
482 5.73116
483 2.81838
484 2.37857
485 3.67367
486 5.69175
487 4.83567
Name: CRIM, Length: 132, dtype: float64)]
In [4]:
print(list(grouped)[0])
(1.0, 0 0.00632
193 0.02187
194 0.01439
254 0.04819
255 0.03548
283 0.01501
284 0.00906
285 0.01096
286 0.01965
331 0.05023
332 0.03466
341 0.01301
342 0.02498
349 0.02899
350 0.06211
501 0.06263
502 0.04527
503 0.06076
504 0.10959
505 0.04741
Name: CRIM, dtype: float64)
In [5]:
print(list(grouped)[0][1])
0 0.00632
193 0.02187
194 0.01439
254 0.04819
255 0.03548
283 0.01501
284 0.00906
285 0.01096
286 0.01965
331 0.05023
332 0.03466
341 0.01301
342 0.02498
349 0.02899
350 0.06211
501 0.06263
502 0.04527
503 0.06076
504 0.10959
505 0.04741
Name: CRIM, dtype: float64
In [6]:
type(list(grouped)[0][1])
Out[6]:
pandas.core.series.Series
In [7]:
list(grouped)[0][1][0]
Out[7]:
0.0063200000000000001
In [8]:
len(grouped)
Out[8]:
9
In [9]:
df['RAD'].unique()
Out[9]:
array([ 1., 2., 3., 5., 4., 8., 6., 7., 24.])
グループごとの集計を行います。
In [10]:
grouped.mean()
Out[10]:
RAD
1.0 0.036029
2.0 0.083285
3.0 0.097359
4.0 0.393903
5.0 0.687792
6.0 0.150054
7.0 0.150399
8.0 0.371410
24.0 12.683533
Name: CRIM, dtype: float64
In [11]:
grouped.var()
Out[11]:
RAD
1.0 0.000647
2.0 0.005898
3.0 0.005584
4.0 0.206315
5.0 0.888697
6.0 0.004962
7.0 0.009650
8.0 0.027327
24.0 171.148377
Name: CRIM, dtype: float64
どのフィールドによってグループ化するかは複数のフィールドを指定できます。
In [12]:
grouped2 = df['CRIM'].groupby([df['RAD'],df['CHAS']])
list(grouped2)
Out[12]:
[((1.0, 0.0), 0 0.00632
193 0.02187
194 0.01439
254 0.04819
255 0.03548
284 0.00906
285 0.01096
286 0.01965
331 0.05023
332 0.03466
341 0.01301
342 0.02498
349 0.02899
350 0.06211
501 0.06263
502 0.04527
503 0.06076
504 0.10959
505 0.04741
Name: CRIM, dtype: float64), ((1.0, 1.0), 283 0.01501
Name: CRIM, dtype: float64), ((2.0, 0.0), 1 0.02731
2 0.02729
56 0.02055
88 0.05660
89 0.05302
90 0.04684
91 0.03932
95 0.12204
96 0.11504
97 0.12083
98 0.08187
99 0.06860
120 0.06899
121 0.07165
122 0.09299
123 0.15038
124 0.09849
125 0.16902
126 0.38735
196 0.04011
197 0.04666
198 0.03768
201 0.03445
202 0.02177
Name: CRIM, dtype: float64), ((3.0, 0.0), 3 0.03237
4 0.06905
5 0.02985
39 0.02763
40 0.03359
41 0.12744
42 0.14150
43 0.15936
44 0.12269
45 0.17142
46 0.18836
47 0.22927
48 0.25387
49 0.21977
54 0.01360
64 0.01951
84 0.05059
85 0.05735
86 0.05188
87 0.07151
179 0.05780
180 0.06588
181 0.06888
182 0.09103
183 0.10008
184 0.08308
185 0.06047
186 0.05602
199 0.03150
200 0.01778
256 0.01538
270 0.29916
271 0.16211
272 0.11460
345 0.03113
346 0.06162
Name: CRIM, dtype: float64), ((3.0, 1.0), 269 0.09065
273 0.22188
Name: CRIM, dtype: float64), ((4.0, 0.0), 13 0.62976
14 0.63796
15 0.62739
16 1.05393
17 0.78420
18 0.80271
19 0.72580
20 1.25179
21 0.85204
22 1.23247
23 0.98843
24 0.75026
25 0.84054
26 0.67191
27 0.95577
28 0.77299
29 1.00245
30 1.13081
31 1.35472
32 1.38799
33 1.15172
34 1.61282
50 0.08873
51 0.04337
52 0.05360
53 0.04981
65 0.03584
66 0.04379
67 0.05789
68 0.13554
...
294 0.08199
295 0.12932
296 0.05372
297 0.14103
308 0.49298
309 0.34940
310 2.63548
311 0.79041
312 0.26169
313 0.26938
314 0.36920
315 0.25356
316 0.31827
317 0.24522
318 0.40202
319 0.47547
328 0.06617
329 0.06724
330 0.04544
347 0.01870
348 0.01501
351 0.07950
352 0.07244
354 0.04301
355 0.10659
488 0.15086
489 0.18337
490 0.20746
491 0.10574
492 0.11132
Name: CRIM, Length: 102, dtype: float64), ((4.0, 1.0), 208 0.13587
209 0.43571
210 0.17446
211 0.37578
212 0.21719
274 0.05644
276 0.10469
277 0.06127
Name: CRIM, dtype: float64), ((5.0, 0.0), 6 0.08829
7 0.14455
8 0.21124
9 0.17004
10 0.22489
11 0.11747
12 0.09378
35 0.06417
36 0.09744
37 0.08014
38 0.17505
55 0.01311
57 0.01432
74 0.07896
75 0.09512
76 0.10153
77 0.08707
78 0.05646
79 0.08387
100 0.14866
101 0.11432
102 0.22876
103 0.21161
104 0.13960
105 0.13262
106 0.17120
107 0.13117
108 0.12802
109 0.26363
110 0.10793
...
264 0.55007
265 0.76162
266 0.78570
267 0.57834
268 0.54050
279 0.21038
280 0.03578
281 0.03705
298 0.06466
299 0.05561
300 0.04417
320 0.16760
321 0.18159
322 0.35114
323 0.28392
324 0.34109
325 0.19186
326 0.30347
327 0.24103
333 0.05083
334 0.03738
335 0.03961
336 0.03427
337 0.03041
338 0.03306
339 0.05497
340 0.06151
343 0.02543
344 0.03049
353 0.01709
Name: CRIM, Length: 104, dtype: float64), ((5.0, 1.0), 142 3.32105
152 1.12658
154 1.41385
155 3.53501
160 1.27346
162 1.83377
163 1.51902
216 0.04560
218 0.11069
219 0.11425
282 0.06129
Name: CRIM, dtype: float64), ((6.0, 0.0), 111 0.10084
112 0.12329
113 0.22212
114 0.14231
115 0.17134
116 0.13158
117 0.15098
118 0.13058
119 0.14476
238 0.08244
239 0.09252
240 0.11329
241 0.10612
242 0.10290
243 0.12757
287 0.03871
288 0.04590
289 0.04297
493 0.17331
494 0.27957
495 0.17899
496 0.28960
497 0.26838
498 0.23912
499 0.17783
500 0.22438
Name: CRIM, dtype: float64), ((7.0, 0.0), 244 0.20608
245 0.19133
246 0.33983
247 0.19657
248 0.16439
249 0.19073
250 0.14030
251 0.21409
252 0.08221
253 0.36894
301 0.03537
302 0.09266
303 0.10000
304 0.05515
305 0.05479
306 0.07503
307 0.04932
Name: CRIM, dtype: float64), ((8.0, 0.0), 58 0.15445
59 0.10328
60 0.14932
61 0.17171
62 0.11027
63 0.12650
223 0.61470
224 0.31533
225 0.52693
226 0.38214
227 0.41238
228 0.29819
229 0.44178
230 0.53700
231 0.46296
232 0.57529
233 0.33147
235 0.33045
237 0.51183
Name: CRIM, dtype: float64), ((8.0, 1.0), 220 0.35809
221 0.40771
222 0.62356
234 0.44791
236 0.52058
Name: CRIM, dtype: float64), ((24.0, 0.0), 359 4.26131
360 4.54192
361 3.83684
362 3.67822
365 4.55587
366 3.69695
367 13.52220
368 4.89822
371 9.23230
373 11.10810
374 18.49820
375 19.60910
376 15.28800
377 9.82349
378 23.64820
379 17.86670
380 88.97620
381 15.87440
382 9.18702
383 7.99248
384 20.08490
385 16.81180
386 24.39380
387 22.59710
388 14.33370
389 8.15174
390 6.96215
391 5.29305
392 11.57790
393 8.64476
...
458 7.75223
459 6.80117
460 4.81213
461 3.69311
462 6.65492
463 5.82115
464 7.83932
465 3.16360
466 3.77498
467 4.42228
468 15.57570
469 13.07510
470 4.34879
471 4.03841
472 3.56868
473 4.64689
474 8.05579
475 6.39312
476 4.87141
477 15.02340
478 10.23300
479 14.33370
480 5.82401
481 5.70818
482 5.73116
483 2.81838
484 2.37857
485 3.67367
486 5.69175
487 4.83567
Name: CRIM, Length: 124, dtype: float64), ((24.0, 1.0), 356 8.98296
357 3.84970
358 5.20177
363 4.22239
364 3.47428
369 5.66998
370 6.53876
372 8.26725
Name: CRIM, dtype: float64)]
In [13]:
means = grouped2.mean()
means
Out[13]:
RAD CHAS
1.0 0.0 0.037135
1.0 0.015010
2.0 0.0 0.083285
3.0 0.0 0.094087
1.0 0.156265
4.0 0.0 0.409490
1.0 0.195176
5.0 0.0 0.622514
1.0 1.304961
6.0 0.0 0.150054
7.0 0.0 0.150399
8.0 0.0 0.345052
1.0 0.471570
24.0 0.0 13.129188
1.0 5.775886
Name: CRIM, dtype: float64
マルチインデックスはunstackで行列形式に変形できます。
In [14]:
means.unstack()
Out[14]:
CHAS
0.0
1.0
RAD
1.0
0.037135
0.015010
2.0
0.083285
NaN
3.0
0.094087
0.156265
4.0
0.409490
0.195176
5.0
0.622514
1.304961
6.0
0.150054
NaN
7.0
0.150399
NaN
8.0
0.345052
0.471570
24.0
13.129188
5.775886
いくつかメソッドを試します。
In [15]:
print(grouped2.size())
print(grouped2.count())
print(grouped2.sum())
print(grouped2.median())
print(grouped2.std())
RAD CHAS
1.0 0.0 19
1.0 1
2.0 0.0 24
3.0 0.0 36
1.0 2
4.0 0.0 102
1.0 8
5.0 0.0 104
1.0 11
6.0 0.0 26
7.0 0.0 17
8.0 0.0 19
1.0 5
24.0 0.0 124
1.0 8
Name: CRIM, dtype: int64
RAD CHAS
1.0 0.0 19
1.0 1
2.0 0.0 24
3.0 0.0 36
1.0 2
4.0 0.0 102
1.0 8
5.0 0.0 104
1.0 11
6.0 0.0 26
7.0 0.0 17
8.0 0.0 19
1.0 5
24.0 0.0 124
1.0 8
Name: CRIM, dtype: int64
RAD CHAS
1.0 0.0 0.70556
1.0 0.01501
2.0 0.0 1.99885
3.0 0.0 3.38713
1.0 0.31253
4.0 0.0 41.76797
1.0 1.56141
5.0 0.0 64.74147
1.0 14.35457
6.0 0.0 3.90140
7.0 0.0 2.55679
8.0 0.0 6.55598
1.0 2.35785
24.0 0.0 1628.01931
1.0 46.20709
Name: CRIM, dtype: float64
RAD CHAS
1.0 0.0 0.034660
1.0 0.015010
2.0 0.0 0.062600
3.0 0.0 0.067380
1.0 0.156265
4.0 0.0 0.237455
1.0 0.155165
5.0 0.0 0.158130
1.0 1.273460
6.0 0.0 0.136945
7.0 0.0 0.140300
8.0 0.0 0.331470
1.0 0.447910
24.0 0.0 9.333990
1.0 5.435875
Name: CRIM, dtype: float64
RAD CHAS
1.0 0.0 0.025637
1.0 NaN
2.0 0.0 0.076800
3.0 0.0 0.073808
1.0 0.092794
4.0 0.0 0.466791
1.0 0.141558
5.0 0.0 0.888403
1.0 1.240349
6.0 0.0 0.070443
7.0 0.0 0.098234
8.0 0.0 0.170315
1.0 0.103705
24.0 0.0 13.369438
1.0 2.029636
Name: CRIM, dtype: float64
In [16]:
pd.pivot_table(df,values='CRIM', index='RAD', columns='CHAS', aggfunc='mean', fill_value=None)
Out[16]:
CHAS
0.0
1.0
RAD
1.0
0.037135
0.015010
2.0
0.083285
NaN
3.0
0.094087
0.156265
4.0
0.409490
0.195176
5.0
0.622514
1.304961
6.0
0.150054
NaN
7.0
0.150399
NaN
8.0
0.345052
0.471570
24.0
13.129188
5.775886
In [17]:
pd.pivot_table(df,values='CRIM', index='RAD', columns='CHAS', aggfunc='count', fill_value=None)
Out[17]:
CHAS
0.0
1.0
RAD
1.0
19.0
1.0
2.0
24.0
NaN
3.0
36.0
2.0
4.0
102.0
8.0
5.0
104.0
11.0
6.0
26.0
NaN
7.0
17.0
NaN
8.0
19.0
5.0
24.0
124.0
8.0
Seriesに対して数を数える
In [18]:
df['CHAS'].value_counts()
Out[18]:
0.0 471
1.0 35
Name: CHAS, dtype: int64
In [19]:
lst = [[_m,_n,_i*10+_j] for _i,_m in enumerate(['a','b','c']) for _j,_n in enumerate(['x','y'])]
df2 = pd.DataFrame(lst,columns=['field_a','field_x','number'])
df2
Out[19]:
field_a
field_x
number
0
a
x
0
1
a
y
1
2
b
x
10
3
b
y
11
4
c
x
20
5
c
y
21
In [20]:
df2.pivot(index='field_a',columns='field_x',values='number')
Out[20]:
field_x
x
y
field_a
a
0
1
b
10
11
c
20
21
nullがある場合
In [21]:
df2 = df2.drop(3,axis=0)
df2.pivot(index='field_a',columns='field_x',values='number')
Out[21]:
field_x
x
y
field_a
a
0.0
1.0
b
10.0
NaN
c
20.0
21.0
In [22]:
from pivottablejs import pivot_ui
In [23]:
pivot_ui(df)
Out[23]:
In [ ]: