In [8]:
import pandas as pd
college_file_name = 'data/College.csv'
college = pd.read_csv(college_file_name,index_col=0)
college
Out[8]:
Private
Apps
Accept
Enroll
Top10perc
Top25perc
F.Undergrad
P.Undergrad
Outstate
Room.Board
Books
Personal
PhD
Terminal
S.F.Ratio
perc.alumni
Expend
Grad.Rate
Abilene Christian University
Yes
1660
1232
721
23
52
2885
537
7440
3300
450
2200
70
78
18.1
12
7041
60
Adelphi University
Yes
2186
1924
512
16
29
2683
1227
12280
6450
750
1500
29
30
12.2
16
10527
56
Adrian College
Yes
1428
1097
336
22
50
1036
99
11250
3750
400
1165
53
66
12.9
30
8735
54
Agnes Scott College
Yes
417
349
137
60
89
510
63
12960
5450
450
875
92
97
7.7
37
19016
59
Alaska Pacific University
Yes
193
146
55
16
44
249
869
7560
4120
800
1500
76
72
11.9
2
10922
15
Albertson College
Yes
587
479
158
38
62
678
41
13500
3335
500
675
67
73
9.4
11
9727
55
Albertus Magnus College
Yes
353
340
103
17
45
416
230
13290
5720
500
1500
90
93
11.5
26
8861
63
Albion College
Yes
1899
1720
489
37
68
1594
32
13868
4826
450
850
89
100
13.7
37
11487
73
Albright College
Yes
1038
839
227
30
63
973
306
15595
4400
300
500
79
84
11.3
23
11644
80
Alderson-Broaddus College
Yes
582
498
172
21
44
799
78
10468
3380
660
1800
40
41
11.5
15
8991
52
Alfred University
Yes
1732
1425
472
37
75
1830
110
16548
5406
500
600
82
88
11.3
31
10932
73
Allegheny College
Yes
2652
1900
484
44
77
1707
44
17080
4440
400
600
73
91
9.9
41
11711
76
Allentown Coll. of St. Francis de Sales
Yes
1179
780
290
38
64
1130
638
9690
4785
600
1000
60
84
13.3
21
7940
74
Alma College
Yes
1267
1080
385
44
73
1306
28
12572
4552
400
400
79
87
15.3
32
9305
68
Alverno College
Yes
494
313
157
23
46
1317
1235
8352
3640
650
2449
36
69
11.1
26
8127
55
American International College
Yes
1420
1093
220
9
22
1018
287
8700
4780
450
1400
78
84
14.7
19
7355
69
Amherst College
Yes
4302
992
418
83
96
1593
5
19760
5300
660
1598
93
98
8.4
63
21424
100
Anderson University
Yes
1216
908
423
19
40
1819
281
10100
3520
550
1100
48
61
12.1
14
7994
59
Andrews University
Yes
1130
704
322
14
23
1586
326
9996
3090
900
1320
62
66
11.5
18
10908
46
Angelo State University
No
3540
2001
1016
24
54
4190
1512
5130
3592
500
2000
60
62
23.1
5
4010
34
Antioch University
Yes
713
661
252
25
44
712
23
15476
3336
400
1100
69
82
11.3
35
42926
48
Appalachian State University
No
7313
4664
1910
20
63
9940
1035
6806
2540
96
2000
83
96
18.3
14
5854
70
Aquinas College
Yes
619
516
219
20
51
1251
767
11208
4124
350
1615
55
65
12.7
25
6584
65
Arizona State University Main campus
No
12809
10308
3761
24
49
22593
7585
7434
4850
700
2100
88
93
18.9
5
4602
48
Arkansas College (Lyon College)
Yes
708
334
166
46
74
530
182
8644
3922
500
800
79
88
12.6
24
14579
54
Arkansas Tech University
No
1734
1729
951
12
52
3602
939
3460
2650
450
1000
57
60
19.6
5
4739
48
Assumption College
Yes
2135
1700
491
23
59
1708
689
12000
5920
500
500
93
93
13.8
30
7100
88
Auburn University-Main Campus
No
7548
6791
3070
25
57
16262
1716
6300
3933
600
1908
85
91
16.7
18
6642
69
Augsburg College
Yes
662
513
257
12
30
2074
726
11902
4372
540
950
65
65
12.8
31
7836
58
Augustana College IL
Yes
1879
1658
497
36
69
1950
38
13353
4173
540
821
78
83
12.7
40
9220
71
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
Westfield State College
No
3100
2150
825
3
20
3234
941
5542
3788
500
1300
75
79
15.7
20
4222
65
Westminster College MO
Yes
662
553
184
20
43
665
37
10720
4050
600
1650
66
70
12.5
20
7925
62
Westminster College
Yes
996
866
377
29
58
1411
72
12065
3615
430
685
62
78
12.5
41
8596
80
Westminster College of Salt Lake City
Yes
917
720
213
21
60
979
743
8820
4050
600
2025
68
83
10.5
34
7170
50
Westmont College
No
950
713
351
42
72
1276
9
14320
5304
490
1410
77
77
14.9
17
8837
87
Wheaton College IL
Yes
1432
920
548
56
84
2200
56
11480
4200
530
1400
81
83
12.7
40
11916
85
Westminster College PA
Yes
1738
1373
417
21
55
1335
30
18460
5970
700
850
92
96
13.2
41
22704
71
Wheeling Jesuit College
Yes
903
755
213
15
49
971
305
10500
4545
600
600
66
71
14.1
27
7494
72
Whitman College
Yes
1861
998
359
45
77
1220
46
16670
4900
750
800
80
83
10.5
51
13198
72
Whittier College
Yes
1681
1069
344
35
63
1235
30
16249
5699
500
1998
84
92
13.6
29
11778
52
Whitworth College
Yes
1121
926
372
43
70
1270
160
12660
4500
678
2424
80
80
16.9
20
8328
80
Widener University
Yes
2139
1492
502
24
64
2186
2171
12350
5370
500
1350
88
86
12.6
19
9603
63
Wilkes University
Yes
1631
1431
434
15
36
1803
603
11150
5130
550
1260
78
92
13.3
24
8543
67
Willamette University
Yes
1658
1327
395
49
80
1595
159
14800
4620
400
790
91
94
13.3
37
10779
68
William Jewell College
Yes
663
547
315
32
67
1279
75
10060
2970
500
2600
74
80
11.2
19
7885
59
William Woods University
Yes
469
435
227
17
39
851
120
10535
4365
550
3700
39
66
12.9
16
7438
52
Williams College
Yes
4186
1245
526
81
96
1988
29
19629
5790
500
1200
94
99
9.0
64
22014
99
Wilson College
Yes
167
130
46
16
50
199
676
11428
5084
450
475
67
76
8.3
43
10291
67
Wingate College
Yes
1239
1017
383
10
34
1207
157
7820
3400
550
1550
69
81
13.9
8
7264
91
Winona State University
No
3325
2047
1301
20
45
5800
872
4200
2700
300
1200
53
60
20.2
18
5318
58
Winthrop University
No
2320
1805
769
24
61
3395
670
6400
3392
580
2150
71
80
12.8
26
6729
59
Wisconsin Lutheran College
Yes
152
128
75
17
41
282
22
9100
3700
500
1400
48
48
8.5
26
8960
50
Wittenberg University
Yes
1979
1739
575
42
68
1980
144
15948
4404
400
800
82
95
12.8
29
10414
78
Wofford College
Yes
1501
935
273
51
83
1059
34
12680
4150
605
1440
91
92
15.3
42
7875
75
Worcester Polytechnic Institute
Yes
2768
2314
682
49
86
2802
86
15884
5370
530
730
92
94
15.2
34
10774
82
Worcester State College
No
2197
1515
543
4
26
3089
2029
6797
3900
500
1200
60
60
21.0
14
4469
40
Xavier University
Yes
1959
1805
695
24
47
2849
1107
11520
4960
600
1250
73
75
13.3
31
9189
83
Xavier University of Louisiana
Yes
2097
1915
695
34
61
2793
166
6900
4200
617
781
67
75
14.4
20
8323
49
Yale University
Yes
10705
2453
1317
95
99
5217
83
19840
6510
630
2115
96
96
5.8
49
40386
99
York College of Pennsylvania
Yes
2989
1855
691
28
63
2988
1726
4990
3560
500
1250
75
75
18.1
28
4509
99
777 rows × 18 columns
In [13]:
college.describe()
Out[13]:
Apps
Accept
Enroll
Top10perc
Top25perc
F.Undergrad
P.Undergrad
Outstate
Room.Board
Books
Personal
PhD
Terminal
S.F.Ratio
perc.alumni
Expend
Grad.Rate
count
777.000000
777.000000
777.000000
777.000000
777.000000
777.000000
777.000000
777.000000
777.000000
777.000000
777.000000
777.000000
777.000000
777.000000
777.000000
777.000000
777.00000
mean
3001.638353
2018.804376
779.972973
27.558559
55.796654
3699.907336
855.298584
10440.669241
4357.526384
549.380952
1340.642214
72.660232
79.702703
14.089704
22.743887
9660.171171
65.46332
std
3870.201484
2451.113971
929.176190
17.640364
19.804778
4850.420531
1522.431887
4023.016484
1096.696416
165.105360
677.071454
16.328155
14.722359
3.958349
12.391801
5221.768440
17.17771
min
81.000000
72.000000
35.000000
1.000000
9.000000
139.000000
1.000000
2340.000000
1780.000000
96.000000
250.000000
8.000000
24.000000
2.500000
0.000000
3186.000000
10.00000
25%
776.000000
604.000000
242.000000
15.000000
41.000000
992.000000
95.000000
7320.000000
3597.000000
470.000000
850.000000
62.000000
71.000000
11.500000
13.000000
6751.000000
53.00000
50%
1558.000000
1110.000000
434.000000
23.000000
54.000000
1707.000000
353.000000
9990.000000
4200.000000
500.000000
1200.000000
75.000000
82.000000
13.600000
21.000000
8377.000000
65.00000
75%
3624.000000
2424.000000
902.000000
35.000000
69.000000
4005.000000
967.000000
12925.000000
5050.000000
600.000000
1700.000000
85.000000
92.000000
16.500000
31.000000
10830.000000
78.00000
max
48094.000000
26330.000000
6392.000000
96.000000
100.000000
31643.000000
21836.000000
21700.000000
8124.000000
2340.000000
6800.000000
103.000000
100.000000
39.800000
64.000000
56233.000000
118.00000
In [9]:
import matplotlib.pyplot as plt
column_10 = college.columns[-10:]
def getColumns(idx):
values = college[column_10[idx]]
return [item for item in values]
for row in range(10):
for col in range(10):
if row!=col:
plt.subplot(10, 10, row*10+col+1)
plt.scatter(getColumns(row), getColumns(col))
plt.axis('off')
plt.show()
In [64]:
import numpy as np
Top10prerc=college['Top10perc'].values
elite = np.array(range(len(Top10prerc)), dtype=str)
elite[Top10prerc>50]='Yes'
elite[Top10prerc<=50]='No'
colleage['Elite']=elite
In [93]:
def createHist(data, bin_num=20):
hist, bins = np.histogram(data,bins=bin_num)
center = (bins[:-1] + bins[1:]) / 2
width = 0.7 * (bins[1] - bins[0])
return center, hist, width
def draw_hist(col_name, pos):
data = colleage[col_name].values
center, hist, width = createHist(data)
plt.subplot(2,2,pos)
plt.bar(center, hist, align='center', width=width)
draw_hist('Apps', 1)
draw_hist('Enroll',2)
draw_hist('Outstate',3)
draw_hist('P.Undergrad',4)
plt.show()
In [10]:
auto = pd.read_table('data/Auto',sep='\s+')
rows=np.sum(auto.values=='?',axis=1)
delete_rows = []
for idx,_ in enumerate(rows):
if _!=0:
delete_rows.append(idx)
auto=auto.drop(auto.index[delete_rows])
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-10-488606213387> in <module>()
1 auto = pd.read_table('data/Auto',sep='\s+')
----> 2 rows=np.sum(auto.values=='?',axis=1)
3 delete_rows = []
4 for idx,_ in enumerate(rows):
5 if _!=0:
NameError: name 'np' is not defined
In [148]:
mpg = auto['mpg'].values
displacement = auto['displacement'].values
horsepower = auto['horsepower'].values
weight = auto['weight'].values
acceleration = auto['acceleration'].values
plt.subplot(2,2,1)
plt.scatter(displacement, mpg)
plt.subplot(2,2,2)
plt.scatter(weight, mpg)
plt.subplot(2,2,3)
plt.scatter(acceleration, mpg)
plt.subplot(2,2,4)
displacement = [float(item) for item in displacement]
plt.scatter(displacement, mpg)
plt.show()
In [150]:
boston = pd.read_csv('Boston.csv', index_col=0)
boston
Out[150]:
crim
zn
indus
chas
nox
rm
age
dis
rad
tax
ptratio
black
lstat
medv
1
0.00632
18.0
2.31
0
0.538
6.575
65.2
4.0900
1
296
15.3
396.90
4.98
24.0
2
0.02731
0.0
7.07
0
0.469
6.421
78.9
4.9671
2
242
17.8
396.90
9.14
21.6
3
0.02729
0.0
7.07
0
0.469
7.185
61.1
4.9671
2
242
17.8
392.83
4.03
34.7
4
0.03237
0.0
2.18
0
0.458
6.998
45.8
6.0622
3
222
18.7
394.63
2.94
33.4
5
0.06905
0.0
2.18
0
0.458
7.147
54.2
6.0622
3
222
18.7
396.90
5.33
36.2
6
0.02985
0.0
2.18
0
0.458
6.430
58.7
6.0622
3
222
18.7
394.12
5.21
28.7
7
0.08829
12.5
7.87
0
0.524
6.012
66.6
5.5605
5
311
15.2
395.60
12.43
22.9
8
0.14455
12.5
7.87
0
0.524
6.172
96.1
5.9505
5
311
15.2
396.90
19.15
27.1
9
0.21124
12.5
7.87
0
0.524
5.631
100.0
6.0821
5
311
15.2
386.63
29.93
16.5
10
0.17004
12.5
7.87
0
0.524
6.004
85.9
6.5921
5
311
15.2
386.71
17.10
18.9
11
0.22489
12.5
7.87
0
0.524
6.377
94.3
6.3467
5
311
15.2
392.52
20.45
15.0
12
0.11747
12.5
7.87
0
0.524
6.009
82.9
6.2267
5
311
15.2
396.90
13.27
18.9
13
0.09378
12.5
7.87
0
0.524
5.889
39.0
5.4509
5
311
15.2
390.50
15.71
21.7
14
0.62976
0.0
8.14
0
0.538
5.949
61.8
4.7075
4
307
21.0
396.90
8.26
20.4
15
0.63796
0.0
8.14
0
0.538
6.096
84.5
4.4619
4
307
21.0
380.02
10.26
18.2
16
0.62739
0.0
8.14
0
0.538
5.834
56.5
4.4986
4
307
21.0
395.62
8.47
19.9
17
1.05393
0.0
8.14
0
0.538
5.935
29.3
4.4986
4
307
21.0
386.85
6.58
23.1
18
0.78420
0.0
8.14
0
0.538
5.990
81.7
4.2579
4
307
21.0
386.75
14.67
17.5
19
0.80271
0.0
8.14
0
0.538
5.456
36.6
3.7965
4
307
21.0
288.99
11.69
20.2
20
0.72580
0.0
8.14
0
0.538
5.727
69.5
3.7965
4
307
21.0
390.95
11.28
18.2
21
1.25179
0.0
8.14
0
0.538
5.570
98.1
3.7979
4
307
21.0
376.57
21.02
13.6
22
0.85204
0.0
8.14
0
0.538
5.965
89.2
4.0123
4
307
21.0
392.53
13.83
19.6
23
1.23247
0.0
8.14
0
0.538
6.142
91.7
3.9769
4
307
21.0
396.90
18.72
15.2
24
0.98843
0.0
8.14
0
0.538
5.813
100.0
4.0952
4
307
21.0
394.54
19.88
14.5
25
0.75026
0.0
8.14
0
0.538
5.924
94.1
4.3996
4
307
21.0
394.33
16.30
15.6
26
0.84054
0.0
8.14
0
0.538
5.599
85.7
4.4546
4
307
21.0
303.42
16.51
13.9
27
0.67191
0.0
8.14
0
0.538
5.813
90.3
4.6820
4
307
21.0
376.88
14.81
16.6
28
0.95577
0.0
8.14
0
0.538
6.047
88.8
4.4534
4
307
21.0
306.38
17.28
14.8
29
0.77299
0.0
8.14
0
0.538
6.495
94.4
4.4547
4
307
21.0
387.94
12.80
18.4
30
1.00245
0.0
8.14
0
0.538
6.674
87.3
4.2390
4
307
21.0
380.23
11.98
21.0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
477
4.87141
0.0
18.10
0
0.614
6.484
93.6
2.3053
24
666
20.2
396.21
18.68
16.7
478
15.02340
0.0
18.10
0
0.614
5.304
97.3
2.1007
24
666
20.2
349.48
24.91
12.0
479
10.23300
0.0
18.10
0
0.614
6.185
96.7
2.1705
24
666
20.2
379.70
18.03
14.6
480
14.33370
0.0
18.10
0
0.614
6.229
88.0
1.9512
24
666
20.2
383.32
13.11
21.4
481
5.82401
0.0
18.10
0
0.532
6.242
64.7
3.4242
24
666
20.2
396.90
10.74
23.0
482
5.70818
0.0
18.10
0
0.532
6.750
74.9
3.3317
24
666
20.2
393.07
7.74
23.7
483
5.73116
0.0
18.10
0
0.532
7.061
77.0
3.4106
24
666
20.2
395.28
7.01
25.0
484
2.81838
0.0
18.10
0
0.532
5.762
40.3
4.0983
24
666
20.2
392.92
10.42
21.8
485
2.37857
0.0
18.10
0
0.583
5.871
41.9
3.7240
24
666
20.2
370.73
13.34
20.6
486
3.67367
0.0
18.10
0
0.583
6.312
51.9
3.9917
24
666
20.2
388.62
10.58
21.2
487
5.69175
0.0
18.10
0
0.583
6.114
79.8
3.5459
24
666
20.2
392.68
14.98
19.1
488
4.83567
0.0
18.10
0
0.583
5.905
53.2
3.1523
24
666
20.2
388.22
11.45
20.6
489
0.15086
0.0
27.74
0
0.609
5.454
92.7
1.8209
4
711
20.1
395.09
18.06
15.2
490
0.18337
0.0
27.74
0
0.609
5.414
98.3
1.7554
4
711
20.1
344.05
23.97
7.0
491
0.20746
0.0
27.74
0
0.609
5.093
98.0
1.8226
4
711
20.1
318.43
29.68
8.1
492
0.10574
0.0
27.74
0
0.609
5.983
98.8
1.8681
4
711
20.1
390.11
18.07
13.6
493
0.11132
0.0
27.74
0
0.609
5.983
83.5
2.1099
4
711
20.1
396.90
13.35
20.1
494
0.17331
0.0
9.69
0
0.585
5.707
54.0
2.3817
6
391
19.2
396.90
12.01
21.8
495
0.27957
0.0
9.69
0
0.585
5.926
42.6
2.3817
6
391
19.2
396.90
13.59
24.5
496
0.17899
0.0
9.69
0
0.585
5.670
28.8
2.7986
6
391
19.2
393.29
17.60
23.1
497
0.28960
0.0
9.69
0
0.585
5.390
72.9
2.7986
6
391
19.2
396.90
21.14
19.7
498
0.26838
0.0
9.69
0
0.585
5.794
70.6
2.8927
6
391
19.2
396.90
14.10
18.3
499
0.23912
0.0
9.69
0
0.585
6.019
65.3
2.4091
6
391
19.2
396.90
12.92
21.2
500
0.17783
0.0
9.69
0
0.585
5.569
73.5
2.3999
6
391
19.2
395.77
15.10
17.5
501
0.22438
0.0
9.69
0
0.585
6.027
79.7
2.4982
6
391
19.2
396.90
14.33
16.8
502
0.06263
0.0
11.93
0
0.573
6.593
69.1
2.4786
1
273
21.0
391.99
9.67
22.4
503
0.04527
0.0
11.93
0
0.573
6.120
76.7
2.2875
1
273
21.0
396.90
9.08
20.6
504
0.06076
0.0
11.93
0
0.573
6.976
91.0
2.1675
1
273
21.0
396.90
5.64
23.9
505
0.10959
0.0
11.93
0
0.573
6.794
89.3
2.3889
1
273
21.0
393.45
6.48
22.0
506
0.04741
0.0
11.93
0
0.573
6.030
80.8
2.5050
1
273
21.0
396.90
7.88
11.9
506 rows × 14 columns
In [151]:
crim = boston['crim'].values
zn = boston['zn'].values
plt.scatter(crim, zn)
plt.show()
In [152]:
indus=boston['indus'].values
plt.scatter(crim, indus)
plt.show()
In [173]:
nox = boston['nox'].values
plt.scatter(nox,crim,)
plt.show()
In [172]:
rm = boston['rm'].values
plt.scatter(rm,crim, )
plt.show()
In [171]:
age = boston['age'].values
plt.scatter(age,crim, )
plt.show()
In [170]:
dis = boston['dis'].values
plt.scatter(dis,crim)
plt.show()
In [158]:
rad = boston['rad'].values
plt.scatter(crim, rad)
plt.show()
In [169]:
tax = boston['tax'].values
plt.scatter(tax,crim, )
plt.show()
In [168]:
ptratio=boston['ptratio'].values
plt.scatter(ptratio,crim, )
plt.show()
In [167]:
b = boston['black'].values
plt.scatter(b,crim, )
plt.show()
In [165]:
lstat = boston['lstat'].values
plt.scatter(lstat,crim)
plt.show()
In [166]:
medv = boston['medv'].values
plt.scatter(medv,crim, )
plt.show()
In [ ]:
Content source: gaufung/ISL
Similar notebooks: