In [50]:
# jupyter magic
%matplotlib inline
# python scientific stack
import numpy as np
import pandas as pd
import scipy.stats as scs
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
# fileformat
from simpledbf import Dbf5
In [8]:
# excel
#df = pd.read_excel('data/labo2/SR_Data.xls')
# DBF (Dbase)
dbf = Dbf5('data/labo2/SR_Data.dbf')
df = dbf.to_dataframe()
# SPSS
# savReaderWriter error with pip install
# SAS
# sas7dbat
In [10]:
# show vars
df.columns
# delete var
df = df.drop('Shape_Leng', 1) # 1 = column axis
# df.drop('Shape_Leng', 1, inplace=True) # same as previous, inplace impacts this dataframe instead of the returned one
# rename var
df = df.rename(columns={'POPTOT_FR':'POPTOT'})
# create var
df['km'] = df['Shape_Area'] / 1000000
df['HabKm2'] = df['POPTOT'] / df['km']
# show data head
df.head()
Out[10]:
POPTOT
FAIBLEREV
MONOPCT
MENAGE1PCT
IMMREC_PCT
TX_CHOM
NOECOLEPCT
SCO_M9PCT
SCO_M13PCT
PARTIELPCT
FAIBREVPCT
INDICE_PAU
Dist_Min
N_1000
Dist_Moy_3
Shape_Area
km
HabKm2
0
970
35
11.48
16.67
1.03
1.80
8.00
6.16
24.66
45.13
3.61
0.49681
6264.772
0.000
8835.786
7483046.586030
7.483047
129.626348
1
9105
2965
21.74
24.22
5.43
10.36
30.42
9.64
34.44
41.68
32.56
1.49218
1458.956
0.179
3352.854
2958949.272110
2.958949
3077.105811
2
4190
435
13.93
26.59
3.10
4.55
22.69
3.76
28.84
40.94
10.38
0.69996
1094.887
0.372
1862.379
1452462.958910
1.452463
2884.755149
3
1300
335
22.95
60.36
0.77
8.89
68.75
7.23
36.60
33.33
25.77
1.15688
1155.835
0.348
1826.470
683634.529876
0.683635
1901.600845
4
6270
1010
15.47
21.96
3.43
7.52
29.31
4.59
33.22
45.08
16.11
0.89715
1097.945
0.590
1652.041
1764655.049240
1.764655
3553.102349
In [65]:
#scs.skew(df)
df.skew()
Out[65]:
POPTOT 0.460748
FAIBLEREV 1.305112
MONOPCT 0.318384
MENAGE1PCT -0.195568
IMMREC_PCT 1.889274
TX_CHOM 2.071395
NOECOLEPCT 0.131756
SCO_M9PCT 0.255036
SCO_M13PCT -0.209523
PARTIELPCT 0.440455
FAIBREVPCT 0.357728
INDICE_PAU 0.294139
Dist_Min 3.526814
N_1000 0.956193
Dist_Moy_3 3.622690
Shape_Area 8.012661
km 8.012661
HabKm2 1.546871
SqrtDens 0.170539
SqrtImg 0.608161
LogDens -1.356552
LogImg 0.028800
dtype: float64
In [63]:
df.kurt() # or df.kurtosis()
Out[63]:
POPTOT -0.260861
FAIBLEREV 2.206736
MONOPCT 0.536391
MENAGE1PCT -0.348104
IMMREC_PCT 3.909355
TX_CHOM 10.961608
NOECOLEPCT -0.121454
SCO_M9PCT -0.745866
SCO_M13PCT -0.958561
PARTIELPCT 0.566064
FAIBREVPCT -0.123080
INDICE_PAU 0.094835
Dist_Min 19.814668
N_1000 0.816666
Dist_Moy_3 21.412225
Shape_Area 88.786170
km 88.786170
HabKm2 5.721056
SqrtDens 0.353084
SqrtImg 0.683489
LogDens 2.847751
LogImg -0.144432
dtype: float64
In [54]:
df['HabKm2'].plot()
Out[54]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f641d933208>
In [51]:
scs.kstest(df['SqrtDens'], 'norm')
Out[51]:
KstestResult(statistic=1.0, pvalue=0.0)
In [ ]:
scs.shapiro()
In [13]:
df['SqrtDens'] = np.sqrt(df['HabKm2'])
df['SqrtImg'] = np.sqrt(df['IMMREC_PCT'])
In [14]:
# log(0) = error
df['LogDens'] = np.log(df['HabKm2'])
df['LogImg'] = np.log(df['IMMREC_PCT'] + 1)
In [16]:
df.describe()
Out[16]:
POPTOT
FAIBLEREV
MONOPCT
MENAGE1PCT
IMMREC_PCT
TX_CHOM
NOECOLEPCT
SCO_M9PCT
SCO_M13PCT
PARTIELPCT
...
Dist_Min
N_1000
Dist_Moy_3
Shape_Area
km
HabKm2
SqrtDens
SqrtImg
LogDens
LogImg
count
506.000000
506.000000
506.000000
506.000000
506.000000
506.000000
506.000000
506.000000
506.000000
506.000000
...
506.000000
506.000000
506.000000
506.000000
506.000000
506.000000
506.000000
506.000000
506.000000
506.000000
mean
3499.545455
1015.345850
21.383597
37.674071
5.199427
9.455593
32.656047
14.634644
39.424012
45.614526
...
909.695781
1.219806
1489.524081
962203.710779
0.962204
7996.216339
84.075317
2.074328
8.699663
1.588005
std
1621.165878
674.211405
7.843321
12.939146
4.678498
4.546150
12.192593
8.455721
15.275392
6.515781
...
667.547371
0.985640
861.347812
1990476.202097
1.990476
5469.201650
30.485967
0.947822
0.883840
0.688674
min
245.000000
10.000000
0.000000
3.940000
0.000000
0.000000
0.000000
0.000000
5.100000
30.650000
...
182.540000
0.000000
422.322000
38220.592566
0.038221
123.982571
11.134746
0.000000
4.820141
0.000000
25%
2241.250000
521.250000
16.052500
28.585000
2.112500
6.592500
24.515000
7.560000
27.932500
41.255000
...
534.706250
0.477750
1013.665000
238109.266525
0.238109
3858.921403
62.119965
1.453443
8.258127
1.135425
50%
3327.500000
900.000000
21.225000
38.600000
3.850000
8.555000
32.655000
14.230000
40.260000
45.465000
...
728.893000
1.000000
1262.812500
482166.445382
0.482166
6901.880415
83.077555
1.962140
8.839549
1.578977
75%
4543.750000
1330.000000
26.185000
46.755000
6.472500
11.670000
40.932500
20.967500
52.140000
49.652500
...
1049.549000
1.807750
1634.273500
936821.000361
0.936821
11370.808679
106.633992
2.544110
9.338805
2.011228
max
9105.000000
4195.000000
51.280000
72.630000
25.790000
47.440000
68.750000
37.050000
70.490000
69.790000
...
6389.749000
5.564000
8835.786000
28875026.240400
28.875026
44776.971544
211.605698
5.078386
10.709449
3.288029
8 rows × 22 columns
In [31]:
df.mean()
df.std()
df.min()
df.max()
df.median()
#df.range() : min, max
df.quantile(0.75) # param : 0.25, 0.75... default 0.5
Out[31]:
POPTOT 4543.750000
FAIBLEREV 1330.000000
MONOPCT 26.185000
MENAGE1PCT 46.755000
IMMREC_PCT 6.472500
TX_CHOM 11.670000
NOECOLEPCT 40.932500
SCO_M9PCT 20.967500
SCO_M13PCT 52.140000
PARTIELPCT 49.652500
FAIBREVPCT 39.800000
INDICE_PAU 1.930343
Dist_Min 1049.549000
N_1000 1.807750
Dist_Moy_3 1634.273500
Shape_Area 936821.000361
km 0.936821
HabKm2 11370.808679
SqrtDens 106.633992
SqrtImg 2.544110
LogDens 9.338805
LogImg 2.011228
dtype: float64
In [34]:
df.
Out[34]:
POPTOT
FAIBLEREV
MONOPCT
MENAGE1PCT
IMMREC_PCT
TX_CHOM
NOECOLEPCT
SCO_M9PCT
SCO_M13PCT
PARTIELPCT
...
Dist_Min
N_1000
Dist_Moy_3
Shape_Area
km
HabKm2
SqrtDens
SqrtImg
LogDens
LogImg
0
970
35
11.48
16.67
1.03
1.80
8.00
6.16
24.66
45.13
...
6264.772
0.000
8835.786
7483046.586030
7.483047
129.626348
11.385357
1.014889
4.864656
0.708036
1
9105
2965
21.74
24.22
5.43
10.36
30.42
9.64
34.44
41.68
...
1458.956
0.179
3352.854
2958949.272110
2.958949
3077.105811
55.471667
2.330236
8.031745
1.860975
2
4190
435
13.93
26.59
3.10
4.55
22.69
3.76
28.84
40.94
...
1094.887
0.372
1862.379
1452462.958910
1.452463
2884.755149
53.709917
1.760682
7.967195
1.410987
3
1300
335
22.95
60.36
0.77
8.89
68.75
7.23
36.60
33.33
...
1155.835
0.348
1826.470
683634.529876
0.683635
1901.600845
43.607349
0.877496
7.550451
0.570980
4
6270
1010
15.47
21.96
3.43
7.52
29.31
4.59
33.22
45.08
...
1097.945
0.590
1652.041
1764655.049240
1.764655
3553.102349
59.607905
1.852026
8.175576
1.488400
5
4340
935
16.88
23.00
3.11
4.61
25.00
6.61
34.03
47.60
...
705.672
1.075
1343.423
1105846.567120
1.105847
3924.595083
62.646589
1.763519
8.275018
1.413423
6
4255
1025
19.82
33.82
7.40
7.53
24.77
9.62
39.10
39.95
...
889.811
0.767
1360.426
1233814.262970
1.233814
3448.655221
58.725252
2.720294
8.145740
2.128232
7
5440
545
17.41
21.61
1.84
5.93
29.37
5.83
31.05
37.66
...
747.516
1.122
1227.202
1964415.375340
1.964415
2769.271748
52.623871
1.356466
7.926340
1.043804
8
5840
725
17.71
12.66
4.62
3.02
27.52
7.70
32.17
39.34
...
1725.722
0.000
2126.513
1889967.162310
1.889967
3090.000777
55.587775
2.149419
8.035927
1.726332
9
2875
240
12.87
17.56
1.69
4.62
28.89
7.54
34.31
30.75
...
3568.796
0.000
4153.663
9091448.017270
9.091448
316.231253
17.782892
1.300000
5.756474
0.989541
10
3405
405
14.95
20.65
1.62
4.50
28.77
4.55
22.51
31.60
...
2382.487
0.000
2996.087
951270.720561
0.951271
3579.422688
59.828277
1.272792
8.182957
0.963174
11
3480
465
11.39
16.05
3.88
5.09
21.84
2.48
29.34
32.92
...
1238.091
0.372
1881.960
937016.770046
0.937017
3713.914320
60.941893
1.969772
8.219842
1.585145
12
3695
350
16.98
17.58
3.78
6.51
32.29
6.08
30.39
39.95
...
2022.257
0.000
2624.009
837517.371630
0.837517
4411.848787
66.421749
1.944222
8.392049
1.564441
13
2455
740
22.05
40.56
6.72
7.03
32.50
10.89
34.18
42.96
...
3841.632
0.000
7820.790
713011.822853
0.713012
3443.140662
58.678281
2.592296
8.144139
2.043814
14
2015
70
9.09
13.14
1.96
4.27
27.27
1.83
19.05
33.87
...
6389.749
0.000
7535.359
9866131.466070
9.866131
204.234051
14.291048
1.400000
5.319267
1.085189
15
3135
900
28.57
40.07
1.90
9.59
52.22
10.17
47.30
41.76
...
1619.209
0.301
1874.706
853361.705082
0.853362
3673.705981
60.611104
1.378405
8.208956
1.064711
16
2910
345
13.02
19.43
1.54
5.11
22.62
10.39
39.13
44.00
...
3905.506
0.000
4932.050
13379330.017500
13.379330
217.499680
14.747870
1.240967
5.382197
0.932164
17
5730
245
7.69
13.53
1.48
4.98
11.45
6.01
28.76
36.99
...
1560.839
0.052
2818.292
5086580.499760
5.086580
1126.493526
33.563276
1.216553
7.026865
0.908259
18
5000
320
12.72
16.03
1.90
3.79
23.29
5.20
30.92
35.99
...
1075.066
0.593
2428.840
4342984.583920
4.342985
1151.282005
33.930547
1.378405
7.048631
1.064711
19
5260
345
7.28
12.67
1.62
6.37
7.09
1.58
14.25
41.50
...
1590.180
0.403
2802.429
3578709.525710
3.578710
1469.803560
38.338017
1.272792
7.292884
0.963174
20
2890
125
9.94
15.53
0.00
3.53
17.19
1.90
20.71
46.60
...
2862.347
0.000
4221.560
1849804.201020
1.849804
1562.327515
39.526289
0.000000
7.353932
0.000000
21
5230
305
11.04
17.29
3.35
3.83
15.67
2.93
17.42
38.76
...
1407.854
0.200
2152.738
2369436.973450
2.369437
2207.275424
46.981650
1.830301
7.699514
1.470176
22
5805
220
11.28
10.68
1.81
4.03
13.64
1.03
13.55
43.64
...
2922.658
0.000
3449.005
3209187.390690
3.209187
1808.869129
42.530802
1.345362
7.500457
1.033184
23
3805
280
7.66
11.70
1.58
6.55
25.00
1.26
12.79
46.01
...
1173.434
0.366
5037.844
6027027.535770
6.027028
631.322817
25.126138
1.256981
6.447817
0.947789
24
7095
545
11.11
19.96
2.26
5.00
12.18
3.95
20.83
37.68
...
1914.265
0.000
2191.347
3593206.001330
3.593206
1974.559766
44.436019
1.503330
7.588101
1.181727
25
4950
195
5.54
3.94
2.63
4.99
11.11
1.78
16.05
32.23
...
3091.123
0.000
3520.000
2391606.686270
2.391607
2069.738318
45.494377
1.621727
7.635177
1.289233
26
3915
315
8.80
6.47
3.19
6.14
11.63
4.76
24.76
35.78
...
1048.751
0.573
1547.606
1549435.551560
1.549436
2526.726585
50.266555
1.786057
7.834680
1.432701
27
4465
370
6.07
5.20
2.24
4.98
18.44
5.51
19.03
41.39
...
1849.464
0.101
2056.528
2111057.898290
2.111058
2115.053312
45.989709
1.496663
7.656835
1.175573
28
6900
660
10.28
34.65
1.74
5.15
16.55
4.68
22.68
43.36
...
1910.012
0.251
2966.820
3764527.986850
3.764528
1832.899111
42.812371
1.319091
7.513654
1.007958
29
6515
640
12.90
23.23
1.23
5.24
12.67
4.79
23.62
41.59
...
2483.024
0.014
3469.599
3324407.037440
3.324407
1959.747987
44.269041
1.109054
7.580571
0.802002
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
476
6550
1845
23.19
39.21
7.75
8.16
26.39
10.96
34.15
45.59
...
877.426
0.705
1437.637
734823.179419
0.734823
8913.709017
94.412441
2.783882
9.095346
2.169054
477
5270
1750
24.31
29.98
18.47
11.92
23.46
5.47
22.64
47.39
...
561.261
1.163
1258.810
502602.568060
0.502603
10485.421951
102.398349
4.297674
9.257741
2.968875
478
1985
485
16.33
37.08
12.85
13.22
16.95
2.19
15.63
53.42
...
750.169
0.726
1423.369
285920.848885
0.285921
6942.480787
83.321551
3.584690
8.845414
2.628285
479
2030
470
20.51
32.18
11.30
6.58
18.97
7.62
15.87
53.22
...
923.145
0.869
1615.122
218528.387758
0.218528
9289.410959
96.381590
3.361547
9.136630
2.509599
480
5925
1960
17.86
57.77
14.93
11.58
11.18
3.66
16.31
53.48
...
885.972
0.483
2069.203
427444.031562
0.427444
13861.463870
117.734718
3.863936
9.536868
2.768204
481
6755
3165
21.52
39.22
20.34
11.57
20.50
5.80
22.36
58.70
...
853.018
0.876
1291.353
639789.557930
0.639790
10558.159189
102.752904
4.509989
9.264654
3.060583
482
1770
535
20.00
37.21
4.55
10.13
11.81
0.57
5.10
68.57
...
950.109
0.476
1887.633
735486.836034
0.735487
2406.569245
49.056796
2.133073
7.785957
1.713798
483
5280
1845
17.84
56.97
12.49
9.15
11.99
3.34
13.66
59.60
...
625.647
0.896
1483.283
483320.651991
0.483321
10924.424558
104.519972
3.534119
9.298756
2.601949
484
5245
2285
21.65
25.29
15.43
12.56
29.50
17.84
45.32
48.64
...
533.086
1.000
1239.444
642602.494629
0.642602
8162.122064
90.344463
3.928104
9.007259
2.799109
485
4405
2150
25.59
44.89
23.50
14.18
34.57
13.45
40.21
49.35
...
673.831
1.284
1255.921
280239.882373
0.280240
15718.676309
125.374145
4.847680
9.662605
3.198673
486
4200
2515
20.47
25.00
25.79
17.88
28.21
12.82
43.99
53.37
...
728.800
1.161
1169.477
272399.165818
0.272399
15418.549420
124.171452
5.078386
9.643327
3.288029
487
4560
2050
20.35
21.61
15.08
12.27
20.44
9.73
32.97
51.98
...
521.841
1.864
1073.633
297430.482194
0.297430
15331.313611
123.819682
3.883298
9.637653
2.777576
488
6680
3360
30.66
28.40
19.13
15.72
32.42
16.16
50.43
49.57
...
638.312
2.127
1187.333
568249.816466
0.568250
11755.393150
108.422291
4.373786
9.372067
3.002211
489
3760
1230
13.79
34.38
13.83
10.94
42.35
9.56
35.40
47.31
...
1366.079
0.098
1731.825
2783551.648340
2.783552
1350.792252
36.753126
3.718871
7.208447
2.696652
490
6980
4195
28.35
42.35
24.96
20.78
36.36
14.62
44.53
45.39
...
517.741
2.363
1072.160
666920.455813
0.666920
10466.015758
102.303547
4.995998
9.255889
3.256557
491
6300
3105
22.89
45.63
18.55
13.25
14.29
6.04
21.48
55.57
...
748.844
0.926
1198.550
411891.147425
0.411891
15295.303236
123.674182
4.306971
9.635301
2.972975
492
5905
3060
20.82
38.35
22.52
19.08
21.70
8.14
31.22
56.50
...
597.490
1.156
1385.473
379895.637140
0.379896
15543.742604
124.674547
4.745524
9.651413
3.157851
493
6420
1610
16.18
35.45
8.72
8.93
26.24
7.54
32.02
50.36
...
555.531
1.000
1567.953
467594.778834
0.467595
13729.836796
117.174386
2.952965
9.527327
2.274186
494
2000
445
18.70
18.67
3.25
7.05
35.29
21.79
50.96
36.99
...
680.686
1.708
992.864
1186867.202320
1.186867
1685.108491
41.050073
1.802776
7.429585
1.446919
495
5905
1430
15.27
14.36
0.85
9.05
35.87
25.09
53.31
40.16
...
859.262
0.733
1487.549
1723335.771730
1.723336
3426.494185
58.536264
0.921954
8.139293
0.615186
496
4035
1010
21.81
22.22
1.73
8.97
41.05
23.26
50.00
43.82
...
562.301
1.349
1115.069
636813.026317
0.636813
6336.239733
79.600501
1.315295
8.754041
1.004302
497
6355
1080
19.51
14.93
0.79
8.48
32.62
19.05
50.28
50.92
...
1013.157
0.525
2540.538
2830958.514900
2.830959
2244.822722
47.379560
0.888819
7.716382
0.582216
498
5185
1010
13.65
44.77
0.67
5.80
27.84
25.55
54.36
38.80
...
1135.154
0.232
1653.413
2095456.905860
2.095457
2474.400683
49.743348
0.818535
7.813753
0.512824
499
6230
1630
21.11
14.96
2.81
8.50
27.83
26.88
54.20
41.24
...
813.239
0.865
1322.290
923087.615618
0.923088
6749.088488
82.152836
1.676305
8.817163
1.337629
500
3265
945
24.34
16.52
4.13
6.93
44.44
23.86
49.46
44.90
...
1080.564
0.504
1700.473
590580.089993
0.590580
5528.462702
74.353633
2.032240
8.617665
1.635106
501
4295
595
13.94
15.12
1.16
7.28
23.39
19.02
48.34
45.55
...
1556.652
0.000
2171.147
1099203.702020
1.099204
3907.374031
62.508992
1.077033
8.270621
0.770108
502
5200
1170
19.18
16.08
1.54
7.52
22.84
24.73
52.14
42.46
...
910.905
1.198
1141.828
973293.509048
0.973294
5342.684351
73.093668
1.240967
8.583483
0.932164
503
3100
605
16.67
19.39
1.13
9.18
27.66
15.60
49.41
41.67
...
1770.217
0.000
3386.877
974992.734046
0.974993
3179.510874
56.387152
1.063015
8.064483
0.756122
504
6005
630
13.49
10.53
0.58
6.53
28.26
11.59
44.46
35.32
...
3048.265
0.000
4077.862
10145932.866700
10.145933
591.862777
24.328230
0.761577
6.383275
0.457425
505
3395
865
28.50
32.78
0.00
8.00
51.11
18.81
59.50
42.34
...
1729.308
0.000
2876.342
12484526.672500
12.484527
271.936621
16.490501
0.000000
5.605569
0.000000
506 rows × 22 columns
In [ ]:
df.cov()
df.corr()
In [2]:
scs.ttest_ind?
In [6]:
#statsmodels.stats.anova.anova_lm
statsmodels.stats.anova.anova_lm?
Content source: giotta/EUR8217
Similar notebooks: