Labos en Python

Tous les labos, version Python. (Pas de labo 1.)


In [50]:
# jupyter magic
%matplotlib inline

# python scientific stack
import numpy as np
import pandas as pd
import scipy.stats as scs
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf

# fileformat
from simpledbf import Dbf5

Labo 2

Import data


In [8]:
# excel
#df = pd.read_excel('data/labo2/SR_Data.xls')

# DBF (Dbase)
dbf = Dbf5('data/labo2/SR_Data.dbf')
df = dbf.to_dataframe()

# SPSS
# savReaderWriter error with pip install

# SAS
# sas7dbat

Dataframe manipulation

  • show var (columns)
  • delete var
  • rename var
  • create var
  • head

In [10]:
# show vars
df.columns

# delete var
df = df.drop('Shape_Leng', 1) # 1 = column axis
# df.drop('Shape_Leng', 1, inplace=True) # same as previous, inplace impacts this dataframe instead of the returned one

# rename var
df = df.rename(columns={'POPTOT_FR':'POPTOT'})

# create var
df['km'] = df['Shape_Area'] / 1000000
df['HabKm2'] = df['POPTOT'] / df['km']

# show data head
df.head()


Out[10]:
POPTOT FAIBLEREV MONOPCT MENAGE1PCT IMMREC_PCT TX_CHOM NOECOLEPCT SCO_M9PCT SCO_M13PCT PARTIELPCT FAIBREVPCT INDICE_PAU Dist_Min N_1000 Dist_Moy_3 Shape_Area km HabKm2
0 970 35 11.48 16.67 1.03 1.80 8.00 6.16 24.66 45.13 3.61 0.49681 6264.772 0.000 8835.786 7483046.586030 7.483047 129.626348
1 9105 2965 21.74 24.22 5.43 10.36 30.42 9.64 34.44 41.68 32.56 1.49218 1458.956 0.179 3352.854 2958949.272110 2.958949 3077.105811
2 4190 435 13.93 26.59 3.10 4.55 22.69 3.76 28.84 40.94 10.38 0.69996 1094.887 0.372 1862.379 1452462.958910 1.452463 2884.755149
3 1300 335 22.95 60.36 0.77 8.89 68.75 7.23 36.60 33.33 25.77 1.15688 1155.835 0.348 1826.470 683634.529876 0.683635 1901.600845
4 6270 1010 15.47 21.96 3.43 7.52 29.31 4.59 33.22 45.08 16.11 0.89715 1097.945 0.590 1652.041 1764655.049240 1.764655 3553.102349

Normality

Skewness


In [65]:
#scs.skew(df)
df.skew()


Out[65]:
POPTOT        0.460748
FAIBLEREV     1.305112
MONOPCT       0.318384
MENAGE1PCT   -0.195568
IMMREC_PCT    1.889274
TX_CHOM       2.071395
NOECOLEPCT    0.131756
SCO_M9PCT     0.255036
SCO_M13PCT   -0.209523
PARTIELPCT    0.440455
FAIBREVPCT    0.357728
INDICE_PAU    0.294139
Dist_Min      3.526814
N_1000        0.956193
Dist_Moy_3    3.622690
Shape_Area    8.012661
km            8.012661
HabKm2        1.546871
SqrtDens      0.170539
SqrtImg       0.608161
LogDens      -1.356552
LogImg        0.028800
dtype: float64

Kurtosis


In [63]:
df.kurt()  # or df.kurtosis()


Out[63]:
POPTOT        -0.260861
FAIBLEREV      2.206736
MONOPCT        0.536391
MENAGE1PCT    -0.348104
IMMREC_PCT     3.909355
TX_CHOM       10.961608
NOECOLEPCT    -0.121454
SCO_M9PCT     -0.745866
SCO_M13PCT    -0.958561
PARTIELPCT     0.566064
FAIBREVPCT    -0.123080
INDICE_PAU     0.094835
Dist_Min      19.814668
N_1000         0.816666
Dist_Moy_3    21.412225
Shape_Area    88.786170
km            88.786170
HabKm2         5.721056
SqrtDens       0.353084
SqrtImg        0.683489
LogDens        2.847751
LogImg        -0.144432
dtype: float64

Kolmogorov-Smirnov


In [54]:
df['HabKm2'].plot()


Out[54]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f641d933208>

In [51]:
scs.kstest(df['SqrtDens'], 'norm')


Out[51]:
KstestResult(statistic=1.0, pvalue=0.0)

Shapiro-Wilk


In [ ]:
scs.shapiro()

Transformations

Square root


In [13]:
df['SqrtDens'] = np.sqrt(df['HabKm2'])
df['SqrtImg'] = np.sqrt(df['IMMREC_PCT'])

Logarithmic


In [14]:
# log(0) = error
df['LogDens'] = np.log(df['HabKm2'])
df['LogImg'] = np.log(df['IMMREC_PCT'] + 1)

Centrage et réduction

Descriptive statistics


In [16]:
df.describe()


Out[16]:
POPTOT FAIBLEREV MONOPCT MENAGE1PCT IMMREC_PCT TX_CHOM NOECOLEPCT SCO_M9PCT SCO_M13PCT PARTIELPCT ... Dist_Min N_1000 Dist_Moy_3 Shape_Area km HabKm2 SqrtDens SqrtImg LogDens LogImg
count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 ... 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000
mean 3499.545455 1015.345850 21.383597 37.674071 5.199427 9.455593 32.656047 14.634644 39.424012 45.614526 ... 909.695781 1.219806 1489.524081 962203.710779 0.962204 7996.216339 84.075317 2.074328 8.699663 1.588005
std 1621.165878 674.211405 7.843321 12.939146 4.678498 4.546150 12.192593 8.455721 15.275392 6.515781 ... 667.547371 0.985640 861.347812 1990476.202097 1.990476 5469.201650 30.485967 0.947822 0.883840 0.688674
min 245.000000 10.000000 0.000000 3.940000 0.000000 0.000000 0.000000 0.000000 5.100000 30.650000 ... 182.540000 0.000000 422.322000 38220.592566 0.038221 123.982571 11.134746 0.000000 4.820141 0.000000
25% 2241.250000 521.250000 16.052500 28.585000 2.112500 6.592500 24.515000 7.560000 27.932500 41.255000 ... 534.706250 0.477750 1013.665000 238109.266525 0.238109 3858.921403 62.119965 1.453443 8.258127 1.135425
50% 3327.500000 900.000000 21.225000 38.600000 3.850000 8.555000 32.655000 14.230000 40.260000 45.465000 ... 728.893000 1.000000 1262.812500 482166.445382 0.482166 6901.880415 83.077555 1.962140 8.839549 1.578977
75% 4543.750000 1330.000000 26.185000 46.755000 6.472500 11.670000 40.932500 20.967500 52.140000 49.652500 ... 1049.549000 1.807750 1634.273500 936821.000361 0.936821 11370.808679 106.633992 2.544110 9.338805 2.011228
max 9105.000000 4195.000000 51.280000 72.630000 25.790000 47.440000 68.750000 37.050000 70.490000 69.790000 ... 6389.749000 5.564000 8835.786000 28875026.240400 28.875026 44776.971544 211.605698 5.078386 10.709449 3.288029

8 rows × 22 columns


In [31]:
df.mean()
df.std()
df.min()
df.max()
df.median()
#df.range() : min, max
df.quantile(0.75) # param : 0.25, 0.75... default 0.5


Out[31]:
POPTOT          4543.750000
FAIBLEREV       1330.000000
MONOPCT           26.185000
MENAGE1PCT        46.755000
IMMREC_PCT         6.472500
TX_CHOM           11.670000
NOECOLEPCT        40.932500
SCO_M9PCT         20.967500
SCO_M13PCT        52.140000
PARTIELPCT        49.652500
FAIBREVPCT        39.800000
INDICE_PAU         1.930343
Dist_Min        1049.549000
N_1000             1.807750
Dist_Moy_3      1634.273500
Shape_Area    936821.000361
km                 0.936821
HabKm2         11370.808679
SqrtDens         106.633992
SqrtImg            2.544110
LogDens            9.338805
LogImg             2.011228
dtype: float64

In [34]:
df.


Out[34]:
POPTOT FAIBLEREV MONOPCT MENAGE1PCT IMMREC_PCT TX_CHOM NOECOLEPCT SCO_M9PCT SCO_M13PCT PARTIELPCT ... Dist_Min N_1000 Dist_Moy_3 Shape_Area km HabKm2 SqrtDens SqrtImg LogDens LogImg
0 970 35 11.48 16.67 1.03 1.80 8.00 6.16 24.66 45.13 ... 6264.772 0.000 8835.786 7483046.586030 7.483047 129.626348 11.385357 1.014889 4.864656 0.708036
1 9105 2965 21.74 24.22 5.43 10.36 30.42 9.64 34.44 41.68 ... 1458.956 0.179 3352.854 2958949.272110 2.958949 3077.105811 55.471667 2.330236 8.031745 1.860975
2 4190 435 13.93 26.59 3.10 4.55 22.69 3.76 28.84 40.94 ... 1094.887 0.372 1862.379 1452462.958910 1.452463 2884.755149 53.709917 1.760682 7.967195 1.410987
3 1300 335 22.95 60.36 0.77 8.89 68.75 7.23 36.60 33.33 ... 1155.835 0.348 1826.470 683634.529876 0.683635 1901.600845 43.607349 0.877496 7.550451 0.570980
4 6270 1010 15.47 21.96 3.43 7.52 29.31 4.59 33.22 45.08 ... 1097.945 0.590 1652.041 1764655.049240 1.764655 3553.102349 59.607905 1.852026 8.175576 1.488400
5 4340 935 16.88 23.00 3.11 4.61 25.00 6.61 34.03 47.60 ... 705.672 1.075 1343.423 1105846.567120 1.105847 3924.595083 62.646589 1.763519 8.275018 1.413423
6 4255 1025 19.82 33.82 7.40 7.53 24.77 9.62 39.10 39.95 ... 889.811 0.767 1360.426 1233814.262970 1.233814 3448.655221 58.725252 2.720294 8.145740 2.128232
7 5440 545 17.41 21.61 1.84 5.93 29.37 5.83 31.05 37.66 ... 747.516 1.122 1227.202 1964415.375340 1.964415 2769.271748 52.623871 1.356466 7.926340 1.043804
8 5840 725 17.71 12.66 4.62 3.02 27.52 7.70 32.17 39.34 ... 1725.722 0.000 2126.513 1889967.162310 1.889967 3090.000777 55.587775 2.149419 8.035927 1.726332
9 2875 240 12.87 17.56 1.69 4.62 28.89 7.54 34.31 30.75 ... 3568.796 0.000 4153.663 9091448.017270 9.091448 316.231253 17.782892 1.300000 5.756474 0.989541
10 3405 405 14.95 20.65 1.62 4.50 28.77 4.55 22.51 31.60 ... 2382.487 0.000 2996.087 951270.720561 0.951271 3579.422688 59.828277 1.272792 8.182957 0.963174
11 3480 465 11.39 16.05 3.88 5.09 21.84 2.48 29.34 32.92 ... 1238.091 0.372 1881.960 937016.770046 0.937017 3713.914320 60.941893 1.969772 8.219842 1.585145
12 3695 350 16.98 17.58 3.78 6.51 32.29 6.08 30.39 39.95 ... 2022.257 0.000 2624.009 837517.371630 0.837517 4411.848787 66.421749 1.944222 8.392049 1.564441
13 2455 740 22.05 40.56 6.72 7.03 32.50 10.89 34.18 42.96 ... 3841.632 0.000 7820.790 713011.822853 0.713012 3443.140662 58.678281 2.592296 8.144139 2.043814
14 2015 70 9.09 13.14 1.96 4.27 27.27 1.83 19.05 33.87 ... 6389.749 0.000 7535.359 9866131.466070 9.866131 204.234051 14.291048 1.400000 5.319267 1.085189
15 3135 900 28.57 40.07 1.90 9.59 52.22 10.17 47.30 41.76 ... 1619.209 0.301 1874.706 853361.705082 0.853362 3673.705981 60.611104 1.378405 8.208956 1.064711
16 2910 345 13.02 19.43 1.54 5.11 22.62 10.39 39.13 44.00 ... 3905.506 0.000 4932.050 13379330.017500 13.379330 217.499680 14.747870 1.240967 5.382197 0.932164
17 5730 245 7.69 13.53 1.48 4.98 11.45 6.01 28.76 36.99 ... 1560.839 0.052 2818.292 5086580.499760 5.086580 1126.493526 33.563276 1.216553 7.026865 0.908259
18 5000 320 12.72 16.03 1.90 3.79 23.29 5.20 30.92 35.99 ... 1075.066 0.593 2428.840 4342984.583920 4.342985 1151.282005 33.930547 1.378405 7.048631 1.064711
19 5260 345 7.28 12.67 1.62 6.37 7.09 1.58 14.25 41.50 ... 1590.180 0.403 2802.429 3578709.525710 3.578710 1469.803560 38.338017 1.272792 7.292884 0.963174
20 2890 125 9.94 15.53 0.00 3.53 17.19 1.90 20.71 46.60 ... 2862.347 0.000 4221.560 1849804.201020 1.849804 1562.327515 39.526289 0.000000 7.353932 0.000000
21 5230 305 11.04 17.29 3.35 3.83 15.67 2.93 17.42 38.76 ... 1407.854 0.200 2152.738 2369436.973450 2.369437 2207.275424 46.981650 1.830301 7.699514 1.470176
22 5805 220 11.28 10.68 1.81 4.03 13.64 1.03 13.55 43.64 ... 2922.658 0.000 3449.005 3209187.390690 3.209187 1808.869129 42.530802 1.345362 7.500457 1.033184
23 3805 280 7.66 11.70 1.58 6.55 25.00 1.26 12.79 46.01 ... 1173.434 0.366 5037.844 6027027.535770 6.027028 631.322817 25.126138 1.256981 6.447817 0.947789
24 7095 545 11.11 19.96 2.26 5.00 12.18 3.95 20.83 37.68 ... 1914.265 0.000 2191.347 3593206.001330 3.593206 1974.559766 44.436019 1.503330 7.588101 1.181727
25 4950 195 5.54 3.94 2.63 4.99 11.11 1.78 16.05 32.23 ... 3091.123 0.000 3520.000 2391606.686270 2.391607 2069.738318 45.494377 1.621727 7.635177 1.289233
26 3915 315 8.80 6.47 3.19 6.14 11.63 4.76 24.76 35.78 ... 1048.751 0.573 1547.606 1549435.551560 1.549436 2526.726585 50.266555 1.786057 7.834680 1.432701
27 4465 370 6.07 5.20 2.24 4.98 18.44 5.51 19.03 41.39 ... 1849.464 0.101 2056.528 2111057.898290 2.111058 2115.053312 45.989709 1.496663 7.656835 1.175573
28 6900 660 10.28 34.65 1.74 5.15 16.55 4.68 22.68 43.36 ... 1910.012 0.251 2966.820 3764527.986850 3.764528 1832.899111 42.812371 1.319091 7.513654 1.007958
29 6515 640 12.90 23.23 1.23 5.24 12.67 4.79 23.62 41.59 ... 2483.024 0.014 3469.599 3324407.037440 3.324407 1959.747987 44.269041 1.109054 7.580571 0.802002
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
476 6550 1845 23.19 39.21 7.75 8.16 26.39 10.96 34.15 45.59 ... 877.426 0.705 1437.637 734823.179419 0.734823 8913.709017 94.412441 2.783882 9.095346 2.169054
477 5270 1750 24.31 29.98 18.47 11.92 23.46 5.47 22.64 47.39 ... 561.261 1.163 1258.810 502602.568060 0.502603 10485.421951 102.398349 4.297674 9.257741 2.968875
478 1985 485 16.33 37.08 12.85 13.22 16.95 2.19 15.63 53.42 ... 750.169 0.726 1423.369 285920.848885 0.285921 6942.480787 83.321551 3.584690 8.845414 2.628285
479 2030 470 20.51 32.18 11.30 6.58 18.97 7.62 15.87 53.22 ... 923.145 0.869 1615.122 218528.387758 0.218528 9289.410959 96.381590 3.361547 9.136630 2.509599
480 5925 1960 17.86 57.77 14.93 11.58 11.18 3.66 16.31 53.48 ... 885.972 0.483 2069.203 427444.031562 0.427444 13861.463870 117.734718 3.863936 9.536868 2.768204
481 6755 3165 21.52 39.22 20.34 11.57 20.50 5.80 22.36 58.70 ... 853.018 0.876 1291.353 639789.557930 0.639790 10558.159189 102.752904 4.509989 9.264654 3.060583
482 1770 535 20.00 37.21 4.55 10.13 11.81 0.57 5.10 68.57 ... 950.109 0.476 1887.633 735486.836034 0.735487 2406.569245 49.056796 2.133073 7.785957 1.713798
483 5280 1845 17.84 56.97 12.49 9.15 11.99 3.34 13.66 59.60 ... 625.647 0.896 1483.283 483320.651991 0.483321 10924.424558 104.519972 3.534119 9.298756 2.601949
484 5245 2285 21.65 25.29 15.43 12.56 29.50 17.84 45.32 48.64 ... 533.086 1.000 1239.444 642602.494629 0.642602 8162.122064 90.344463 3.928104 9.007259 2.799109
485 4405 2150 25.59 44.89 23.50 14.18 34.57 13.45 40.21 49.35 ... 673.831 1.284 1255.921 280239.882373 0.280240 15718.676309 125.374145 4.847680 9.662605 3.198673
486 4200 2515 20.47 25.00 25.79 17.88 28.21 12.82 43.99 53.37 ... 728.800 1.161 1169.477 272399.165818 0.272399 15418.549420 124.171452 5.078386 9.643327 3.288029
487 4560 2050 20.35 21.61 15.08 12.27 20.44 9.73 32.97 51.98 ... 521.841 1.864 1073.633 297430.482194 0.297430 15331.313611 123.819682 3.883298 9.637653 2.777576
488 6680 3360 30.66 28.40 19.13 15.72 32.42 16.16 50.43 49.57 ... 638.312 2.127 1187.333 568249.816466 0.568250 11755.393150 108.422291 4.373786 9.372067 3.002211
489 3760 1230 13.79 34.38 13.83 10.94 42.35 9.56 35.40 47.31 ... 1366.079 0.098 1731.825 2783551.648340 2.783552 1350.792252 36.753126 3.718871 7.208447 2.696652
490 6980 4195 28.35 42.35 24.96 20.78 36.36 14.62 44.53 45.39 ... 517.741 2.363 1072.160 666920.455813 0.666920 10466.015758 102.303547 4.995998 9.255889 3.256557
491 6300 3105 22.89 45.63 18.55 13.25 14.29 6.04 21.48 55.57 ... 748.844 0.926 1198.550 411891.147425 0.411891 15295.303236 123.674182 4.306971 9.635301 2.972975
492 5905 3060 20.82 38.35 22.52 19.08 21.70 8.14 31.22 56.50 ... 597.490 1.156 1385.473 379895.637140 0.379896 15543.742604 124.674547 4.745524 9.651413 3.157851
493 6420 1610 16.18 35.45 8.72 8.93 26.24 7.54 32.02 50.36 ... 555.531 1.000 1567.953 467594.778834 0.467595 13729.836796 117.174386 2.952965 9.527327 2.274186
494 2000 445 18.70 18.67 3.25 7.05 35.29 21.79 50.96 36.99 ... 680.686 1.708 992.864 1186867.202320 1.186867 1685.108491 41.050073 1.802776 7.429585 1.446919
495 5905 1430 15.27 14.36 0.85 9.05 35.87 25.09 53.31 40.16 ... 859.262 0.733 1487.549 1723335.771730 1.723336 3426.494185 58.536264 0.921954 8.139293 0.615186
496 4035 1010 21.81 22.22 1.73 8.97 41.05 23.26 50.00 43.82 ... 562.301 1.349 1115.069 636813.026317 0.636813 6336.239733 79.600501 1.315295 8.754041 1.004302
497 6355 1080 19.51 14.93 0.79 8.48 32.62 19.05 50.28 50.92 ... 1013.157 0.525 2540.538 2830958.514900 2.830959 2244.822722 47.379560 0.888819 7.716382 0.582216
498 5185 1010 13.65 44.77 0.67 5.80 27.84 25.55 54.36 38.80 ... 1135.154 0.232 1653.413 2095456.905860 2.095457 2474.400683 49.743348 0.818535 7.813753 0.512824
499 6230 1630 21.11 14.96 2.81 8.50 27.83 26.88 54.20 41.24 ... 813.239 0.865 1322.290 923087.615618 0.923088 6749.088488 82.152836 1.676305 8.817163 1.337629
500 3265 945 24.34 16.52 4.13 6.93 44.44 23.86 49.46 44.90 ... 1080.564 0.504 1700.473 590580.089993 0.590580 5528.462702 74.353633 2.032240 8.617665 1.635106
501 4295 595 13.94 15.12 1.16 7.28 23.39 19.02 48.34 45.55 ... 1556.652 0.000 2171.147 1099203.702020 1.099204 3907.374031 62.508992 1.077033 8.270621 0.770108
502 5200 1170 19.18 16.08 1.54 7.52 22.84 24.73 52.14 42.46 ... 910.905 1.198 1141.828 973293.509048 0.973294 5342.684351 73.093668 1.240967 8.583483 0.932164
503 3100 605 16.67 19.39 1.13 9.18 27.66 15.60 49.41 41.67 ... 1770.217 0.000 3386.877 974992.734046 0.974993 3179.510874 56.387152 1.063015 8.064483 0.756122
504 6005 630 13.49 10.53 0.58 6.53 28.26 11.59 44.46 35.32 ... 3048.265 0.000 4077.862 10145932.866700 10.145933 591.862777 24.328230 0.761577 6.383275 0.457425
505 3395 865 28.50 32.78 0.00 8.00 51.11 18.81 59.50 42.34 ... 1729.308 0.000 2876.342 12484526.672500 12.484527 271.936621 16.490501 0.000000 5.605569 0.000000

506 rows × 22 columns


Labo 3

Histograms

  • with normal curve

In [ ]:
df.cov()
df.corr()

Labo 4

T-Test


In [2]:
scs.ttest_ind?

In [6]:
#statsmodels.stats.anova.anova_lm
statsmodels.stats.anova.anova_lm?