In [22]:
import pandas as pd
import pymc3 as pm
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from pandas.plotting import scatter_matrix

sns.set(font_scale=1.5)

%matplotlib inline

In [32]:
df = pd.read_csv('BodyFat.csv.gz', index_col='IDNO')
df = df.drop(columns=[df.columns[-1]])
# df['WEIGHT'].dtype
df.head()


Out[32]:
BODYFAT DENSITY AGE WEIGHT HEIGHT ADIPOSITY NECK CHEST ABDOMEN HIP THIGH KNEE ANKLE BICEPS FOREARM WRIST
IDNO
1 12.6 1.0708 23 154.25 67.75 23.7 36.2 93.1 85.2 94.5 59.0 37.3 21.9 32.0 27.4 17.1
2 6.9 1.0853 22 173.25 72.25 23.4 38.5 93.6 83.0 98.7 58.7 37.3 23.4 30.5 28.9 18.2
3 24.6 1.0414 22 154.00 66.25 24.7 34.0 95.8 87.9 99.2 59.6 38.9 24.0 28.8 25.2 16.6
4 10.9 1.0751 26 184.75 72.25 24.9 37.4 101.8 86.4 101.2 60.1 37.3 22.8 32.4 29.4 18.2
5 27.8 1.0340 24 184.25 71.25 25.6 34.4 97.3 100.0 101.9 63.2 42.2 24.0 32.2 27.7 17.7

In [37]:
scatter_matrix(df, figsize=(15,15));
# plt.tight_layout()



In [39]:
with pm.Model() as model:
        pm.glm.GLM.from_formula('BODYFAT ~ NECK', df)
        trace = pm.sample(5000)


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [sd, NECK, Intercept]
Sampling 2 chains: 100%|██████████| 11000/11000 [00:38<00:00, 288.22draws/s]
The acceptance probability does not match the target. It is 0.881957417372123, but should be close to 0.8. Try to increase the number of tuning steps.

In [40]:
pm.traceplot(trace)


Out[40]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1398c19e8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x13989e828>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x13a9c6240>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x13af54cf8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1397f7b38>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1397cd748>]],
      dtype=object)

In [41]:
pm.summary(trace)


/Users/balarsen/miniconda3/envs/python3/lib/python3.7/site-packages/pymc3/stats.py:991: FutureWarning: The join_axes-keyword is deprecated. Use .reindex or .reindex_like on the result to achieve the same functionality.
  axis=1, join_axes=[dforg.index])
Out[41]:
mean sd mc_error hpd_2.5 hpd_97.5 n_eff Rhat
Intercept -40.550860 6.648520 0.110036 -53.493330 -27.149298 3286.806787 1.000125
NECK 1.566116 0.174557 0.002886 1.210995 1.899941 3298.589715 1.000102
sd 6.780941 0.305182 0.004715 6.194300 7.384174 4061.717908 0.999980

In [42]:
with pm.Model() as model:
        pm.glm.GLM.from_formula('BODYFAT ~ NECK + THIGH', df)
        trace = pm.sample(5000)


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [sd, THIGH, NECK, Intercept]
Sampling 2 chains: 100%|██████████| 11000/11000 [00:57<00:00, 192.85draws/s]
The acceptance probability does not match the target. It is 0.8810841491348281, but should be close to 0.8. Try to increase the number of tuning steps.

In [45]:
pm.traceplot(trace);



In [46]:
pm.summary(trace)


/Users/balarsen/miniconda3/envs/python3/lib/python3.7/site-packages/pymc3/stats.py:991: FutureWarning: The join_axes-keyword is deprecated. Use .reindex or .reindex_like on the result to achieve the same functionality.
  axis=1, join_axes=[dforg.index])
Out[46]:
mean sd mc_error hpd_2.5 hpd_97.5 n_eff Rhat
Intercept -42.171139 6.461887 0.088100 -54.004086 -28.727532 4480.763023 1.000015
NECK 0.628805 0.232674 0.003718 0.177064 1.078297 3509.486113 0.999930
THIGH 0.626521 0.108216 0.001616 0.421239 0.842040 4856.969789 0.999909
sd 6.374960 0.290499 0.003413 5.814857 6.933320 6243.781053 0.999900

In [47]:
with pm.Model() as model:
        pm.glm.GLM.from_formula('BODYFAT ~ NECK + THIGH + DENSITY', df)
        trace = pm.sample(5000)


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [sd, DENSITY, THIGH, NECK, Intercept]
Sampling 2 chains: 100%|██████████| 11000/11000 [03:42<00:00, 49.34draws/s]
The acceptance probability does not match the target. It is 0.8993518484456472, but should be close to 0.8. Try to increase the number of tuning steps.

In [48]:
pm.traceplot(trace);