In [24]:
import numpy as np
import matplotlib.pyplot as plt
import sghmc

In [25]:
pima = np.genfromtxt('pima-indians-diabetes.data', delimiter=',')
names = ["Number of times pregnant",
         "Plasma glucose concentration",
         "Diastolic blood pressure (mm Hg)",
         "Triceps skin fold thickness (mm)",
         "2-Hour serum insulin (mu U/ml)",
         "Body mass index (weight in kg/(height in m)^2)",
         "Diabetes pedigree function",
         "Age (years)",
         "Class variable (0 or 1)"]

In [26]:
# Load data
X = np.concatenate((np.ones((pima.shape[0],1)),pima[:,0:8]), axis=1)
Y = pima[:,8]

Xs = (X - np.mean(X, axis=0))/np.concatenate((np.ones(1),np.std(X[:,1:], axis=0)))
Xs = Xs[:,1:]

n, p = Xs.shape

In [ ]:


In [ ]:


In [ ]:

Regression


In [27]:
from sklearn.linear_model import LogisticRegression

In [28]:
# Unscaled
mod_logis = LogisticRegression(fit_intercept=False, C=1e50)
mod_logis.fit(X,Y)
beta_true_unscale = mod_logis.coef_.ravel()
beta_true_unscale


Out[28]:
array([ -8.39796146e+00,   1.23136819e-01,   3.51459222e-02,
        -1.33041552e-02,   6.22963210e-04,  -1.19046614e-03,
         8.96178024e-02,   9.44237454e-01,   1.48531935e-02])

In [29]:
# Scaled
mod_logis = LogisticRegression(fit_intercept=False, C=1e50)
mod_logis.fit(Xs,Y)
beta_true_scale = mod_logis.coef_.ravel()
beta_true_scale


Out[29]:
array([ 0.39024907,  1.08791914, -0.24544979,  0.02250608, -0.1621995 ,
        0.59035938,  0.32483104,  0.12120845])

In [ ]:


In [ ]:


In [ ]:

HMC


In [30]:
# HMC - Scaled
nsample = 1000
m = 20
eps = .001
theta = np.zeros(p)
#theta = beta_true_unscale.copy()
phi = 5
M = np.identity(p)

samples_hmc, accept, rho, H_hmc = sghmc.run_hmc(Y, Xs, sghmc.U_logistic, sghmc.gradU_logistic, M, eps, m, theta, phi, nsample)

beta_est_hmc = np.mean(samples_hmc, axis=0)
beta_est_hmc - beta_true_scale


Out[30]:
array([-0.10352308, -0.11663609,  0.09055114,  0.05192659,  0.03134791,
       -0.09743889, -0.02943009,  0.02173958])

In [31]:
plt.plot((samples_hmc - beta_true_scale)[:,1])
plt.savefig('hmc-trace.pdf')

In [32]:
fig, ax = plt.subplots(figsize=(4,3))
ax.plot(H)
ax.set_title("Total energy")
ax.set_xlabel("Number of samples")
plt.savefig('hmc-energy.pdf')


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-32-8555b9c90a04> in <module>()
      1 fig, ax = plt.subplots(figsize=(4,3))
----> 2 ax.plot(H)
      3 ax.set_title("Total energy")
      4 ax.set_xlabel("Number of samples")
      5 plt.savefig('hmc-energy.pdf')

NameError: name 'H' is not defined

In [ ]:
fig, ax = plt.subplots(3,1, figsize=(6,10))

i = 0
ax[0].plot((samples - beta_true_scale)[:,i])
ax[0].set_title(names[i])


i = 1
ax[1].plot((samples - beta_true_scale)[:,i])
ax[1].set_title(names[i])


i = 2
ax[2].plot((samples - beta_true_scale)[:,i])
ax[2].set_title(names[i])

In [ ]:

SGHMC


In [37]:
# HMC - Scaled (no intercept)
nsample = 1000
m = 20
eps = .002
theta = np.zeros(p)
#theta = beta_true_scale.copy()
phi = 5
nbatch = 500
C = 1 * np.identity(p)
V = 0 * np.identity(p)
M = np.identity(p)

samples_sghmc, H_sghmc = sghmc.run_sghmc(Y, Xs, sghmc.U_logistic, sghmc.stogradU_logistic, M, eps, m, theta, C, V, phi, nsample, nbatch)

beta_est_sghmc = np.mean(samples_sghmc, axis=0)
beta_est_sghmc - beta_true_scale


Out[37]:
array([-0.018941  , -0.07752244,  0.07161102, -0.02195963,  0.0348491 ,
       -0.04713756, -0.04273773, -0.01442177])

In [ ]:
plt.plot((samples - beta_true_scale)[:,0])
plt.savefig('sghmc-trace.pdf')

In [ ]:
plt.plot(H)
plt.savefig('sghmc-energy.pdf')

In [ ]:
fig, ax = plt.subplots(3,1, figsize=(6,10))

i = 0
ax[0].plot((samples - beta_true_scale)[:,i])
ax[0].set_title(names[i])


i = 1
ax[1].plot((samples - beta_true_scale)[:,i])
ax[1].set_title(names[i])


i = 2
ax[2].plot((samples - beta_true_scale)[:,i])
ax[2].set_title(names[i])

Gradient Descent


In [ ]:
# Gradient descent - Scaled
np.random.seed(2)
phi = .1

beta_est_gd = sghmc.gd(Y, Xs, sghmc.gradU_logistic, .1, 10000, np.zeros(p), phi)

beta_est_gd - beta_true_scale

In [ ]:


In [ ]:

Plots


In [38]:
import seaborn as sns
import pandas as pd

In [48]:
import matplotlib.pyplot as plt
import pandas as pd

df = pd.DataFrame([[1,2,3],[7,0,3],[1,2,2]],columns=['col1','col2','col3'])

df.plot()
plt.show


Out[48]:
<function matplotlib.pyplot.show>

In [39]:
tit = sns.load_dataset("titanic")

In [49]:
df = pd.DataFrame(np.vstack((beta_true_scale, 
                  beta_est_hmc, 
                  beta_est_sghmc, 
                  beta_est_gd)).T,
                  columns=['reg','hmc','sghmc','gd'])
df.plot()
ax.set_title("Coefficient Estimates")
ax.set_xlabel("Coefficient")
plt.savefig('coefs-pima.pdf')
plt.show()



In [122]:
df = pd.DataFrame({'reg': beta_true_scale,
              'hmc': beta_est_hmc,
              'sghmc': beta_est_sghmc,
              'gd': beta_est_gd,
              'name':names[:-1]})
that = (85, 62, 54, 20)
means_reg = tuple(df[["reg"]].values.tolist())
int(means_reg)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-122-10ff740e6c77> in <module>()
      6 that = (85, 62, 54, 20)
      7 means_reg = tuple(df[["reg"]].values.tolist())
----> 8 int(means_reg)

TypeError: int() argument must be a string, a bytes-like object or a number, not 'tuple'

In [120]:
# data to plot
n_groups = 8
means_reg = tuple(df[["reg"]].values.tolist())
means_sghmc = tuple(df[["sghmc"]].values.tolist())
means_gd = tuple(df[["gd"]].values.tolist())
means_hmc = tuple(df[["hmc"]].values.tolist())
nam = tuple(df[["name"]].values.tolist())



 
# create plot
fig, ax = plt.subplots()
index = np.arange(n_groups)
bar_width = 0.35
opacity = 0.8
 
rects1 = plt.bar(index, means_reg, bar_width,
                 alpha=opacity,
                 color='b',
                 label='MLE')
 
rects2 = plt.bar(index+bar_width, means_gd, bar_width,
                 alpha=opacity,
                 color='g',
                 label='Gradient Descent')

rects3 = plt.bar(index, means_hmc, bar_width,
                 alpha=opacity,
                 color='r',
                 label='Hamiltonian Monte Carlo')

rects4 = plt.bar(index, means_sghmc, bar_width,
                 alpha=opacity,
                 color='y',
                 label='Stochastic Gradient HMC')
 
plt.xlabel('Coefficient')
plt.ylabel('Value')
plt.title('Scores by person')
plt.xticks(index + bar_width, nam)
plt.legend()
 
plt.tight_layout()
plt.show()


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-120-e70e97136c0b> in <module>()
     19                  alpha=opacity,
     20                  color='b',
---> 21                  label='MLE')
     22 
     23 rects2 = plt.bar(index+bar_width, means_gd, bar_width,

//anaconda/lib/python3.6/site-packages/matplotlib/pyplot.py in bar(left, height, width, bottom, hold, data, **kwargs)
   2703     try:
   2704         ret = ax.bar(left, height, width=width, bottom=bottom, data=data,
-> 2705                      **kwargs)
   2706     finally:
   2707         ax._hold = washold

//anaconda/lib/python3.6/site-packages/matplotlib/__init__.py in inner(ax, *args, **kwargs)
   1890                     warnings.warn(msg % (label_namer, func.__name__),
   1891                                   RuntimeWarning, stacklevel=2)
-> 1892             return func(ax, *args, **kwargs)
   1893         pre_doc = inner.__doc__
   1894         if pre_doc is None:

//anaconda/lib/python3.6/site-packages/matplotlib/axes/_axes.py in bar(self, left, height, width, bottom, **kwargs)
   2113         args = zip(left, bottom, width, height, color, edgecolor, linewidth)
   2114         for l, b, w, h, c, e, lw in args:
-> 2115             if h < 0:
   2116                 b += h
   2117                 h = abs(h)

TypeError: '<' not supported between instances of 'list' and 'int'

In [98]:
# data to plot
n_groups = 4
means_frank = (90, 55, 40, 65)
means_guido = (85, 62, 54, 20)
 
# create plot
fig, ax = plt.subplots()
index = np.arange(n_groups)
bar_width = 0.35
opacity = 0.8
 
rects1 = plt.bar(index, means_frank, bar_width,
                 alpha=opacity,
                 color='b',
                 label='Frank')
 
rects2 = plt.bar(index + bar_width, means_guido, bar_width,
                 alpha=opacity,
                 color='g',
                 label='Guido')
 
plt.xlabel('Person')
plt.ylabel('Scores')
plt.title('Scores by person')
plt.xticks(index + bar_width, ('A', 'B', 'C', 'D'))
plt.legend()
 
plt.tight_layout()
plt.show()



In [ ]: