In [24]:
import numpy as np
import matplotlib.pyplot as plt
import sghmc
In [25]:
pima = np.genfromtxt('pima-indians-diabetes.data', delimiter=',')
names = ["Number of times pregnant",
"Plasma glucose concentration",
"Diastolic blood pressure (mm Hg)",
"Triceps skin fold thickness (mm)",
"2-Hour serum insulin (mu U/ml)",
"Body mass index (weight in kg/(height in m)^2)",
"Diabetes pedigree function",
"Age (years)",
"Class variable (0 or 1)"]
In [26]:
# Build design matrix with an intercept column, then standardize
X = np.concatenate((np.ones((pima.shape[0],1)),pima[:,0:8]), axis=1)
Y = pima[:,8]
# Center every column and scale the predictors to unit variance;
# the centered intercept column is all zeros afterwards, so drop it
Xs = (X - np.mean(X, axis=0))/np.concatenate((np.ones(1),np.std(X[:,1:], axis=0)))
Xs = Xs[:,1:]
n, p = Xs.shape
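As a quick sanity check (added here, not in the original analysis), the standardized predictors should come out with mean zero and unit variance:
In [ ]:
# Sanity check: each scaled column should have mean ~0 and sd ~1
print(np.allclose(Xs.mean(axis=0), 0))
print(np.allclose(Xs.std(axis=0), 1))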
In [27]:
from sklearn.linear_model import LogisticRegression
In [28]:
# Unscaled; a very large C effectively removes sklearn's L2 penalty, so this approximates the MLE
mod_logis = LogisticRegression(fit_intercept=False, C=1e50)
mod_logis.fit(X,Y)
beta_true_unscale = mod_logis.coef_.ravel()
beta_true_unscale
Out[28]:
In [29]:
# Scaled
mod_logis = LogisticRegression(fit_intercept=False, C=1e50)
mod_logis.fit(Xs,Y)
beta_true_scale = mod_logis.coef_.ravel()
beta_true_scale
Out[29]:
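To have a scalar baseline for judging the samplers below, one can evaluate the logistic log-likelihood at the scaled MLE. This small helper is a sketch added here, not part of the sghmc module:
In [ ]:
# Added sketch: logistic log-likelihood, evaluated at the scaled MLE as a baseline
def loglik(beta, X, Y):
    eta = X @ beta
    return np.sum(Y * eta - np.log1p(np.exp(eta)))
loglik(beta_true_scale, Xs, Y)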
In [30]:
# HMC - Scaled
nsample = 1000
m = 20
eps = .001
theta = np.zeros(p)
#theta = beta_true_scale.copy()  # optional warm start at the scaled MLE
phi = 5
M = np.identity(p)
samples_hmc, accept, rho, H_hmc = sghmc.run_hmc(Y, Xs, sghmc.U_logistic, sghmc.gradU_logistic, M, eps, m, theta, phi, nsample)
beta_est_hmc = np.mean(samples_hmc, axis=0)
beta_est_hmc - beta_true_scale
Out[30]:
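Point estimates alone hide the sampler's spread; a short summary with a burn-in and 95% quantile intervals can be computed directly from samples_hmc. This cell is an added sketch, and the 200-sample burn-in is an arbitrary choice:
In [ ]:
# Added sketch: drop a burn-in, then summarize the HMC posterior per coefficient
burn = 200  # arbitrary choice
post = samples_hmc[burn:]
lo, hi = np.percentile(post, [2.5, 97.5], axis=0)
for j in range(p):
    print(f"{names[j][:30]:30s} {np.mean(post[:,j]): .3f} [{lo[j]: .3f}, {hi[j]: .3f}]")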
In [31]:
plt.plot((samples_hmc - beta_true_scale)[:,1])
plt.savefig('hmc-trace.pdf')
In [32]:
fig, ax = plt.subplots(figsize=(4,3))
ax.plot(H_hmc)
ax.set_title("Total energy")
ax.set_xlabel("Number of samples")
plt.savefig('hmc-energy.pdf')
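run_hmc also returns accept; assuming it holds a per-iteration 0/1 acceptance indicator (the sghmc module's convention is not shown here), the overall Metropolis acceptance rate is one line:
In [ ]:
# Added check, assuming `accept` is an array of 0/1 acceptance indicators
np.mean(accept)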
In [ ]:
fig, ax = plt.subplots(3,1, figsize=(6,10))
for i in range(3):
    ax[i].plot((samples_hmc - beta_true_scale)[:,i])
    ax[i].set_title(names[i])
In [ ]:
In [37]:
# SGHMC - Scaled (no intercept)
nsample = 1000
m = 20
eps = .002
theta = np.zeros(p)
#theta = beta_true_scale.copy()
phi = 5
nbatch = 500
C = 1 * np.identity(p)
V = 0 * np.identity(p)
M = np.identity(p)
samples_sghmc, H_sghmc = sghmc.run_sghmc(Y, Xs, sghmc.U_logistic, sghmc.stogradU_logistic, M, eps, m, theta, C, V, phi, nsample, nbatch)
beta_est_sghmc = np.mean(samples_sghmc, axis=0)
beta_est_sghmc - beta_true_scale
Out[37]:
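For reference, a single SGHMC update in the friction-plus-noise parameterization of Chen et al. (2014) might look like the sketch below. This illustrates the dynamics that run_sghmc is presumed to implement, with Bhat = 0.5*eps*V as the estimated gradient-noise term; it is not the module's actual code, and the stograd interface is an assumption:
In [ ]:
# Added sketch of one SGHMC step (Chen et al. 2014 discretization), assuming
# stograd(theta) returns a stochastic gradient of the negative log-posterior
def sghmc_step(theta, r, stograd, eps, Minv, C, V, rng=np.random):
    Bhat = 0.5 * eps * V              # estimated noise from the stochastic gradient
    noise_cov = 2 * eps * (C - Bhat)  # injected noise compensating the friction C
    r = r - eps * stograd(theta) - eps * C @ Minv @ r \
        + rng.multivariate_normal(np.zeros(len(theta)), noise_cov)
    theta = theta + eps * Minv @ r
    return theta, r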
In [ ]:
plt.plot((samples_sghmc - beta_true_scale)[:,0])
plt.savefig('sghmc-trace.pdf')
In [ ]:
plt.plot(H_sghmc)
plt.savefig('sghmc-energy.pdf')
In [ ]:
fig, ax = plt.subplots(3,1, figsize=(6,10))
for i in range(3):
    ax[i].plot((samples_sghmc - beta_true_scale)[:,i])
    ax[i].set_title(names[i])
In [ ]:
# Gradient descent - Scaled
np.random.seed(2)
phi = .1
beta_est_gd = sghmc.gd(Y, Xs, sghmc.gradU_logistic, .1, 10000, np.zeros(p), phi)
beta_est_gd - beta_true_scale
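sghmc.gd is the module's gradient-descent baseline. As a rough picture of what such a routine typically does (an assumption, since the module source is not shown), each iteration simply steps against the gradient of the negative log-posterior:
In [ ]:
# Added sketch of a plain gradient-descent loop over gradU (assumed interface)
def gd_sketch(Y, X, gradU, eps, niter, theta, phi):
    for _ in range(niter):
        theta = theta - eps * gradU(Y, X, theta, phi)  # assumed gradU signature
    return theta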
In [ ]:
In [ ]:
In [38]:
import pandas as pd
In [49]:
df = pd.DataFrame(np.vstack((beta_true_scale,
beta_est_hmc,
beta_est_sghmc,
beta_est_gd)).T,
columns=['reg','hmc','sghmc','gd'])
ax = df.plot()
ax.set_title("Coefficient Estimates")
ax.set_xlabel("Coefficient")
plt.savefig('coefs-pima.pdf')
plt.show()
In [122]:
df = pd.DataFrame({'reg': beta_true_scale,
'hmc': beta_est_hmc,
'sghmc': beta_est_sghmc,
'gd': beta_est_gd,
'name':names[:-1]})
In [120]:
# Grouped bar chart of the coefficient estimates by method
n_groups = 8
means_reg = df['reg'].values
means_sghmc = df['sghmc'].values
means_gd = df['gd'].values
means_hmc = df['hmc'].values
nam = df['name'].values
# create plot: four bars per coefficient, offset by bar_width
fig, ax = plt.subplots()
index = np.arange(n_groups)
bar_width = 0.2
opacity = 0.8
rects1 = plt.bar(index, means_reg, bar_width,
                 alpha=opacity, color='b', label='MLE')
rects2 = plt.bar(index + bar_width, means_gd, bar_width,
                 alpha=opacity, color='g', label='Gradient Descent')
rects3 = plt.bar(index + 2*bar_width, means_hmc, bar_width,
                 alpha=opacity, color='r', label='Hamiltonian Monte Carlo')
rects4 = plt.bar(index + 3*bar_width, means_sghmc, bar_width,
                 alpha=opacity, color='y', label='Stochastic Gradient HMC')
plt.xlabel('Coefficient')
plt.ylabel('Value')
plt.title('Coefficient estimates by method')
plt.xticks(index + 1.5*bar_width, nam, rotation=90)
plt.legend()
plt.tight_layout()
plt.show()
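The same grouped bar chart can also be produced in a couple of lines with pandas, which handles the bar offsets and tick labels automatically:
In [ ]:
# Equivalent grouped bars via pandas
df.set_index('name')[['reg','hmc','sghmc','gd']].plot.bar()
plt.tight_layout()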