In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
import pystan
In [2]:
my_data = pd.read_csv('./../../Catalogue/binom_reg_dataset.csv')
redshifts = my_data['Z']
index = np.where(redshifts.values<=0.4)
logit_class = my_data['LOGIT_CLASS(1-UVUP;0-UVWEAK)'].values[index]
whan_class = my_data['WHAN(0-NA;1-RP;2-wA;3-sA;4-SF)'].values[index]
redshift = redshifts.values[index]
idx_na = np.where(whan_class==0)
idx_rp = np.where(whan_class==1)
idx_wa = np.where(whan_class==2)
idx_sa = np.where(whan_class==3)
idx_sf = np.where(whan_class==4)
In [62]:
red_seq=[redshift[idx_na].size, redshift[idx_rp].size, redshift[idx_wa].size, redshift[idx_sa].size, redshift[idx_sf].size]
print red_seq
In [63]:
uv_up=[np.count_nonzero(logit_class[idx_na]), np.count_nonzero(logit_class[idx_rp]), np.count_nonzero(logit_class[idx_wa]), np.count_nonzero(logit_class[idx_sa]), np.count_nonzero(logit_class[idx_sf])]
print uv_up
In [66]:
uv_wk = np.array(red_seq)-np.array(uv_up)
print uv_wk
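As a quick check on the counts above, the UV-upturn fraction per WHAN class follows directly from uv_up and red_seq; the sketch below assumes the same class order as the idx_* arrays.
In [ ]:
# UV-upturn fraction per WHAN class (same order as red_seq and uv_up above)
labels = ['NA', 'RP', 'wA', 'sA', 'SF']
fractions = np.array(uv_up, dtype=float) / np.array(red_seq, dtype=float)
for label, frac in zip(labels, fractions):
    print('%s: %.3f' % (label, frac))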
In [3]:
x0 = redshift[idx_na]
y = logit_class[idx_na] # 1 if the galaxy shows a UV upturn, 0 otherwise
n_obs = x0.size
regression_data = {}
regression_data['K'] = 3 # number of betas
regression_data['X'] = sm.add_constant(np.column_stack((x0, x0**2)))
regression_data['N'] = n_obs
regression_data['Y'] = y
regression_data['LogN'] = np.log(n_obs)
# Data to be plotted -------------------------------------------------------------------------------------------
n_obs2 = 50
x0_sim = np.linspace(x0.min(), x0.max(), n_obs2)
regression_data['X2'] = sm.add_constant(np.column_stack((x0_sim, x0_sim**2)))
regression_data['N2'] = n_obs2
print regression_data['X2'].shape
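The design matrix holds an intercept column (added by sm.add_constant) plus z and z^2, so the Stan model in the next cell fits P(UV upturn | z) = inv_logit(beta_1 + beta_2 z + beta_3 z^2). The sketch below only illustrates how X2 and a coefficient vector map to probabilities; the beta values are made up, not fitted.
In [ ]:
# illustrative only: how a coefficient vector turns X2 into a probability curve
def inv_logit(eta):
    return 1.0 / (1.0 + np.exp(-eta))

beta_example = np.array([-1.0, 10.0, -20.0])  # [intercept, z, z^2] -- hypothetical values
p_example = inv_logit(regression_data['X2'].dot(beta_example))
print(p_example.shape)  # (50,): one probability per simulated redshift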
In [4]:
# Fit: STAN code -----------------------------------------------------------------------------------------------
stan_code = """
data{
int<lower=0> N;
int<lower=0> N2;
int<lower=0> K;
int Y[N];
matrix[N,K] X;
matrix[N2,K] X2;
real LogN;
}
parameters{
vector[K] beta;
}
transformed parameters{
vector[N] eta;
eta = X * beta;
}
model{
Y ~ bernoulli_logit(eta);
}
generated quantities{
/* real LLi[N2]; */
/* real AIC; */
/* real BIC; */
/* real LogL; */
vector[N2] etanew;
real<lower=0, upper=1.0> pnew[N2];
etanew = X2 * beta;
for (j in 1:N2){
pnew[j] = inv_logit(etanew[j]);
/* LLi[j] = bernoulli_lpmf(1|pnew[j]); */
}
/* LogL = sum(LLi); */
/* AIC = -2 * LogL + 2 * K; */
/* BIC = -2 * LogL + LogN * K; */
}
# """
fit_0 = pystan.stan(model_code=stan_code, data=regression_data, iter=5000, chains=3, warmup=2000, n_jobs=1)
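The posterior draws can also be pulled straight from the fit object (the commented-out cells further down sketch the same idea); a minimal example for fit_0, and likewise for fit_1 to fit_4:
In [ ]:
# posterior draws for the three coefficients of the NA-class fit
betas_0 = fit_0.extract('beta')['beta']  # shape: (kept draws, 3)
print(betas_0.mean(axis=0))
print(betas_0.std(axis=0))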
In [5]:
x1 = redshift[idx_rp]
y = logit_class[idx_rp] # 1 if the galaxy shows a UV upturn, 0 otherwise
n_obs = x1.size
regression_data = {}
regression_data['K'] = 3 # number of betas
regression_data['X'] = sm.add_constant(np.column_stack((x1, x1**2)))
regression_data['N'] = n_obs
regression_data['Y'] = y
regression_data['LogN'] = np.log(n_obs)
# Data to be plotted -------------------------------------------------------------------------------------------
n_obs2 = 50
x1_sim = np.linspace(x1.min(), x1.max(), n_obs2)
regression_data['X2'] = sm.add_constant(np.column_stack((x1_sim, x1_sim**2)))
regression_data['N2'] = n_obs2
print regression_data['X2'].shape
In [6]:
# Fit: STAN code -----------------------------------------------------------------------------------------------
stan_code = """
data{
int<lower=0> N;
int<lower=0> N2;
int<lower=0> K;
int Y[N];
matrix[N,K] X;
matrix[N2,K] X2;
real LogN;
}
parameters{
vector[K] beta;
}
transformed parameters{
vector[N] eta;
eta = X * beta;
}
model{
Y ~ bernoulli_logit(eta);
}
generated quantities{
/* real LLi[N2]; */
/* real AIC; */
/* real BIC; */
/* real LogL; */
vector[N2] etanew;
real<lower=0, upper=1.0> pnew[N2];
etanew = X2 * beta;
for (j in 1:N2){
pnew[j] = inv_logit(etanew[j]);
/* LLi[j] = bernoulli_lpmf(1|pnew[j]); */
}
/* LogL = sum(LLi); */
/* AIC = -2 * LogL + 2 * K; */
/* BIC = -2 * LogL + LogN * K; */
}
# """
fit_1 = pystan.stan(model_code=stan_code, data=regression_data, iter=5000, chains=3, warmup=2000, n_jobs=1)
In [7]:
x2 = redshift[idx_wa]
y = logit_class[idx_wa] # 1 if the galaxy shows a UV upturn, 0 otherwise
n_obs = x2.size
regression_data = {}
regression_data['K'] = 3 # number of betas
regression_data['X'] = sm.add_constant(np.column_stack((x2, x2**2)))
regression_data['N'] = n_obs
regression_data['Y'] = y
regression_data['LogN'] = np.log(n_obs)
# Data to be plotted -------------------------------------------------------------------------------------------
n_obs2 = 50
x2_sim = np.linspace(x2.min(), x2.max(), n_obs2)
regression_data['X2'] = sm.add_constant(np.column_stack((x2_sim, x2_sim**2)))
regression_data['N2'] = n_obs2
print regression_data['X2'].shape
In [8]:
# Fit: STAN code -----------------------------------------------------------------------------------------------
stan_code = """
data{
int<lower=0> N;
int<lower=0> N2;
int<lower=0> K;
int Y[N];
matrix[N,K] X;
matrix[N2,K] X2;
real LogN;
}
parameters{
vector[K] beta;
}
transformed parameters{
vector[N] eta;
eta = X * beta;
}
model{
Y ~ bernoulli_logit(eta);
}
generated quantities{
/* real LLi[N2]; */
/* real AIC; */
/* real BIC; */
/* real LogL; */
vector[N2] etanew;
real<lower=0, upper=1.0> pnew[N2];
etanew = X2 * beta;
for (j in 1:N2){
pnew[j] = inv_logit(etanew[j]);
/* LLi[j] = bernoulli_lpmf(1|pnew[j]); */
}
/* LogL = sum(LLi); */
/* AIC = -2 * LogL + 2 * K; */
/* BIC = -2 * LogL + LogN * K; */
}
# """
fit_2 = pystan.stan(model_code=stan_code, data=regression_data, iter=5000, chains=3, warmup=2000, n_jobs=1)
In [9]:
x3 = redshift[idx_sa]
y = logit_class[idx_sa] # 1 if the galaxy shows a UV upturn, 0 otherwise
n_obs = x3.size
regression_data = {}
regression_data['K'] = 3 # number of betas
regression_data['X'] = sm.add_constant(np.column_stack((x3, x3**2)))
regression_data['N'] = n_obs
regression_data['Y'] = y
regression_data['LogN'] = np.log(n_obs)
# Data to be plotted -------------------------------------------------------------------------------------------
n_obs2 = 50
x3_sim = np.linspace(x3.min(), x3.max(), n_obs2)
regression_data['X2'] = sm.add_constant(np.column_stack((x3_sim, x3_sim**2)))
regression_data['N2'] = n_obs2
print regression_data['X2'].shape
In [10]:
# Fit: STAN code -----------------------------------------------------------------------------------------------
stan_code = """
data{
int<lower=0> N;
int<lower=0> N2;
int<lower=0> K;
int Y[N];
matrix[N,K] X;
matrix[N2,K] X2;
real LogN;
}
parameters{
vector[K] beta;
}
transformed parameters{
vector[N] eta;
eta = X * beta;
}
model{
Y ~ bernoulli_logit(eta);
}
generated quantities{
/* real LLi[N2]; */
/* real AIC; */
/* real BIC; */
/* real LogL; */
vector[N2] etanew;
real<lower=0, upper=1.0> pnew[N2];
etanew = X2 * beta;
for (j in 1:N2){
pnew[j] = inv_logit(etanew[j]);
/* LLi[j] = bernoulli_lpmf(1|pnew[j]); */
}
/* LogL = sum(LLi); */
/* AIC = -2 * LogL + 2 * K; */
/* BIC = -2 * LogL + LogN * K; */
}
# """
fit_3 = pystan.stan(model_code=stan_code, data=regression_data, iter=5000, chains=3, warmup=2000, n_jobs=1)
In [11]:
x4 = redshift[idx_sf]
y = logit_class[idx_sf] # 1 if the galaxy shows a UV upturn, 0 otherwise
n_obs = x4.size
regression_data = {}
regression_data['K'] = 3 # number of betas
regression_data['X'] = sm.add_constant(np.column_stack((x4, x4**2)))
regression_data['N'] = n_obs
regression_data['Y'] = y
regression_data['LogN'] = np.log(n_obs)
# Data to be plotted -------------------------------------------------------------------------------------------
n_obs2 = 50
x4_sim = np.linspace(x4.min(), x4.max(), n_obs2)
regression_data['X2'] = sm.add_constant(np.column_stack((x4_sim, x4_sim**2)))
regression_data['N2'] = n_obs2
print regression_data['X2'].shape
In [12]:
# Fit: STAN code -----------------------------------------------------------------------------------------------
stan_code = """
data{
int<lower=0> N;
int<lower=0> N2;
int<lower=0> K;
int Y[N];
matrix[N,K] X;
matrix[N2,K] X2;
real LogN;
}
parameters{
vector[K] beta;
}
transformed parameters{
vector[N] eta;
eta = X * beta;
}
model{
Y ~ bernoulli_logit(eta);
}
generated quantities{
/* real LLi[N2]; */
/* real AIC; */
/* real BIC; */
/* real LogL; */
vector[N2] etanew;
real<lower=0, upper=1.0> pnew[N2];
etanew = X2 * beta;
for (j in 1:N2){
pnew[j] = inv_logit(etanew[j]);
/* LLi[j] = bernoulli_lpmf(1|pnew[j]); */
}
/* LogL = sum(LLi); */
/* AIC = -2 * LogL + 2 * K; */
/* BIC = -2 * LogL + LogN * K; */
}
# """
fit_4 = pystan.stan(model_code=stan_code, data=regression_data, iter=5000, chains=3, warmup=2000, n_jobs=1)
In [13]:
output_0 = np.array(str(pystan.misc._print_stanfit(fit_0, digits_summary=4)).split('\n'))
output_1 = np.array(str(pystan.misc._print_stanfit(fit_1, digits_summary=4)).split('\n'))
output_2 = np.array(str(pystan.misc._print_stanfit(fit_2, digits_summary=4)).split('\n'))
output_3 = np.array(str(pystan.misc._print_stanfit(fit_3, digits_summary=4)).split('\n'))
output_4 = np.array(str(pystan.misc._print_stanfit(fit_4, digits_summary=4)).split('\n'))
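The string parsing below relies on a private PyStan helper and needs a few rows patched by hand further down; fit.summary() exposes the same table numerically, which can be more robust. A minimal sketch, assuming PyStan 2's summary() dictionary keys:
In [ ]:
# numeric alternative to parsing the printed summary (dict keys assumed from PyStan 2)
summary_0 = fit_0.summary()
summary0_df = pd.DataFrame(summary_0['summary'],
                           index=summary_0['summary_rownames'],
                           columns=summary_0['summary_colnames'])
summary0_df.head()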
In [14]:
new_output_0 = output_0[5:-6] # removing header and footer
new_output_1 = output_1[5:-6]
new_output_2 = output_2[5:-6]
new_output_3 = output_3[5:-6]
new_output_4 = output_4[5:-6]
In [15]:
print new_output_0.size, new_output_1.size, new_output_2.size, new_output_3.size, new_output_4.size
In [ ]:
# posteriors = list(fit.extract(u'beta').items()[0])
In [ ]:
# betas = posteriors[1]
In [ ]:
# beta0 = betas[:,0]
# beta1 = betas[:,1]
# beta2 = betas[:,2]
# beta3 = betas[:,3]
In [ ]:
# plt.subplots(1,1, figsize=(20,7), sharey=True)
# plot01 = plt.subplot(1,4,1)
# sns.kdeplot(beta0, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{0}$", fontsize=25)
# plt.ylabel(r"Kernel Density", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.subplot(1,4,2, sharey=plot01)
# sns.kdeplot(beta1, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{1}$", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.subplot(1,4,3, sharey=plot01)
# sns.kdeplot(beta2, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{2}$", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.subplot(1,4,4, sharey=plot01)
# sns.kdeplot(beta2, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{2}$", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.tight_layout()
# plt.savefig('./../Model/Results/posterios_sharey_3d_teste.pdf', dpi=100)
# plt.show()
In [ ]:
# plt.subplots(1,1, figsize=(25,10), sharey=True)
# plot01 = plt.subplot(1,4,1)
# sns.kdeplot(beta0, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{0}$", fontsize=25)
# plt.ylabel(r"Kernel Density", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.subplot(1,4,2)
# sns.kdeplot(beta1, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{1}$", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.subplot(1,4,3)
# sns.kdeplot(beta2, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{2}$", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.subplot(1,4,4)
# sns.kdeplot(beta2, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{2}$", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.tight_layout()
# plt.savefig('./../Model/Results/posterios_3d_teste.pdf', dpi=100)
# plt.show()
In [16]:
diagnostics = []
for i in range(new_output_0.size):
if len(new_output_0[i].split())<11:
print i, len(new_output_0[i].split()),'\n'
print new_output_0[i], '\n', new_output_0[i].split(), len(new_output_0[i].split())
diagnostics.append(len(new_output_0[i].split()))
else:
continue
print np.unique(diagnostics)
In [17]:
new_output_0[1] = 'beta[1] 91.382 0.8678 27.388 42.040 72.439 89.485 1.084e2 1.498e2 996 1.0052'
new_output_0[2] = 'beta[2] -1.862e2 1.9758 63.131 -3.201e2 -2.257e2 -1.823e2 -1.424e2 -73.77 1021 1.0054'
In [18]:
diagnostics = []
for i in range(new_output_1.size):
if len(new_output_1[i].split())<11:
print i, len(new_output_1[i].split()),'\n'
print new_output_1[i], '\n', new_output_1[i].split(), len(new_output_1[i].split())
diagnostics.append(len(new_output_1[i].split()))
else:
continue
print np.unique(diagnostics)
In [19]:
new_output_1[2] = 'beta[2] -96.46 1.2535 43.402 -1.845e2 -1.246e2 -96.18 -67.83 -14.10 1199 1.0041'
In [20]:
diagnostics = []
for i in range(new_output_2.size):
if len(new_output_2[i].split())<11:
print i, len(new_output_2[i].split()),'\n'
print new_output_2[i], '\n', new_output_2[i].split(), len(new_output_2[i].split())
diagnostics.append(len(new_output_2[i].split()))
else:
continue
print np.unique(diagnostics)
In [21]:
new_output_2[1] = 'beta[1] -46.53 1.3164 42.657 -1.333e2 -74.51 -46.77 -18.08 35.696 1050 1.0008'
new_output_2[2] = 'beta[2] 1.382e2 3.8353 1.263e2 -1.032e2 53.881 1.366e2 2.208e2 3.977e2 1084 1.001'
In [22]:
diagnostics = []
for i in range(new_output_3.size):
if len(new_output_3[i].split())<11:
print i, len(new_output_3[i].split()),'\n'
print new_output_3[i], '\n', new_output_3[i].split(), len(new_output_3[i].split())
diagnostics.append(len(new_output_3[i].split()))
else:
continue
print np.unique(diagnostics)
In [23]:
new_output_3[1] = 'beta[1] 14.439 1.309 44.640 -72.30 -15.47 13.304 44.059 1.036e2 1163 1.0009'
new_output_3[2] = 'beta[2] -35.69 3.3417 1.158e2 -2.668e2 -1.113e2 -32.20 43.063 1.857e2 1201 1.0009'
In [24]:
diagnostics = []
for i in range(new_output_4.size):
if len(new_output_4[i].split())<11:
print i, len(new_output_4[i].split()),'\n'
print new_output_4[i], '\n', new_output_4[i].split(), len(new_output_4[i].split())
diagnostics.append(len(new_output_4[i].split()))
else:
continue
print np.unique(diagnostics)
In [25]:
new_output_4[2] = 'beta[2] 65.498 1.2748 46.873 -23.10 32.794 64.984 96.879 1.589e2 1352 1.0032'
In [26]:
header_fit = output_0[4].split()
print header_fit
In [27]:
header_addendum = 'parameter'
header_fit = [header_addendum] + header_fit
print header_fit
In [28]:
new_data_0 = list(np.zeros(len(header_fit)))
for i in range(new_output_0.size):
if len(new_output_0[i].split())!=11: # each summary row must split into exactly 11 fields before it can be stacked
print "there is a problem!"
else:
new_output_i = np.array(new_output_0[i].split()).reshape(1,11)
new_data_0 = np.vstack((new_data_0, new_output_i))
new_data_0 = new_data_0[1:,:] # removing the placeholder row of zeros at the beginning
In [29]:
new_data_1 = list(np.zeros(len(header_fit)))
for i in range(new_output_1.size):
if len(new_output_1[i].split())!=11: # each summary row must split into exactly 11 fields before it can be stacked
print "there is a problem!"
else:
new_output_i = np.array(new_output_1[i].split()).reshape(1,11)
new_data_1 = np.vstack((new_data_1, new_output_i))
new_data_1 = new_data_1[1:,:] # removing the placeholder row of zeros at the beginning
In [30]:
new_data_2 = list(np.zeros(len(header_fit)))
for i in range(new_output_2.size):
if len(new_output_2[i].split())!=11: # each summary row must split into exactly 11 fields before it can be stacked
print "there is a problem!"
else:
new_output_i = np.array(new_output_2[i].split()).reshape(1,11)
new_data_2 = np.vstack((new_data_2, new_output_i))
new_data_2 = new_data_2[1:,:] # removing the placeholder row of zeros at the beginning
In [31]:
new_data_3 = list(np.zeros(len(header_fit)))
for i in range(new_output_3.size):
if len(new_output_3[i].split())!=11: # each summary row must split into exactly 11 fields before it can be stacked
print "there is a problem!"
else:
new_output_i = np.array(new_output_3[i].split()).reshape(1,11)
new_data_3 = np.vstack((new_data_3, new_output_i))
new_data_3 = new_data_3[1:,:] # removing the placeholder row of zeros at the beginning
In [32]:
new_data_4 = list(np.zeros(len(header_fit)))
for i in range(new_output_4.size):
if len(new_output_4[i].split())!=11: # each summary row must split into exactly 11 fields before it can be stacked
print "there is a problem!"
else:
new_output_i = np.array(new_output_4[i].split()).reshape(1,11)
new_data_4 = np.vstack((new_data_4, new_output_i))
new_data_4 = new_data_4[1:,:] # removing the placeholder row of zeros at the beginning
In [ ]:
# new_dataframe = pd.DataFrame(new_data)
# new_dataframe.columns = header_fit
# new_dataframe.to_csv('./Results/fit_results_3d_teste.csv', sep=',', index=False)
In [ ]:
# betas = {}
# betas['beta0'] = beta0
# betas['beta1'] = beta1
# betas['beta2'] = beta2
# betas['beta3'] = beta3
In [ ]:
# betas_dataframe = pd.DataFrame(betas)
# betas_dataframe.to_csv('./Results/betas_3d_teste.csv', sep=',', header=True, index=False)
In [42]:
parameters = new_data_0[:,0].astype(str)
pnew_idxs = []
for i in range(parameters.size):
if parameters[i][0:4]=='pnew':
pnew_idxs.append(i)
else:
continue
model_results_0 = np.column_stack((new_data_0[pnew_idxs,:], x0_sim))
model_results0_df = pd.DataFrame(model_results_0)
model_results0_df.columns = header_fit + ['Z']
model_results0_df.to_csv('./Results/fit_el_fit0.csv', sep=',', header=True, index=False)
In [44]:
parameters = new_data_1[:,0].astype(str)
pnew_idxs = []
for i in range(parameters.size):
if parameters[i][0:4]=='pnew':
pnew_idxs.append(i)
else:
continue
model_results_1 = np.column_stack((new_data_1[pnew_idxs,:], x1_sim))
model_results1_df = pd.DataFrame(model_results_1)
model_results1_df.columns = header_fit + ['Z']
model_results1_df.to_csv('./Results/fit_el_fit1.csv', sep=',', header=True, index=False)
In [45]:
parameters = new_data_2[:,0].astype(str)
pnew_idxs = []
for i in range(parameters.size):
if parameters[i][0:4]=='pnew':
pnew_idxs.append(i)
else:
continue
model_results_2 = np.column_stack((new_data_2[pnew_idxs,:], x2_sim))
model_results2_df = pd.DataFrame(model_results_2)
model_results2_df.columns = header_fit + ['Z']
model_results2_df.to_csv('./Results/fit_el_fit2.csv', sep=',', header=True, index=False)
In [46]:
parameters = new_data_3[:,0].astype(str)
pnew_idxs = []
for i in range(parameters.size):
if parameters[i][0:4]=='pnew':
pnew_idxs.append(i)
else:
continue
model_results_3 = np.column_stack((new_data_3[pnew_idxs,:], x3_sim))
model_results3_df = pd.DataFrame(model_results_3)
model_results3_df.columns = header_fit + ['Z']
model_results3_df.to_csv('./Results/fit_el_fit3.csv', sep=',', header=True, index=False)
In [47]:
parameters = new_data_4[:,0].astype(str)
pnew_idxs = []
for i in range(parameters.size):
if parameters[i][0:4]=='pnew':
pnew_idxs.append(i)
else:
continue
model_results_4 = np.column_stack((new_data_4[pnew_idxs,:], x4_sim))
model_results4_df = pd.DataFrame(model_results_4)
model_results4_df.columns = header_fit + ['Z']
model_results4_df.to_csv('./Results/fit_el_fit4.csv', sep=',', header=True, index=False)
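Each CSV holds, per simulated redshift, the posterior summary of pnew, so the fitted curve can be plotted directly; a minimal sketch for the RP-class file, assuming the column names written above (header_fit plus 'Z'):
In [ ]:
# quick look at one fitted probability curve (RP class)
curve = pd.read_csv('./Results/fit_el_fit1.csv')
plt.fill_between(curve['Z'], curve['2.5%'], curve['97.5%'], color='#e6550d', alpha=0.3)
plt.plot(curve['Z'], curve['50%'], color='#e6550d')
plt.xlabel('z')
plt.ylabel('P(UV upturn)')
plt.show()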
In [49]:
model_results1_df
Out[49]: (DataFrame: 50 rows x 12 columns, the pnew posterior summary per simulated redshift)
In [ ]: