In [1]:
    
import numpy             as np
import pandas            as pd
import matplotlib.pyplot as plt
import statsmodels.api   as sm
import seaborn           as sns
import pystan
    
In [2]:
    
my_data = pd.read_csv('./../../Catalogue/binom_reg_dataset.csv')
    
redshifts = my_data['Z']
index     = np.where(redshifts.values<=0.4)
logit_class = my_data['LOGIT_CLASS(1-UVUP;0-UVWEAK)'].values[index]
whan_class  = my_data['WHAN(0-NA;1-RP;2-wA;3-sA;4-SF)'].values[index]
redshift    = redshifts.values[index]
    
idx_na = np.where(whan_class==0)
idx_rp = np.where(whan_class==1)
idx_wa = np.where(whan_class==2)
idx_sa = np.where(whan_class==3)
idx_sf = np.where(whan_class==4)
    
In [62]:
    
red_seq=[redshift[idx_na].size, redshift[idx_rp].size, redshift[idx_wa].size, redshift[idx_sa].size, redshift[idx_sf].size]
print red_seq
    
    
In [63]:
    
uv_up=[np.count_nonzero(logit_class[idx_na]), np.count_nonzero(logit_class[idx_rp]), np.count_nonzero(logit_class[idx_wa]), np.count_nonzero(logit_class[idx_sa]), np.count_nonzero(logit_class[idx_sf])]
print uv_up
    
    
In [66]:
    
uv_wk = np.array(red_seq)-np.array(uv_up)
print uv_wk
    
    
In [3]:
    
x0    = redshift[idx_na]
y     = logit_class[idx_na]         # whether this is a galaxy with uv upturn or not
n_obs = x0.size
regression_data = {}
regression_data['K'] = 3      # number of betas
regression_data['X'] = sm.add_constant(np.column_stack((x0, x0**2)))
regression_data['N'] = n_obs
regression_data['Y'] = y
regression_data['LogN'] = np.log(n_obs)
    
# Data to be plotted -------------------------------------------------------------------------------------------
n_obs2 = 50    
x0_sim = np.linspace(x0.min(), x0.max(), n_obs2)
regression_data['X2'] = sm.add_constant(np.column_stack((x0_sim, x0_sim**2)))
regression_data['N2'] = n_obs2
    
print regression_data['X2'].shape
    
    
In [4]:
    
# Fit: STAN code -----------------------------------------------------------------------------------------------
stan_code = """
data{
    int<lower=0> N;
    int<lower=0> N2;
    int<lower=0> K;
    int Y[N];
    matrix[N,K] X;
    matrix[N2,K] X2;
    real LogN;
    }
parameters{
    vector[K] beta;
    }
transformed parameters{
    vector[N] eta;
    eta = X * beta;
    }
model{
    Y ~ bernoulli_logit(eta);
    }
generated quantities{
    /* real LLi[N2]; */
    /* real AIC; */ 
    /* real BIC; */
    /* real LogL; */
    vector[N2] etanew;
    real<lower=0, upper=1.0> pnew[N2];
    etanew = X2 * beta;
    for (j in 1:N2){
        pnew[j] = inv_logit(etanew[j]);
        /* LLi[j] = bernoulli_lpmf(1|pnew[j]); */
        }
    /* LogL = sum(LLi); */
    /* AIC = -2 * LogL + 2 * K; */
    /* BIC = -2 * LogL + LogN * K; */
    }
#     """
fit_0 = pystan.stan(model_code=stan_code, data=regression_data, iter=5000, chains=3, warmup=2000, n_jobs=1)
    
    
In [5]:
    
x1    = redshift[idx_rp]
y     = logit_class[idx_rp]         # whether this is a galaxy with uv upturn or not
n_obs = x1.size
regression_data = {}
regression_data['K'] = 3      # number of betas
regression_data['X'] = sm.add_constant(np.column_stack((x1, x1**2)))
regression_data['N'] = n_obs
regression_data['Y'] = y
regression_data['LogN'] = np.log(n_obs)
    
# Data to be plotted -------------------------------------------------------------------------------------------
n_obs2 = 50    
x1_sim = np.linspace(x1.min(), x1.max(), n_obs2)
regression_data['X2'] = sm.add_constant(np.column_stack((x1_sim, x1_sim**2)))
regression_data['N2'] = n_obs2
    
print regression_data['X2'].shape
    
    
In [6]:
    
# Fit: STAN code -----------------------------------------------------------------------------------------------
stan_code = """
data{
    int<lower=0> N;
    int<lower=0> N2;
    int<lower=0> K;
    int Y[N];
    matrix[N,K] X;
    matrix[N2,K] X2;
    real LogN;
    }
parameters{
    vector[K] beta;
    }
transformed parameters{
    vector[N] eta;
    eta = X * beta;
    }
model{
    Y ~ bernoulli_logit(eta);
    }
generated quantities{
    /* real LLi[N2]; */
    /* real AIC; */ 
    /* real BIC; */
    /* real LogL; */
    vector[N2] etanew;
    real<lower=0, upper=1.0> pnew[N2];
    etanew = X2 * beta;
    for (j in 1:N2){
        pnew[j] = inv_logit(etanew[j]);
        /* LLi[j] = bernoulli_lpmf(1|pnew[j]); */
        }
    /* LogL = sum(LLi); */
    /* AIC = -2 * LogL + 2 * K; */
    /* BIC = -2 * LogL + LogN * K; */
    }
#     """
fit_1 = pystan.stan(model_code=stan_code, data=regression_data, iter=5000, chains=3, warmup=2000, n_jobs=1)
    
    
In [7]:
    
x2    = redshift[idx_wa]
y     = logit_class[idx_wa]         # whether this is a galaxy with uv upturn or not
n_obs = x2.size
regression_data = {}
regression_data['K'] = 3      # number of betas
regression_data['X'] = sm.add_constant(np.column_stack((x2, x2**2)))
regression_data['N'] = n_obs
regression_data['Y'] = y
regression_data['LogN'] = np.log(n_obs)
    
# Data to be plotted -------------------------------------------------------------------------------------------
n_obs2 = 50    
x2_sim = np.linspace(x2.min(), x2.max(), n_obs2)
regression_data['X2'] = sm.add_constant(np.column_stack((x2_sim, x2_sim**2)))
regression_data['N2'] = n_obs2
    
print regression_data['X2'].shape
    
    
In [8]:
    
# Fit: STAN code -----------------------------------------------------------------------------------------------
stan_code = """
data{
    int<lower=0> N;
    int<lower=0> N2;
    int<lower=0> K;
    int Y[N];
    matrix[N,K] X;
    matrix[N2,K] X2;
    real LogN;
    }
parameters{
    vector[K] beta;
    }
transformed parameters{
    vector[N] eta;
    eta = X * beta;
    }
model{
    Y ~ bernoulli_logit(eta);
    }
generated quantities{
    /* real LLi[N2]; */
    /* real AIC; */ 
    /* real BIC; */
    /* real LogL; */
    vector[N2] etanew;
    real<lower=0, upper=1.0> pnew[N2];
    etanew = X2 * beta;
    for (j in 1:N2){
        pnew[j] = inv_logit(etanew[j]);
        /* LLi[j] = bernoulli_lpmf(1|pnew[j]); */
        }
    /* LogL = sum(LLi); */
    /* AIC = -2 * LogL + 2 * K; */
    /* BIC = -2 * LogL + LogN * K; */
    }
#     """
fit_2 = pystan.stan(model_code=stan_code, data=regression_data, iter=5000, chains=3, warmup=2000, n_jobs=1)
    
    
In [9]:
    
x3    = redshift[idx_sa]
y     = logit_class[idx_sa]         # whether this is a galaxy with uv upturn or not
n_obs = x3.size
regression_data = {}
regression_data['K'] = 3      # number of beta0
regression_data['X'] = sm.add_constant(np.column_stack((x3, x3**2)))
regression_data['N'] = n_obs
regression_data['Y'] = y
regression_data['LogN'] = np.log(n_obs)
    
# Data to be plotted -------------------------------------------------------------------------------------------
n_obs2 = 50    
x3_sim = np.linspace(x3.min(), x3.max(), n_obs2)
regression_data['X2'] = sm.add_constant(np.column_stack((x3_sim, x3_sim**2)))
regression_data['N2'] = n_obs2
    
print regression_data['X2'].shape
    
    
In [10]:
    
# Fit: STAN code -----------------------------------------------------------------------------------------------
stan_code = """
data{
    int<lower=0> N;
    int<lower=0> N2;
    int<lower=0> K;
    int Y[N];
    matrix[N,K] X;
    matrix[N2,K] X2;
    real LogN;
    }
parameters{
    vector[K] beta;
    }
transformed parameters{
    vector[N] eta;
    eta = X * beta;
    }
model{
    Y ~ bernoulli_logit(eta);
    }
generated quantities{
    /* real LLi[N2]; */
    /* real AIC; */ 
    /* real BIC; */
    /* real LogL; */
    vector[N2] etanew;
    real<lower=0, upper=1.0> pnew[N2];
    etanew = X2 * beta;
    for (j in 1:N2){
        pnew[j] = inv_logit(etanew[j]);
        /* LLi[j] = bernoulli_lpmf(1|pnew[j]); */
        }
    /* LogL = sum(LLi); */
    /* AIC = -2 * LogL + 2 * K; */
    /* BIC = -2 * LogL + LogN * K; */
    }
#     """
fit_3 = pystan.stan(model_code=stan_code, data=regression_data, iter=5000, chains=3, warmup=2000, n_jobs=1)
    
    
In [11]:
    
x4    = redshift[idx_sf]
y     = logit_class[idx_sf]         # whether this is a galaxy with uv upturn or not
n_obs = x4.size
regression_data = {}
regression_data['K'] = 3      # number of beta0
regression_data['X'] = sm.add_constant(np.column_stack((x4, x4**2)))
regression_data['N'] = n_obs
regression_data['Y'] = y
regression_data['LogN'] = np.log(n_obs)
    
# Data to be plotted -------------------------------------------------------------------------------------------
n_obs2 = 50    
x4_sim = np.linspace(x4.min(), x4.max(), n_obs2)
regression_data['X2'] = sm.add_constant(np.column_stack((x4_sim, x4_sim**2)))
regression_data['N2'] = n_obs2
    
print regression_data['X2'].shape
    
    
In [12]:
    
# Fit: STAN code -----------------------------------------------------------------------------------------------
stan_code = """
data{
    int<lower=0> N;
    int<lower=0> N2;
    int<lower=0> K;
    int Y[N];
    matrix[N,K] X;
    matrix[N2,K] X2;
    real LogN;
    }
parameters{
    vector[K] beta;
    }
transformed parameters{
    vector[N] eta;
    eta = X * beta;
    }
model{
    Y ~ bernoulli_logit(eta);
    }
generated quantities{
    /* real LLi[N2]; */
    /* real AIC; */ 
    /* real BIC; */
    /* real LogL; */
    vector[N2] etanew;
    real<lower=0, upper=1.0> pnew[N2];
    etanew = X2 * beta;
    for (j in 1:N2){
        pnew[j] = inv_logit(etanew[j]);
        /* LLi[j] = bernoulli_lpmf(1|pnew[j]); */
        }
    /* LogL = sum(LLi); */
    /* AIC = -2 * LogL + 2 * K; */
    /* BIC = -2 * LogL + LogN * K; */
    }
#     """
fit_4 = pystan.stan(model_code=stan_code, data=regression_data, iter=5000, chains=3, warmup=2000, n_jobs=1)
    
    
In [13]:
    
output_0     = np.array(str(pystan.misc._print_stanfit(fit_0, digits_summary=4)).split('\n'))
output_1     = np.array(str(pystan.misc._print_stanfit(fit_1, digits_summary=4)).split('\n'))
output_2     = np.array(str(pystan.misc._print_stanfit(fit_2, digits_summary=4)).split('\n'))
output_3     = np.array(str(pystan.misc._print_stanfit(fit_3, digits_summary=4)).split('\n'))
output_4     = np.array(str(pystan.misc._print_stanfit(fit_4, digits_summary=4)).split('\n'))
    
In [14]:
    
new_output_0 = output_0[5:-6]                                                        # removing header and footer
new_output_1 = output_1[5:-6]
new_output_2 = output_2[5:-6]
new_output_3 = output_3[5:-6]
new_output_4 = output_4[5:-6]
    
In [15]:
    
print new_output_0.size, new_output_1.size, new_output_2.size, new_output_3.size, new_output_4.size
    
    
In [ ]:
    
# posteriors = list(fit.extract(u'beta').items()[0])
    
In [ ]:
    
# betas = posteriors[1]
    
In [ ]:
    
# beta0 = betas[:,0]
# beta1 = betas[:,1]
# beta2 = betas[:,2]
# beta3 = betas[:,3]
    
In [ ]:
    
# plt.subplots(1,1, figsize=(20,7), sharey=True)
# plot01 = plt.subplot(1,4,1)
# sns.kdeplot(beta0, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{0}$", fontsize=25)
# plt.ylabel(r"Kernel Density", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.subplot(1,4,2, sharey=plot01)
# sns.kdeplot(beta1, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{1}$", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.subplot(1,4,3, sharey=plot01)
# sns.kdeplot(beta2, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{2}$", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.subplot(1,4,4, sharey=plot01)
# sns.kdeplot(beta2, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{2}$", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.tight_layout()
# plt.savefig('./../Model/Results/posterios_sharey_3d_teste.pdf', dpi=100)
# plt.show()
    
In [ ]:
    
# plt.subplots(1,1, figsize=(25,10), sharey=True)
# plot01 = plt.subplot(1,4,1)
# sns.kdeplot(beta0, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{0}$", fontsize=25)
# plt.ylabel(r"Kernel Density", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.subplot(1,4,2)
# sns.kdeplot(beta1, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{1}$", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.subplot(1,4,3)
# sns.kdeplot(beta2, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{2}$", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.subplot(1,4,4)
# sns.kdeplot(beta2, shade=True, c='#e6550d')
# plt.xlabel(r"$\beta_{2}$", fontsize=25)
# plt.tick_params('both', labelsize='20')
# plt.tight_layout()
# plt.savefig('./../Model/Results/posterios_3d_teste.pdf', dpi=100)
# plt.show()
    
In [16]:
    
diagnostics = []
for i in range(new_output_0.size):
    if len(new_output_0[i].split())<11:
        print i, len(new_output_0[i].split()),'\n'
        print new_output_0[i], '\n', new_output_0[i].split(), len(new_output_0[i].split())
        diagnostics.append(len(new_output_0[i].split()))
    else:
        continue
print np.unique(diagnostics)
    
    
In [17]:
    
new_output_0[1] = 'beta[1] 91.382 0.8678 27.388 42.040 72.439 89.485 1.084e2 1.498e2 996 1.0052'
new_output_0[2] = 'beta[2] -1.862e2 1.9758 63.131 -3.201e2 -2.257e2 -1.823e2 -1.424e2 -73.77 1021 1.0054'
    
In [18]:
    
diagnostics = []
for i in range(new_output_1.size):
    if len(new_output_1[i].split())<11:
        print i, len(new_output_1[i].split()),'\n'
        print new_output_1[i], '\n', new_output_1[i].split(), len(new_output_1[i].split())
        diagnostics.append(len(new_output_1[i].split()))
    else:
        continue
print np.unique(diagnostics)
    
    
In [19]:
    
new_output_1[2] = 'beta[2] -96.46 1.2535 43.402 -1.845e2 -1.246e2 -96.18 -67.83 -14.10 1199 1.0041'
    
In [20]:
    
diagnostics = []
for i in range(new_output_2.size):
    if len(new_output_2[i].split())<11:
        print i, len(new_output_2[i].split()),'\n'
        print new_output_2[i], '\n', new_output_2[i].split(), len(new_output_2[i].split())
        diagnostics.append(len(new_output_2[i].split()))
    else:
        continue
print np.unique(diagnostics)
    
    
In [21]:
    
new_output_2[1] = 'beta[1] -46.53 1.3164 42.657 -1.333e2 -74.51 -46.77 -18.08 35.696 1050 1.0008'
new_output_2[2] = 'beta[2] 1.382e2 3.8353 1.263e2 -1.032e2 53.881 1.366e2 2.208e2 3.977e2 1084 1.001'
    
In [22]:
    
diagnostics = []
for i in range(new_output_3.size):
    if len(new_output_3[i].split())<11:
        print i, len(new_output_3[i].split()),'\n'
        print new_output_3[i], '\n', new_output_3[i].split(), len(new_output_3[i].split())
        diagnostics.append(len(new_output_3[i].split()))
    else:
        continue
print np.unique(diagnostics)
    
    
In [23]:
    
new_output_3[1] = 'beta[1] 14.439 1.309 44.640 -72.30 -15.47 13.304 44.059 1.036e2 1163 1.0009'
new_output_3[2] = 'beta[2] -35.69 3.3417 1.158e2 -2.668e2 -1.113e2 -32.20 43.063 1.857e2 1201 1.0009'
    
In [24]:
    
diagnostics = []
for i in range(new_output_4.size):
    if len(new_output_4[i].split())<11:
        print i, len(new_output_4[i].split()),'\n'
        print new_output_4[i], '\n', new_output_4[i].split(), len(new_output_4[i].split())
        diagnostics.append(len(new_output_4[i].split()))
    else:
        continue
print np.unique(diagnostics)
    
    
In [25]:
    
new_output_4[2] = 'beta[2] 65.498 1.2748 46.873 -23.10 32.794 64.984 96.879 1.589e2 1352 1.0032'
    
In [26]:
    
header_fit = output_0[4].split()
print header_fit
    
    
In [27]:
    
header_addendum = 'parameter'
header_fit = [header_addendum] + header_fit
print header_fit
    
    
In [28]:
    
new_data_0 = list(np.zeros(len(header_fit)))
for i in range(new_output_0.size):
    if len(new_output_0[i].split())!=11: # the length of the list must be 11, in which case we connect them directly
        print "there is a problem!"
    else:
        new_output_i = np.array(new_output_0[i].split()).reshape(1,11)
        new_data_0   = np.vstack((new_data_0, new_output_i))
new_data_0 = new_data_0[1:,:]               # removing the zeroes in the beggining
    
In [29]:
    
new_data_1 = list(np.zeros(len(header_fit)))
for i in range(new_output_1.size):
    if len(new_output_1[i].split())!=11: # the length of the list must be 11, in which case we connect them directly
        print "there is a problem!"
    else:
        new_output_i = np.array(new_output_1[i].split()).reshape(1,11)
        new_data_1   = np.vstack((new_data_1, new_output_i))
new_data_1 = new_data_1[1:,:]               # removing the zeroes in the beggining
    
In [30]:
    
new_data_2 = list(np.zeros(len(header_fit)))
for i in range(new_output_2.size):
    if len(new_output_2[i].split())!=11: # the length of the list must be 11, in which case we connect them directly
        print "there is a problem!"
    else:
        new_output_i = np.array(new_output_2[i].split()).reshape(1,11)
        new_data_2   = np.vstack((new_data_2, new_output_i))
new_data_2 = new_data_2[1:,:]               # removing the zeroes in the beggining
    
In [31]:
    
new_data_3 = list(np.zeros(len(header_fit)))
for i in range(new_output_3.size):
    if len(new_output_3[i].split())!=11: # the length of the list must be 11, in which case we connect them directly
        print "there is a problem!"
    else:
        new_output_i = np.array(new_output_3[i].split()).reshape(1,11)
        new_data_3   = np.vstack((new_data_3, new_output_i))
new_data_3 = new_data_3[1:,:]               # removing the zeroes in the beggining
    
In [32]:
    
new_data_4 = list(np.zeros(len(header_fit)))
for i in range(new_output_4.size):
    if len(new_output_4[i].split())!=11: # the length of the list must be 11, in which case we connect them directly
        print "there is a problem!"
    else:
        new_output_i = np.array(new_output_4[i].split()).reshape(1,11)
        new_data_4   = np.vstack((new_data_4, new_output_i))
new_data_4 = new_data_4[1:,:]               # removing the zeroes in the beggining
    
In [ ]:
    
# new_dataframe         = pd.DataFrame(new_data)
# new_dataframe.columns = header_fit
# new_dataframe.to_csv('./Results/fit_results_3d_teste.csv', sep=',', index=False)
    
In [ ]:
    
# betas = {}
# betas['beta0'] = beta0
# betas['beta1'] = beta1
# betas['beta2'] = beta2
# betas['beta3'] = beta3
    
In [ ]:
    
# betas_dataframe = pd.DataFrame(betas)
# betas_dataframe.to_csv('./Results/betas_3d_teste.csv', sep=',', header=True, index=False)
    
In [42]:
    
parameters = new_data_0[:,0].astype(str)
pnew_idxs  = []
for i in range(parameters.size):
    if parameters[i][0:4]=='pnew':
        pnew_idxs.append(i)
    else:
        continue
model_results_0 = np.column_stack((new_data_0[pnew_idxs,:], x0_sim))
model_results0_df = pd.DataFrame(model_results_0)
model_results0_df.columns = header_fit + ['Z']
model_results0_df.to_csv('./Results/fit_el_fit0.csv', sep=',', header=True, index=False)
    
In [44]:
    
parameters = new_data_1[:,0].astype(str)
pnew_idxs  = []
for i in range(parameters.size):
    if parameters[i][0:4]=='pnew':
        pnew_idxs.append(i)
    else:
        continue
model_results_1 = np.column_stack((new_data_1[pnew_idxs,:], x1_sim))
model_results1_df = pd.DataFrame(model_results_1)
model_results1_df.columns = header_fit + ['Z']
model_results1_df.to_csv('./Results/fit_el_fit1.csv', sep=',', header=True, index=False)
    
In [45]:
    
parameters = new_data_2[:,0].astype(str)
pnew_idxs  = []
for i in range(parameters.size):
    if parameters[i][0:4]=='pnew':
        pnew_idxs.append(i)
    else:
        continue
model_results_2 = np.column_stack((new_data_2[pnew_idxs,:], x2_sim))
model_results2_df = pd.DataFrame(model_results_2)
model_results2_df.columns = header_fit + ['Z']
model_results2_df.to_csv('./Results/fit_el_fit2.csv', sep=',', header=True, index=False)
    
In [46]:
    
parameters = new_data_3[:,0].astype(str)
pnew_idxs  = []
for i in range(parameters.size):
    if parameters[i][0:4]=='pnew':
        pnew_idxs.append(i)
    else:
        continue
model_results_3 = np.column_stack((new_data_3[pnew_idxs,:], x3_sim))
model_results3_df = pd.DataFrame(model_results_3)
model_results3_df.columns = header_fit + ['Z']
model_results3_df.to_csv('./Results/fit_el_fit3.csv', sep=',', header=True, index=False)
    
In [47]:
    
parameters = new_data_4[:,0].astype(str)
pnew_idxs  = []
for i in range(parameters.size):
    if parameters[i][0:4]=='pnew':
        pnew_idxs.append(i)
    else:
        continue
model_results_4 = np.column_stack((new_data_4[pnew_idxs,:], x4_sim))
model_results4_df = pd.DataFrame(model_results_4)
model_results4_df.columns = header_fit + ['Z']
model_results4_df.to_csv('./Results/fit_el_fit4.csv', sep=',', header=True, index=False)
    
In [49]:
    
model_results1_df
    
    Out[49]:
In [ ]: