notebook.community

Edit and run



In [ ]:

    
import os
import numpy as np
import pandas as pd
import utils_sdss as utils



In [ ]:

    
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# plt.style.use("ggplot")
%matplotlib inline



In [ ]:

    
# load data
# load the unLRG sample list
listpath = "./BH_SDSS_cross_checked.xlsx"
data = pd.read_excel(listpath, "Sheet2")



In [ ]:

    
extinction_u = data["extinction_u"]
extinction_g = data["extinction_g"]
extinction_r = data["extinction_r"]
extinction_i = data["extinction_i"]
extinction_z = data["extinction_z"]

cmodelmag_u = np.nan_to_num(data["cmodelmag_u"])
cmodelmag_g = np.nan_to_num(data["cmodelmag_g"])
cmodelmag_r = np.nan_to_num(data["cmodelmag_r"])
cmodelmag_i = np.nan_to_num(data["cmodelmag_i"])
cmodelmag_z = np.nan_to_num(data["cmodelmag_z"])

cmodelmagerr_u = np.nan_to_num(data["cmodelmagerr_u"])
cmodelmagerr_g = np.nan_to_num(data["cmodelmagerr_g"])
cmodelmagerr_r = np.nan_to_num(data["cmodelmagerr_r"])
cmodelmagerr_i = np.nan_to_num(data["cmodelmagerr_i"])
cmodelmagerr_z = np.nan_to_num(data["cmodelmagerr_z"])



In [ ]:

    
# exclude bad sample
idx_u1 = np.where(cmodelmag_u != -9999)[0]
idx_u2 = np.where(cmodelmag_u != 0.0)[0]
idx_u3 = np.where(cmodelmag_u != 10000)[0]
idx = np.intersect1d(idx_u1,idx_u2)
idx = np.intersect1d(idx, idx_u3)



In [ ]:

    
# load reconstruct k_correction
with open("../../result-171102/sdss/reconf_unLRG.dat", 'r') as fp:
    reconmags = fp.readlines()



In [ ]:

    
# calc absolute magnitudes and save into excels
redshift = data["z"]
mag_abs = np.ones((len(redshift),))*10000
for j,i in enumerate(idx):
    z = redshift[i]
    dl = utils.calc_luminosity_distance(z) # luminosity distance [Mpc]
    mags = [cmodelmag_u[i],cmodelmag_g[i],cmodelmag_r[i],cmodelmag_r[i],cmodelmag_z[i]]
    exts = [extinction_u[i],extinction_g[i],extinction_r[i],extinction_r[i],extinction_z[i]]
    reconmag = reconmags[j].split(" ")
    mag_abs[i] = utils.calc_absmag(mags[2]-exts[2],dl.value,float(reconmag[3]))



In [ ]:

    
mode = data["Type"]
idx1 = np.where(mode == 1)[0]
idx2 = np.where(mode == 2)[0]
idx3 = np.where(mode == 3)[0]
idx4 = np.where(mode == 4)[0]
idx5 = np.where(mode == 5)[0]
idx6 = np.where(mode == 6)[0]
idx2_same = np.intersect1d(idx,idx2)
idx3_same = np.intersect1d(idx,idx3)



In [ ]:

    
BT = data["BT"]
idx_fr1 = np.where(BT == 1)[0]
idx_fr2 = np.where(BT == 2)[0]
idx_fr1 = np.intersect1d(idx, idx_fr1)
idx_fr2 = np.intersect1d(idx, idx_fr2)



In [ ]:

    
flux = data["S_nvss"]
lumo = utils.flux_to_luminosity(redshift = redshift, flux = flux)

Data



In [ ]:

    
lumo_fr1_typical = lumo[idx2_same] * 10**-22
lumo_fr2_typical = lumo[idx3_same] * 10**-22

mag_fr1_typical = mag_abs[idx2_same]
mag_fr2_typical = mag_abs[idx3_same]



In [ ]:

    
lumo_fr1_like = lumo[idx_fr1] * 10**-22
lumo_fr2_like = lumo[idx_fr2] * 10**-22

mag_fr1_like = mag_abs[idx_fr1]
mag_fr2_like = mag_abs[idx_fr2]



In [ ]:

    
mag_fr1 = np.hstack([mag_abs[idx_fr1], mag_abs[idx2_same]])
mag_fr2 = np.hstack([mag_abs[idx_fr2], mag_abs[idx3_same]])
lumo_fr1 = np.hstack([lumo[idx_fr1], lumo[idx2_same]]) * 10 ** -22
lumo_fr2 = np.hstack([lumo[idx_fr2], lumo[idx3_same]]) * 10 ** -22

Correlation analysis

Pearson: http://blog.csdn.net/hjh00/article/details/48230399
p-value: https://stackoverflow.com/questions/22306341/python-sklearn-how-to-calculate-p-values
Kolmogorov-Smirnov test: https://stackoverflow.com/questions/10884668/two-sample-kolmogorov-smirnov-test-in-python-scipy
Scipy.stats.kstest: https://docs.scipy.org/doc/scipy-0.7.x/reference/generated/scipy.stats.kstest.html



In [ ]:

    
import scipy.stats.stats as stats 
from sklearn.feature_selection import chi2



In [ ]:

    
# ks test
# https://docs.scipy.org/doc/scipy-0.7.x/reference/generated/scipy.stats.ks_2samp.html#scipy.stats.ks_2sam
lumo_ks_D_t,lumo_ks_p_t = stats.ks_2samp(lumo_fr1_typical,lumo_fr2_typical)
print("KS statistic of lumo: typical %.5f" % lumo_ks_D_t)
print("P-value of lumo: typical %.5e" % lumo_ks_p_t)



In [ ]:

    
mag_ks_D_t,mag_ks_p_t = stats.ks_2samp(mag_fr1_typical,mag_fr2_typical)
print("KS statistic of Mr: typical %.5f" % mag_ks_D_t)
print("P-value of Mr: typical %.5e" % mag_ks_p_t)



In [ ]:

    
# FR like
lumo_ks_D_l,lumo_ks_p_l = stats.ks_2samp(lumo_fr1_like,lumo_fr2_like)
print("KS statistic of lumo: like %.5f" % lumo_ks_D_l)
print("P-value of lumo: like %.5e" % lumo_ks_p_l)

mag_ks_D_l,mag_ks_p_l = stats.ks_2samp(mag_fr1_like,mag_fr2_like)
print("KS statistic of Mr: like %.5f" % mag_ks_D_l)
print("P-value of Mr: like %.5e" % mag_ks_p_l)



In [ ]:

    
# FR
lumo_ks_D,lumo_ks_p = stats.ks_2samp(lumo_fr1,lumo_fr2)
print("KS statistic of lumo: %.5f" % lumo_ks_D)
print("P-value of lumo: %.5e" % lumo_ks_p)

mag_ks_D,mag_ks_p = stats.ks_2samp(mag_fr1,mag_fr2)
print("KS statistic of Mr: %.5f" % mag_ks_D)
print("P-value of Mr: %.5e" % mag_ks_p)

P-value非常小，而ks statistic数值较大，认为FRI/FRII有一定的可分性。即原假设FRI/FRII的射电光学和光度服从统一分布是错误的。 但是，mag的D值，相对来说较小，说明光学数据上可分性没有luminosity高

Chi



In [ ]:

    
x_lumo = np.hstack((lumo_fr1,lumo_fr2))
x_lumo.shape



In [ ]:

    
x_lumo = np.log10(np.hstack((lumo_fr1,lumo_fr2)))
x_mag = np.hstack((mag_fr1,mag_fr2))
x_lumo_norm = (x_lumo - x_lumo.min()) / (x_lumo.max() - x_lumo.min())
x_mag_norm = (x_mag - x_mag.min()) / (x_mag.max() - x_mag.min())

x = np.vstack([x_lumo_norm,x_mag_norm])
x = x.transpose()
y = np.zeros(len(mag_abs))
y[idx2_same] = 1
y[idx_fr1] = 1
y[idx3_same] = 2
y[idx_fr2] = 2
y = y[np.where(y > 0)]

scores, pvalues = chi2(x, y)



In [ ]:

    
pvalues



In [ ]:

    
from scipy.stats import chisquare



In [ ]:

    
chisquare(x_lumo_norm, y)



In [ ]:

    
np.random.seed(12222222)
x = np.random.normal(0,1,size=(20000,))
y = np.random.normal(0,1,size=(20000,))
stats.ks_2samp(x,y)



In [ ]: