In [204]:
library(data.table)
source("/net/fantasia/home/yjingj/Scripts/Rscript/R_Funcs/LoadData_func.r")
source("/net/fantasia/home/yjingj/Scripts/Rscript/R_Funcs/qplot_func.r")
data_dir="/net/fantasia/home/yjingj/GIT/bfGWAS_SS/1KG_example/Test_Wkdir/output/"
In [146]:
Geno = fread("/net/fantasia/home/yjingj/GIT/bfGWAS_SS/1KG_example/Test_Wkdir/output/CFH_REGION_1KG.geno",
header = TRUE)
setkey(Geno, ID)
Pheno = read.table("/net/fantasia/home/yjingj/GIT/bfGWAS_SS/1KG_example/ExData/phenoAMD_1KG.txt")
#head(Pheno)
pheno_id = colnames(Geno)[-(1:5)]
Pheno[, 2] = Pheno[, 2] - mean(Pheno[, 2])
mean(Pheno[, 2])
var(Pheno[, 2])
#length(pheno_id)
#head(pheno_id)
In [217]:
paramdata_SS = Load_bfGWAS_SS(paste(data_dir, filehead, "_SS.paramtemp", sep=""), header = FALSE)
dim(paramdata_SS)
VarSS = fread("/net/fantasia/home/yjingj/GIT/bfGWAS_SS/1KG_example/Test_Wkdir/output/CFH_REGION_1KG_SS.VarSS",
header = TRUE)
setnames(VarSS, c("ID", "CHR", "POS", "REF", "ALT", "beta", "beta_sd", "MAF", "xtx") )
setkey(VarSS, ID)
head(VarSS)
dim(VarSS)
In [ ]:
X = as.data.frame(t(Geno[paramdata_SS$ID, -(1:5), with = FALSE]) )
dim(X)
#apply(X, 2, mean) / 2
X = scale(X, center = TRUE, scale = FALSE)
XtX = t(X) %*% X
mybeta = t(X) %*% Pheno[, 2] / diag(XtX)
In [202]:
#length(mybeta)
#length(paramdata_SS$beta)
dim(X)
#dim(Pheno)
#plot(paramdata_SS$beta, t(mybeta)
#abline(0, 1)
dim(XtX)
dim( paramdata_SS)
plot(diag(XtX),)
abline(0, 1)
In [215]:
test_snp = paramdata_SS$ID[paramdata_SS$rank %in% c(2, 9, 19, 244, 3364) ]
paramdata_SS[test_snp, ]
X = as.data.frame(t(Geno[test_snp, -(1:5), with = FALSE]) )
colnames(X) = test_snp
#apply(X, 2, mean) / 2
X = scale(X, center = TRUE, scale = FALSE)
test_data = data.frame(y = Pheno[, 2], X)
#head(test_data)
fit = lm(y~., data = test_data)
summary(fit)
In [214]:
idx= sort(which(paramdata_SS$rank %in% c(2, 9, 19, 244, 3364)))
test_snp = paramdata_SS$ID[idx]
paramdata_SS[test_snp, ]
X = as.data.frame(t(Geno[test_snp, -(1:5), with = FALSE]) )
colnames(X) = test_snp
#apply(X, 2, mean) / 2
X = scale(X, center = TRUE, scale = FALSE)
XtX = t(X) %*% (X)
XtX
Xty = t(X) %*% Pheno[, 2]
Xty
solve(XtX) %*% Xty
#VarSS[test_snp, ]
#diag(XtX)
#mybeta = (Xty) / diag(XtX)
#mybeta
head(Pheno[, 2])
In [233]:
filehead="CFH_REGION_1KG"
paste(data_dir, filehead, "_SS.paramtemp", sep="")
#paramdata_SS = Load_bfGWAS_SS(paste(data_dir, filehead, "_SS.paramtemp", sep=""), header = FALSE)
#dim(paramdata_SS)
#head(paramdata_SS)
paramdata_SS = Load_bfGWAS_SS(paste(data_dir, "Read_SS.paramtemp", sep=""), header = FALSE)
#paste(data_dir, filehead, ".paramtemp", sep="")
paramdata = Load_bfGWAS(paste(data_dir, filehead, ".paramtemp", sep=""))
#dim(paramdata)
#head(paramdata)
#sum(paramdata$ID == paramdata_SS$ID)
In [234]:
#plot(-log10(paramdata_SS$pval_Chi2), -log10(paramdata$pval_LRT))
range(paramdata$pi)
which(paramdata$pi > 0.1068)
paramdata[paramdata$pi > 0.1068, ]
range(paramdata_SS$pi)
which(paramdata_SS$pi > 0.1068)
paramdata_SS[paramdata_SS$pi > 0.1068, ]
#plot(paramdata_SS$pi, paramdata$pi)
#abline(0, 1)
In [251]:
sum(paramdata_SS$pi)
sum(paramdata$pi)
#plot(paramdata_SS$pval_Chi2, paramdata$pval_LRT)
theme_set(theme_gray(base_size = 24))
qplot(paramdata_SS$pi, paramdata$pi, main = "Bayesian PP") + xlab("Using Summary Statistics") +
ylab("Using Individual-level Data") + xlim(0, 1) + ylim(0, 1) +
geom_abline(intercept = 0, slope = 1)
#plot(paramdata_SS$beta[which(paramdata_SS$pi > 0.1068)], paramdata$beta[which(paramdata_SS$pi > 0.1068)])
#abline(0, 1)
In [238]:
a = 7.67; b = 1.1225 # 1KG example
(a - b) / a
#a = 74; b = 17.8
#(a - b) / a