In [ ]:
library(SNPRelate)
library(reshape2)
library(ggplot2)
library(Cairo)
In [ ]:
#system("rm -f flowers.gds")
#snpgdsVCF2GDS("bwa_msdr_MR_ih_lc_nr503_F.vcf.gz", "flowers.gds",
# ignore.chr.prefix = c("scaffold_", "chromosome_"))
In [ ]:
# snpgdsSummary("flowers.gds")
In [ ]:
geno <- snpgdsOpen("flowers.gds")
In [ ]:
pca <- snpgdsPCA(geno, num.thread=12, verbose = T)
The names of lines in the VCF do not match what is given in the SRA database. Our metadata table (from the SRA) has line IDs like CC-1010, whereas the VCF has CR1010. The below converts VCF names to SRA names.
In [ ]:
sra_names = sub("CR", "CC-", pca$sample.id)
Import metadata from the kWIP analysis under 'writeups'
In [ ]:
chlamy_meta = read.delim("../chlamy/chlamy_meta.tab")
Note that all the "sra names" from above conversion match the names in the SRA metadata
In [ ]:
m = match(sra_names, chlamy_meta$strain)
m
Reorder the metadata, assert the names match
In [ ]:
chlamy_meta = chlamy_meta[m, ]
print(paste(chlamy_meta$strain, sra_names))
Assemble all data & metadata for plotting
In [ ]:
plotdat = data.frame(sample=pca$sample.id,
sraname=sra_names,
region=chlamy_meta$origin,
mbases=chlamy_meta$MBases,
PC1=pca$eigenvect[,1],
PC2=pca$eigenvect[,2],
PC3=pca$eigenvect[,3])
In [ ]:
ggplot(plotdat, aes(x=PC1, y=PC2)) +
geom_point(aes(colour=region)) +
theme_bw()
The above plot is upside-down from the flowers et al. plot. Reverse PC2 and try again
In [ ]:
plotdat$PC2 = -plotdat$PC2
In [ ]:
cols = c("light blue", "blue", "dark green", "red" )
p = ggplot(plotdat, aes(x=PC1, y=PC2)) +
geom_point(aes(colour=region), size=2) +
scale_color_manual(values = cols, name="Region") +
ggtitle("SNPrelate") +
theme_bw() +
theme(panel.grid = element_blank()
#, axis.text = element_blank(), axis.ticks = element_blank()
)
print(p)
In [ ]:
pdf("chlamy_snprelate.pdf", width=4, height=3)
print(p)
dev.off()
svg("chlamy_snprelate.svg", width=4, height=3)
print(p)
dev.off()
Calculate the IBS matrix for comparison to kWIP's matrix (1-IBS ~ WIP)
(Old code -- SNP Distance (IBD))
ibd <- snpgdsIBDMoM(geno, maf=0.05, missing.rate=0.05, num.thread=12)
r = acast(snpgdsIBDSelection(ibd), ID1 ~ ID2, value.var = "kinship")
r[lower.tri(r)] = t(r)[lower.tri(r)]
write.table(r, "kinship.mat", sep="\t", quote=F)
In [ ]:
ibs = snpgdsIBS(geno)
ibs.m = ibs$ibs
rownames(ibs.m) = colnames(ibs.m) = sub("CR", "CC-", ibs$sample.id)
ibs = ibs.m
ibs1m = 1-ibs
In [ ]:
wip = as.matrix(read.delim("../chlamy/kwip/flowers_wip.dist", row.names=1))
# Rename to match strain names in VCF
rownames(wip) = colnames(wip) = chlamy_meta$Sample_name[match(rownames(wip), chlamy_meta$Run)]
# Reorder WIP matrix to IBS matrix order
m = match(rownames(ibs), rownames(wip))
wip = wip[m,m]
In [ ]:
ip = as.matrix(read.delim("../chlamy/kwip/flowers_ip.dist", row.names=1))
# Rename to match strain names in VCF
rownames(ip) = colnames(ip) = chlamy_meta$Sample_name[match(rownames(ip), chlamy_meta$Run)]
# Reorder ip matrix to IBS matrix order
m = match(rownames(ibs), rownames(ip))
ip = ip[m,m]
In [ ]:
all(rownames(wip) == rownames(ibs))
all(rownames(ip) == rownames(ibs))
In [ ]:
image(wip)
image(ip)
image(ibs1m)
In [ ]:
wip.t = wip[upper.tri(wip)]
ip.t = ip[upper.tri(wip)]
ibs.t = ibs1m[upper.tri(ibs1m)]
wip.d = as.dist(wip)
ip.d = as.dist(ip)
ibs.d = as.dist(ibs1m)
In [ ]:
cor(wip.t, ibs.t, method="spearman")
In [ ]:
cor(ip.t, ibs.t, method="spearman")
In [ ]: