Compute mean distances and nblast scores for neurons from LM as well as traced EM neurons carried through several "bridging" regstrations from EM to LM
See https://github.com/bocklab/temca2data/tree/master/geometry_analysis
In [ ]:
library(nat)
library(nat.flybrains)
library(flycircuit)
library(elmr)
library(catmaid)
library(mushroom)
library(magrittr)
library(reshape2)
library(dplyr)
library(ggplot2)
library(igraph)
library(grid)
library(reshape2)
library(rgl)
library(dendroextras)
library(dendextend)
In [ ]:
# Source Zhihao's (ZZ) functions
fafb_dir='/groups/saalfeld/home/bogovicj/dev/template/temca2data/geometry_analysis'
setwd( fafb_dir )
source(file.path(fafb_dir,"functions.R"))
load(paste0(getwd(), "/data/lm_coll_subset_170331.rda"))
setwd( '/groups/saalfeld/home/bogovicj/dev/template/template-building/R' )
In [ ]:
# The neurons that ZZ uses in his analysis
name_list=c("35447","39139","49865","62434","23512","30434","53671","57402",
"65762","36390","42421","32399","57241","57414","23569","39682",
"51886","54072","46493","40749","57410","27884","192547","45242",
"22594","27611","37250","56424","67408","40306","40790","39668",
"32214","36108","186573","22976","38885","53631","57418","57422",
"1785034","27303","1775706","24622","35246","40637","24726","775731",
"24251","43539","771242","30791","57333","57337","57341","67637",
"27246","41308","57319","165303","23829","22132","23597","27295",
"57311","57323","57353","57381","61221","755022","2863104","23432",
"22422","56623","61773","28876","30891","33903","581536","58686",
"21999","16","22906","57361","57365","57307","60799","68697","27048",
"57349","65465","22277","22744","400943","51080","52106","55125",
"57246","39254","57499","73937","37935","42927","55085","57003",
"23134","41578")
In [ ]:
# load a set of neurons
load("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/jrc2018_fb_ca_coll.RData")
load("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/fcwb_fb_ca_coll.RData")
load("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/zz_fb_ca_coll.RData")
In [ ]:
# standardize the glomerular (aka subtype) names of PNs
fb_ca_coll[,'std_glom'] = glom_data[fb_ca_coll[,'glomerulus']]
jrc2018_fb_ca_coll[,'std_glom'] = glom_data[jrc2018_fb_ca_coll[,'glomerulus']]
fcwb_fb_ca_coll[,'std_glom'] = glom_data[fcwb_fb_ca_coll[,'glomerulus']]
# load pre-randomly selected LM PN data for analysis
# note that the LM PNs have been trimed to only keep their calyx collaterals
message( 'gloms' )
lm_std_gloms = c(fc_std_gloms, gj_std_gloms) %>% unique
# given a list of neurons, calculate pair-wise measurement (mean distance) and summarize results into a table
# for FAFB EM PNs
message( 'EM mean distance' )
fb_coll_tbl = summarize_pair_wise( fb_ca_coll, fb_std_gloms, 'FAFB', get_dist_summary )
jrc2018_fb_coll_tbl = summarize_pair_wise( jrc2018_fb_ca_coll, fb_std_gloms, 'FAFB_JRC2018', get_dist_summary )
fcwb_fb_coll_tbl = summarize_pair_wise( fcwb_fb_ca_coll, fb_std_gloms, 'FAFB_FCWB', get_dist_summary )
# given a list of neurons, calculate pair-wise measurement (nblast score) and summarize results into a table
# for FAFB EM PNs
message( 'EM nblast' )
fb_nblast_tbl = summarize_pair_wise( fb_ca_coll, fb_std_gloms, 'FAFB', get_nblast_score )
jrc2018_fb_nblast_tbl = summarize_pair_wise( jrc2018_fb_ca_coll, fb_std_gloms, 'FAFB_JRC2018', get_nblast_score )
fcwb_fb_nblast_tbl = summarize_pair_wise( fcwb_fb_ca_coll, fb_std_gloms, 'FAFB_FCWB', get_nblast_score )
# lm_subset_170331 is a subset of LM PNs from sampling the whole population of LM PNs from both sources (flycircuit and Jefferis2007)
lm_coll_subset = lm_subset_170331
# remove an extra "DM5" and an extra "DL2d" according to Marta C.'s corrected identifications of EM PN counts
message( 'remove' )
to_remove = c(which(lm_coll_subset[,'Glomerulus'] %in% "DM5")[[1]],
which(lm_coll_subset[,'Glomerulus'] %in% "DL2d")[[1]])
lm_coll_subset = lm_coll_subset[-to_remove]
# Same as above but for LM PNs
message( 'LM mean distance' )
lm_coll_tbl_subset = summarize_pair_wise(lm_coll_subset, lm_std_gloms, 'LM', get_dist_summary)
message( 'LM nblast' )
lm_nblast_subset = summarize_pair_wise(lm_coll_subset, lm_std_gloms, 'LM', get_nblast_score)
In [ ]:
# lm_std_gloms
print(head(fb_coll_tbl))
print( ' ')
print( ' ')
print(head(jrc2018_fb_coll_tbl))
print( ' ')
print( ' ')
print(head(fcwb_fb_coll_tbl))
In [ ]:
# combine the resulting tables together for plotting
###########################################
# USE FAFB -> JRC2018 -> FCWB REGISTRATION
###########################################
emlm_tbl_subset = rbind( lm_coll_tbl_subset, fb_coll_tbl, jrc2018_fb_coll_tbl ) %>%
mutate(type=factor(.$type))
emlm_nblast_tbl = rbind( lm_nblast_subset, fb_nblast_tbl, jrc2018_fb_nblast_tbl ) %>%
mutate(type=factor(.$type))
# order glom by difference between LM and EM------
all_glom_rank = rank_glom_by_diff(emlm_tbl_subset) %>%
c(gsub("glomerulus ", "", fb_gloms_extra) %>% sort)
nblast_glom_rank = rank_glom_by_nblast(emlm_nblast_tbl) %>%
c(gsub("glomerulus ", "", fb_gloms_extra) %>% sort)
######################################
# USE DIRECT TO FCWB REGISTRATION
######################################
# emlm_tbl_subset = rbind( lm_coll_tbl_subset, fb_coll_tbl, fcwb_fb_coll_tbl ) %>%
# mutate(type=factor(.$type))
# emlm_nblast_tbl = rbind( lm_nblast_subset, fb_nblast_tbl, fcwb_fb_nblast_tbl ) %>%
# mutate(type=factor(.$type))
# message( 'rank' )
# all_glom_rank = rank_glom_by_diff(emlm_tbl_subset2) %>%
# c(gsub("glomerulus ", "", fb_gloms_extra) %>% sort) %>%
# print
In [ ]:
results = tidyr::complete(emlm_tbl_subset, type, groups)
results$type = factor(results$type, levels=all_glom_rank)
results = mutate(results, pseudo_mean = dist_mean)
# fix the groups col to be the order we want
results$groups = factor(results$groups, levels=c('FAFB_JRC2018', 'FAFB', 'LM'))
In [ ]:
library(scales)
# show_col(hue_pal()(2))
# Add a new color
c2 = hue_pal()(2) # colors in Zhihao's figure
new_colors = c( "#000000", c2[1], c2[2] )
# print( new_colors )
# head(results)
# print( results[sample( nrow(results), 12 ), ])
In [ ]:
# Distance dcatter plot
ggplot(results, aes(y=dist_mean, x=groups)) +
geom_jitter(aes(color=groups), size = 2.8, width=.36) +
facet_grid(. ~ type, switch = "x") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
strip.text.x = element_text(size=24),
strip.background = element_rect(fill='white'),
legend.title = element_blank(),
text=element_text(size=32),
legend.position = "topright",
panel.spacing = unit(.2, "lines"),
panel.grid.minor = element_blank(),
# asp=12,
panel.grid.major.x = element_blank())+
scale_y_continuous(breaks=scales::pretty_breaks(n=8), limits = c(0,17), expand=c(0,0)) +
#scale_color_discrete(labels=c("EM", "LM")) +
scale_color_manual( values=new_colors ) +
ylab( expression(paste( "Pair-wise Mean Distance (",mu,"m)"))) +
xlab("PN Subtypes")
ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/distance_scatter.png",
scale=1.4, width=24, height=8 )
ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/distance_scatter.pdf",
scale=1.4, width=24, height=8 )
In [ ]:
# Distance Histogram
data_tbl = emlm_tbl_subset
mean_tbl = filter(data_tbl, type %in% unname(glom_data[fb_gloms])) %>%
group_by(groups) %>%
summarize(m=mean(dist_mean), sd=sd(dist_mean))
idx = which(data_tbl$dist_mean > 10)
data_tbl[idx, 'dist_mean'] = rep(10.2, length(idx))
# reorder groups
data_tbl$groups = factor(data_tbl$groups, levels=c('FAFB_JRC2018', 'FAFB', 'LM'))
ggplot(data_tbl, aes(x=dist_mean, fill=groups)) +
stat_bin(breaks=seq(1, 10.5, 0.5), position="dodge") +
scale_y_continuous(breaks = scales::pretty_breaks(n=10), limit=c(0,22), expand=c(0,0)) +
scale_x_continuous(breaks=seq(1, 10.5, 0.5), labels=c(seq(1, 10, 0.5), "> 10")) +
# scale_fill_discrete(labels=c('EM1', 'EM2', 'LM')) +
annotate("point", x=3.5, y=21.5, size=.5) +
annotate("point", x=5.5, y=21.5, size=.5) +
theme(legend.title = element_blank(),
text=element_text(size=32),
legend.position = 'right',
panel.background = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line()) +
scale_fill_manual( values=new_colors ) +
ylab("Counts") +
xlab( expression( paste( "Mean Distance (", mu, "m)")))
ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/distance_hist.png",
scale=1.4, width=24, height=8 )
ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/distance_hist.pdf",
scale=1.4, width=24, height=8 )
In [ ]:
nblast_results = emlm_nblast_tbl
nblast_results = tidyr::complete(nblast_results, type, groups)
nblast_results$type = factor(nblast_results$type, levels=nblast_glom_rank)
nblast_results$groups = factor(nblast_results$groups, levels=c('FAFB_JRC2018', 'FAFB', 'LM'))
data_tbl = emlm_nblast_tbl
mean_nblast_tbl = filter(data_tbl, type %in% unname(glom_data[fb_gloms])) %>%
group_by(groups) %>%
summarize(m=mean(nblast_mean_score), sd=sd(nblast_mean_score))
idx = which(data_tbl$nblast_mean_score < -0.05)
data_tbl[idx, 'nblast_mean_score'] = rep(-0.055, length(idx))
# Reorder groups
data_tbl$groups = factor(data_tbl$groups, levels=c('FAFB_JRC2018', 'FAFB', 'LM'))
head(data_tbl)
In [ ]:
# NBLAST Scatterplot
ggplot(nblast_results, aes(y=nblast_mean_score, x=groups)) +
geom_jitter(aes(color=groups), size = 2.8, width=.36) +
facet_grid(. ~ type, switch = "x") +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(),
# strip.text.x=element_text(size=12),
legend.title = element_blank(),
text=element_text(size=32),
legend.position = "right",
strip.background = element_rect(fill='white'),
panel.spacing = unit(.2, "lines"),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank()) +
#scale_color_discrete(labels=c('EM','LM')) +
scale_y_continuous(breaks=scales::pretty_breaks(n=10), limit=c(-0.2, .9), expand=c(0,0)) +
scale_color_manual( values=new_colors ) +
ylab("Pair-wise NBLAST scores \n(higher scores => more similar)") +
xlab("PN Subtypes")
ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/nblast_scatter.png",
scale=1.4, width=24, height=8 )
ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/nblast_scatter.pdf",
scale=1.4, width=24, height=8 )
In [ ]:
# NBLAST histogram
ggplot( data_tbl, aes(x=nblast_mean_score, fill=groups)) +
stat_bin(breaks=seq(-0.1, 0.9, .05), position='dodge') +
scale_y_continuous(breaks = scales::pretty_breaks(n=10), limit=c(0,23), expand=c(0,0)) +
scale_x_continuous(breaks=seq(-0.1, 0.9, .05), labels=c("< -0.05", seq(-0.05, 0.9, .05)), expand=c(0,0)) +
#scale_fill_discrete(labels=c('EM','LM')) +
annotate("point", x=.55, y=22.5, size=.5) +
annotate("point", x=.34, y=22.5, size=.5) +
theme(legend.title = element_blank(),
text=element_text(size=32),
legend.position = 'right',
panel.background = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line()) +
scale_fill_manual( values=new_colors ) +
ylab("Counts") +
xlab("NBLAST score")
ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/nblast_hist.png",
scale=1.4, width=24, height=8 )
ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/nblast_hist.pdf",
scale=1.4, width=24, height=8 )
In [ ]: