Compute mean distances and nblast scores for neurons from LM as well as traced EM neurons carried through several "bridging" regstrations from EM to LM

See https://github.com/bocklab/temca2data/tree/master/geometry_analysis


In [ ]:
library(nat)
library(nat.flybrains)
library(flycircuit)
library(elmr)
library(catmaid)
library(mushroom)
library(magrittr)
library(reshape2)
library(dplyr)
library(ggplot2)
library(igraph)
library(grid)
library(reshape2)
library(rgl)
library(dendroextras)
library(dendextend)

In [ ]:
# Source Zhihao's (ZZ) functions 
fafb_dir='/groups/saalfeld/home/bogovicj/dev/template/temca2data/geometry_analysis'
setwd( fafb_dir )
source(file.path(fafb_dir,"functions.R"))

load(paste0(getwd(), "/data/lm_coll_subset_170331.rda"))

setwd( '/groups/saalfeld/home/bogovicj/dev/template/template-building/R' )

In [ ]:
# The neurons that ZZ uses in his analysis
name_list=c("35447","39139","49865","62434","23512","30434","53671","57402",
            "65762","36390","42421","32399","57241","57414","23569","39682",
            "51886","54072","46493","40749","57410","27884","192547","45242",
            "22594","27611","37250","56424","67408","40306","40790","39668",
            "32214","36108","186573","22976","38885","53631","57418","57422",
            "1785034","27303","1775706","24622","35246","40637","24726","775731",
            "24251","43539","771242","30791","57333","57337","57341","67637",
            "27246","41308","57319","165303","23829","22132","23597","27295",
            "57311","57323","57353","57381","61221","755022","2863104","23432",
            "22422","56623","61773","28876","30891","33903","581536","58686",
            "21999","16","22906","57361","57365","57307","60799","68697","27048",
            "57349","65465","22277","22744","400943","51080","52106","55125",
            "57246","39254","57499","73937","37935","42927","55085","57003",
            "23134","41578")

In [ ]:
# load a set of neurons

load("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/jrc2018_fb_ca_coll.RData")

load("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/fcwb_fb_ca_coll.RData")

load("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/zz_fb_ca_coll.RData")

In [ ]:
# standardize the glomerular (aka subtype) names of PNs
fb_ca_coll[,'std_glom'] = glom_data[fb_ca_coll[,'glomerulus']]
jrc2018_fb_ca_coll[,'std_glom'] = glom_data[jrc2018_fb_ca_coll[,'glomerulus']]
fcwb_fb_ca_coll[,'std_glom'] = glom_data[fcwb_fb_ca_coll[,'glomerulus']]

# load pre-randomly selected LM PN data for analysis
# note that the LM PNs have been trimed to only keep their calyx collaterals
message( 'gloms' )
lm_std_gloms = c(fc_std_gloms, gj_std_gloms) %>% unique

# given a list of neurons, calculate pair-wise measurement (mean distance) and summarize results into a table
# for FAFB EM PNs
message( 'EM mean distance' )
fb_coll_tbl = summarize_pair_wise( fb_ca_coll, fb_std_gloms, 'FAFB', get_dist_summary )
jrc2018_fb_coll_tbl = summarize_pair_wise( jrc2018_fb_ca_coll, fb_std_gloms, 'FAFB_JRC2018', get_dist_summary )
fcwb_fb_coll_tbl = summarize_pair_wise( fcwb_fb_ca_coll, fb_std_gloms, 'FAFB_FCWB', get_dist_summary )

# given a list of neurons, calculate pair-wise measurement (nblast score) and summarize results into a table
# for FAFB EM PNs
message( 'EM nblast' )
fb_nblast_tbl = summarize_pair_wise( fb_ca_coll, fb_std_gloms, 'FAFB', get_nblast_score )
jrc2018_fb_nblast_tbl = summarize_pair_wise( jrc2018_fb_ca_coll, fb_std_gloms, 'FAFB_JRC2018', get_nblast_score )
fcwb_fb_nblast_tbl = summarize_pair_wise( fcwb_fb_ca_coll, fb_std_gloms, 'FAFB_FCWB', get_nblast_score )


# lm_subset_170331 is a subset of LM PNs from sampling the whole population of LM PNs from both sources (flycircuit and Jefferis2007)
lm_coll_subset = lm_subset_170331

# remove an extra "DM5" and an extra "DL2d" according to Marta C.'s corrected identifications of EM PN counts
message( 'remove' )
to_remove = c(which(lm_coll_subset[,'Glomerulus'] %in% "DM5")[[1]],
  which(lm_coll_subset[,'Glomerulus'] %in% "DL2d")[[1]])
lm_coll_subset = lm_coll_subset[-to_remove]


# Same as above but for LM PNs
message( 'LM mean distance' )
lm_coll_tbl_subset = summarize_pair_wise(lm_coll_subset, lm_std_gloms, 'LM', get_dist_summary)
message( 'LM nblast' )
lm_nblast_subset = summarize_pair_wise(lm_coll_subset, lm_std_gloms, 'LM', get_nblast_score)

In [ ]:
# lm_std_gloms
print(head(fb_coll_tbl))
print( ' ')
print( ' ')
print(head(jrc2018_fb_coll_tbl))
print( ' ')
print( ' ')
print(head(fcwb_fb_coll_tbl))

In [ ]:
# combine the resulting tables together for plotting

###########################################
# USE FAFB -> JRC2018 -> FCWB REGISTRATION
###########################################
emlm_tbl_subset = rbind( lm_coll_tbl_subset, fb_coll_tbl, jrc2018_fb_coll_tbl ) %>% 
    mutate(type=factor(.$type))

emlm_nblast_tbl = rbind( lm_nblast_subset, fb_nblast_tbl, jrc2018_fb_nblast_tbl ) %>% 
    mutate(type=factor(.$type))

# order glom by difference between LM and EM------
all_glom_rank = rank_glom_by_diff(emlm_tbl_subset) %>%
  c(gsub("glomerulus ", "", fb_gloms_extra) %>% sort)

nblast_glom_rank = rank_glom_by_nblast(emlm_nblast_tbl) %>%
  c(gsub("glomerulus ", "", fb_gloms_extra) %>% sort)

######################################
# USE DIRECT TO FCWB REGISTRATION
######################################
# emlm_tbl_subset = rbind( lm_coll_tbl_subset, fb_coll_tbl, fcwb_fb_coll_tbl ) %>% 
#     mutate(type=factor(.$type))

# emlm_nblast_tbl = rbind( lm_nblast_subset, fb_nblast_tbl, fcwb_fb_nblast_tbl ) %>% 
#     mutate(type=factor(.$type))

# message( 'rank' )
# all_glom_rank = rank_glom_by_diff(emlm_tbl_subset2) %>%
#   c(gsub("glomerulus ", "", fb_gloms_extra) %>% sort) %>%
#   print

In [ ]:
results = tidyr::complete(emlm_tbl_subset, type, groups)
results$type = factor(results$type, levels=all_glom_rank)
results = mutate(results, pseudo_mean = dist_mean)

# fix the groups col to be the order we want
results$groups = factor(results$groups, levels=c('FAFB_JRC2018', 'FAFB', 'LM'))

In [ ]:
library(scales)
# show_col(hue_pal()(2))

# Add a new color

c2 = hue_pal()(2)  # colors in Zhihao's figure
new_colors =  c( "#000000", c2[1], c2[2] )
# print( new_colors )
# head(results)
# print( results[sample( nrow(results), 12 ), ])

In [ ]:
# Distance dcatter plot


ggplot(results, aes(y=dist_mean, x=groups)) +
  geom_jitter(aes(color=groups), size = 2.8, width=.36) +
  facet_grid(. ~ type, switch = "x") +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        strip.text.x = element_text(size=24),
        strip.background = element_rect(fill='white'),
        legend.title = element_blank(),
        text=element_text(size=32),
        legend.position = "topright",
        panel.spacing = unit(.2, "lines"),
        panel.grid.minor = element_blank(),
#          asp=12,
        panel.grid.major.x = element_blank())+
  scale_y_continuous(breaks=scales::pretty_breaks(n=8), limits = c(0,17), expand=c(0,0)) +
  #scale_color_discrete(labels=c("EM", "LM")) +
  scale_color_manual( values=new_colors ) +
  ylab( expression(paste( "Pair-wise Mean Distance (",mu,"m)"))) +
  xlab("PN Subtypes")

ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/distance_scatter.png", 
       scale=1.4, width=24, height=8 )

ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/distance_scatter.pdf", 
       scale=1.4, width=24, height=8 )

In [ ]:
# Distance Histogram

data_tbl = emlm_tbl_subset
mean_tbl = filter(data_tbl, type %in% unname(glom_data[fb_gloms])) %>%
  group_by(groups) %>%
  summarize(m=mean(dist_mean), sd=sd(dist_mean))

idx = which(data_tbl$dist_mean > 10)
data_tbl[idx, 'dist_mean'] = rep(10.2, length(idx))

# reorder groups
data_tbl$groups = factor(data_tbl$groups, levels=c('FAFB_JRC2018', 'FAFB', 'LM'))

ggplot(data_tbl, aes(x=dist_mean, fill=groups)) +
  stat_bin(breaks=seq(1, 10.5, 0.5), position="dodge") +
  scale_y_continuous(breaks = scales::pretty_breaks(n=10), limit=c(0,22), expand=c(0,0)) +
  scale_x_continuous(breaks=seq(1, 10.5, 0.5), labels=c(seq(1, 10, 0.5), "> 10")) + 
#   scale_fill_discrete(labels=c('EM1', 'EM2', 'LM')) +
  annotate("point", x=3.5, y=21.5, size=.5) +
  annotate("point", x=5.5, y=21.5, size=.5) +
  theme(legend.title = element_blank(),
        text=element_text(size=32),
        legend.position = 'right',
        panel.background = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line()) +
  scale_fill_manual( values=new_colors ) + 
  ylab("Counts") +
  xlab( expression( paste( "Mean Distance (", mu, "m)")))

ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/distance_hist.png", 
       scale=1.4, width=24, height=8 )

ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/distance_hist.pdf", 
       scale=1.4, width=24, height=8 )

In [ ]:
nblast_results = emlm_nblast_tbl
nblast_results = tidyr::complete(nblast_results, type, groups)
nblast_results$type = factor(nblast_results$type, levels=nblast_glom_rank)
nblast_results$groups = factor(nblast_results$groups, levels=c('FAFB_JRC2018', 'FAFB', 'LM'))

data_tbl = emlm_nblast_tbl
mean_nblast_tbl = filter(data_tbl, type %in% unname(glom_data[fb_gloms])) %>% 
  group_by(groups) %>% 
  summarize(m=mean(nblast_mean_score), sd=sd(nblast_mean_score))

idx = which(data_tbl$nblast_mean_score < -0.05)
data_tbl[idx, 'nblast_mean_score'] = rep(-0.055, length(idx))

# Reorder groups
data_tbl$groups = factor(data_tbl$groups, levels=c('FAFB_JRC2018', 'FAFB', 'LM'))

head(data_tbl)

In [ ]:
# NBLAST Scatterplot

ggplot(nblast_results, aes(y=nblast_mean_score, x=groups)) +
  geom_jitter(aes(color=groups), size = 2.8, width=.36) +
  facet_grid(. ~ type, switch = "x") + 
  theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(),
      #  strip.text.x=element_text(size=12),
        legend.title = element_blank(),
        text=element_text(size=32),
        legend.position = "right",
        strip.background = element_rect(fill='white'),
        panel.spacing = unit(.2, "lines"),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank()) +
        #scale_color_discrete(labels=c('EM','LM')) +
    scale_y_continuous(breaks=scales::pretty_breaks(n=10), limit=c(-0.2, .9), expand=c(0,0)) +
    scale_color_manual( values=new_colors ) +
    ylab("Pair-wise NBLAST scores \n(higher scores => more similar)") +
    xlab("PN Subtypes")

ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/nblast_scatter.png", 
       scale=1.4, width=24, height=8 )

ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/nblast_scatter.pdf", 
       scale=1.4, width=24, height=8 )

In [ ]:
# NBLAST histogram

ggplot( data_tbl, aes(x=nblast_mean_score, fill=groups)) +
   stat_bin(breaks=seq(-0.1, 0.9, .05), position='dodge') +
  scale_y_continuous(breaks = scales::pretty_breaks(n=10), limit=c(0,23), expand=c(0,0)) +
  scale_x_continuous(breaks=seq(-0.1, 0.9, .05), labels=c("< -0.05", seq(-0.05, 0.9, .05)), expand=c(0,0)) +
  #scale_fill_discrete(labels=c('EM','LM')) +
  annotate("point", x=.55, y=22.5, size=.5) +
  annotate("point", x=.34, y=22.5, size=.5) +
  theme(legend.title = element_blank(), 
        text=element_text(size=32),
        legend.position = 'right',
        panel.background = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line()) +
  scale_fill_manual( values=new_colors ) + 
  ylab("Counts") +
  xlab("NBLAST score")

ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/nblast_hist.png", 
       scale=1.4, width=24, height=8 )

ggsave("/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/R/nblast_hist.pdf", 
       scale=1.4, width=24, height=8 )

In [ ]: