In [1]:
library(Seurat)
library(dplyr)
library(Matrix)
library(cowplot)
library(monocle)
library(cellrangerRkit)
library(reshape)
library(tidyverse)


Loading required package: ggplot2
Loading required package: cowplot

Attaching package: ‘cowplot’

The following object is masked from ‘package:ggplot2’:

    ggsave

Loading required package: Matrix
Warning message:
“replacing previous import ‘colorspace::plot’ by ‘graphics::plot’ when loading ‘NMF’”Warning message:
“replacing previous import ‘dendextend::cutree’ by ‘stats::cutree’ when loading ‘NMF’”
Attaching package: ‘dplyr’

The following object is masked from ‘package:Biobase’:

    combine

The following objects are masked from ‘package:BiocGenerics’:

    combine, intersect, setdiff, union

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Loading required package: VGAM
Loading required package: stats4
Loading required package: splines
Loading required package: DDRTree
Loading required package: irlba
Loading required package: RColorBrewer
Loading required package: bit64
Loading required package: bit
Attaching package bit
package:bit (c) 2008-2012 Jens Oehlschlaegel (GPL-2)
creators: bit bitwhich
coercion: as.logical as.integer as.bit as.bitwhich which
operator: ! & | xor != ==
querying: print length any all min max range sum summary
bit access: length<- [ [<- [[ [[<-
for more help type ?bit

Attaching package: ‘bit’

The following object is masked from ‘package:base’:

    xor

Attaching package bit64
package:bit64 (c) 2011-2012 Jens Oehlschlaegel
creators: integer64 seq :
coercion: as.integer64 as.vector as.logical as.integer as.double as.character as.bin
logical operator: ! & | xor != == < <= >= >
arithmetic operator: + - * / %/% %% ^
math: sign abs sqrt log log2 log10
math: floor ceiling trunc round
querying: is.integer64 is.vector [is.atomic} [length] format print str
values: is.na is.nan is.finite is.infinite
aggregation: any all min max range sum prod
cumulation: diff cummin cummax cumsum cumprod
access: length<- [ [<- [[ [[<-
combine: c rep cbind rbind as.data.frame
WARNING don't use as subscripts
WARNING semantics differ from integer
for more help type ?bit64

Attaching package: ‘bit64’

The following object is masked from ‘package:bit’:

    still.identical

The following object is masked from ‘package:Biobase’:

    cache

The following objects are masked from ‘package:BiocGenerics’:

    match, order, rank

The following objects are masked from ‘package:base’:

    :, %in%, is.double, match, order, rank

Loading required package: Rmisc
Loading required package: lattice
Loading required package: plyr
------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
------------------------------------------------------------------------------

Attaching package: ‘plyr’

The following objects are masked from ‘package:dplyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise,
    summarize


Attaching package: ‘cellrangerRkit’

The following object is masked from ‘package:Seurat’:

    run_tsne


Attaching package: ‘reshape’

The following objects are masked from ‘package:plyr’:

    rename, round_any

The following object is masked from ‘package:dplyr’:

    rename

The following object is masked from ‘package:Matrix’:

    expand

Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Conflicts with tidy packages ---------------------------------------------------
arrange():   dplyr, plyr
combine():   dplyr, Biobase, BiocGenerics
compact():   purrr, plyr
count():     dplyr, plyr
expand():    tidyr, reshape, Matrix
failwith():  dplyr, plyr
fill():      tidyr, VGAM
filter():    dplyr, stats
ggsave():    ggplot2, cowplot
id():        dplyr, plyr
lag():       dplyr, stats
mutate():    dplyr, plyr
Position():  ggplot2, BiocGenerics, base
rename():    dplyr, reshape, plyr
summarise(): dplyr, plyr
summarize(): dplyr, plyr

In [ ]:


In [2]:
# Load the alv table as a matrix: the header row supplies the column names
# and the first CSV column supplies the row names.
alv <- as.matrix(
  read.csv(
    file = "./malv-1k.mat",
    header = TRUE,
    row.names = 1
  )
)

In [3]:
# Load the utl table as a matrix: the header row supplies the column names
# and the first CSV column supplies the row names.
utl <- as.matrix(
  read.csv(
    file = "./utl-1k.mat",
    header = TRUE,
    row.names = 1
  )
)

In [4]:
# Sanity check after loading: show (rows, columns) of each matrix.
# Both calls rely on notebook auto-printing, so keep them as bare expressions.
dim(alv)
dim(utl)


  1. 61187
  2. 1017
  1. 44021
  2. 1017

In [ ]:


In [5]:
# Missing entries are treated as zero in both matrices.
alv <- replace(alv, is.na(alv), 0)
utl <- replace(utl, is.na(utl), 0)

In [6]:
# Feature annotation for alv: one row per matrix row, with the row name stored
# in the `id` column and also used as the data frame's row name.
gene_names <- data.frame(id = rownames(alv), row.names = rownames(alv))

# Sample annotation for alv: one row per matrix column, with the column name
# stored in the `cell_id` column and also used as the data frame's row name.
cell_names <- data.frame(cell_id = colnames(alv), row.names = colnames(alv))

In [ ]:


In [ ]:


In [7]:
# rownames(condition2) <- make.names(x[,1],unique=TRUE)

In [8]:
# Bundle the alv matrix with its cell (phenoData) and gene (featureData)
# annotation tables into a monocle CellDataSet.
# NOTE(review): the recorded run warns that featureData has no
# 'gene_short_name' column, so some monocle functions may not accept this object.
alv.data <- newCellDataSet(
  alv,
  phenoData = new("AnnotatedDataFrame", data = cell_names),
  featureData = new("AnnotatedDataFrame", data = gene_names),
  lowerDetectionLimit = 0.5,
  expressionFamily = negbinomial.size()
)


Warning message in newCellDataSet(alv, phenoData = new("AnnotatedDataFrame", data = cell_names), :
“None of your featureData columns are named 'gene_short_name', some functions will not be able
           to take this function as input as a result”

In [9]:
# Feature annotation for utl: one row per matrix row, with the row name stored
# in the `id` column and also used as the data frame's row name.
# This overwrites the `gene_names` built earlier for alv.
gene_names <- data.frame(id = rownames(utl), row.names = rownames(utl))

# Sample annotation for utl: one row per matrix column, with the column name
# stored in the `cell_id` column and also used as the data frame's row name.
# This overwrites the `cell_names` built earlier for alv.
cell_names <- data.frame(cell_id = colnames(utl), row.names = colnames(utl))

In [10]:
# Bundle the utl matrix with its cell (phenoData) and gene (featureData)
# annotation tables into a monocle CellDataSet.
# NOTE(review): the recorded run warns that featureData has no
# 'gene_short_name' column, so some monocle functions may not accept this object.
utl.data <- newCellDataSet(
  utl,
  phenoData = new("AnnotatedDataFrame", data = cell_names),
  featureData = new("AnnotatedDataFrame", data = gene_names),
  lowerDetectionLimit = 0.5,
  expressionFamily = negbinomial.size()
)


Warning message in newCellDataSet(utl, phenoData = new("AnnotatedDataFrame", data = cell_names), :
“None of your featureData columns are named 'gene_short_name', some functions will not be able
           to take this function as input as a result”

In [11]:
# Auto-print the alv CellDataSet summary (dimensions, pheno/feature labels).
alv.data


CellDataSet (storageMode: environment)
assayData: 61187 features, 1017 samples 
  element names: exprs 
protocolData: none
phenoData
  sampleNames: AGATCTGCAGCTGCTG CTAGTGACAATGGACG ... AGGTCCGAGAAACGAG
    (1017 total)
  varLabels: cell_id Size_Factor
  varMetadata: labelDescription
featureData
  featureNames: ENSG00000000003.14 ENSG00000000005.5 ...
    ENSMUSG00000112931.1 (61187 total)
  fvarLabels: id
  fvarMetadata: labelDescription
experimentData: use 'experimentData(object)'
Annotation:  

In [12]:
# Auto-print the utl CellDataSet summary (dimensions, pheno/feature labels).
utl.data


CellDataSet (storageMode: environment)
assayData: 44021 features, 1017 samples 
  element names: exprs 
protocolData: none
phenoData
  sampleNames: AGATCTGCAGCTGCTG CTAGTGACAATGGACG ... AGGTCCGAGAAACGAG
    (1017 total)
  varLabels: cell_id Size_Factor
  varMetadata: labelDescription
featureData
  featureNames: ENSG00000000003.14 ENSG00000000005.5 ...
    ENSMUSG00000112928.1 (44021 total)
  fvarLabels: id
  fvarMetadata: labelDescription
experimentData: use 'experimentData(object)'
Annotation:  

In [ ]:


In [13]:
# Pre-processing of the alv CellDataSet: size factors, dispersion fit,
# detection statistics, expressed-gene list and per-cell totals.

# Size factors are estimated first, then dispersions, on the same object.
print("estimating Size/Dispersion Input -> ")
alv.data <- estimateSizeFactors(alv.data)
alv.data <- estimateDispersions(alv.data)

# Adds the genes-present / num-cells-expressed features, using 0.1 as the
# minimum expression for a gene to count as detected.
print("Detect genes -> ")
alv.data <- detectGenes(alv.data, min_expr = 0.1)

# Build the list the status message announces. This step used to be commented
# out (with a threshold of 2 and a stale `tap73` name), so the message reported
# work that never ran. The threshold now matches the message.
# NOTE(review): confirm 10 (not 2) is the intended cut-off.
print("getting a list of expressed genes in at least 10 cells -> ")
expressed_genes.alv <- row.names(subset(fData(alv.data), num_cells_expressed >= 10))

# Per-cell total: column sums of the expression matrix, stored in pData.
print("counting mRNA in each cell ")
pData(alv.data)$Total_mRNAs <- Matrix::colSums(exprs(alv.data))


[1] "estimating Size/Dispersion Input -> "
Warning message:
“Deprecated, use tibble::rownames_to_column() instead.”Removing 1046 outliers
[1] "Detect genes -> "
[1] "getting a list of expressed genes in at least 10 cells -> "
[1] "counting mRNA in each cell "

In [14]:
# Pre-processing of the utl CellDataSet: size factors, dispersion fit,
# detection statistics, expressed-gene list and per-cell totals.

# Size factors are estimated first, then dispersions, on the same object.
print("estimating Size/Dispersion Input -> ")
utl.data <- estimateSizeFactors(utl.data)
utl.data <- estimateDispersions(utl.data)

# Adds the genes-present / num-cells-expressed features, using 0.1 as the
# minimum expression for a gene to count as detected.
print("Detect genes -> ")
utl.data <- detectGenes(utl.data, min_expr = 0.1)

# Build the list the status message announces. This step used to be commented
# out, and the disabled code read from alv.data instead of utl.data (with a
# threshold of 2 and a stale `tap73` name). It now uses this cell's own
# dataset and the threshold stated in the message.
# NOTE(review): confirm 10 (not 2) is the intended cut-off.
print("getting a list of expressed genes in at least 10 cells -> ")
expressed_genes.utl <- row.names(subset(fData(utl.data), num_cells_expressed >= 10))

# Per-cell total: column sums of the expression matrix, stored in pData.
print("counting mRNA in each cell ")
pData(utl.data)$Total_mRNAs <- Matrix::colSums(exprs(utl.data))


[1] "estimating Size/Dispersion Input -> "
Warning message:
“Deprecated, use tibble::rownames_to_column() instead.”Removing 816 outliers
[1] "Detect genes -> "
[1] "getting a list of expressed genes in at least 10 cells -> "
[1] "counting mRNA in each cell "

In [ ]:


In [ ]:
# Reduce the alv dataset to 2 components with DDRTree.
# The formal argument is `reduction_method`; the earlier `method = ...`
# spelling matched no formal and was absorbed by `...`, so DDRTree was only
# selected because it is the default.
# NOTE(review): `cores` is also passed through `...`; confirm it has any
# effect for DDRTree before relying on it.
alv.data <- reduceDimension(
  alv.data,
  max_components = 2,
  reduction_method = "DDRTree",
  cores = 5
)

In [ ]:
# Reduce the utl dataset to 2 components with DDRTree.
# The formal argument is `reduction_method`; the earlier `method = ...`
# spelling matched no formal and was absorbed by `...`, so DDRTree was only
# selected because it is the default.
# NOTE(review): `cores` is also passed through `...`; confirm it has any
# effect for DDRTree before relying on it.
utl.data <- reduceDimension(
  utl.data,
  max_components = 2,
  reduction_method = "DDRTree",
  cores = 5
)

In [ ]:


In [ ]:
# Order the alv cells along the learned trajectory (default root/state options).
alv.data <- orderCells(cds = alv.data)

In [ ]:
# Order the utl cells along the learned trajectory (default root/state options).
utl.data <- orderCells(cds = utl.data)

In [ ]:


In [ ]:
# Persist the ordered alv CellDataSet under its current variable name.
save(list = "alv.data", file = "alv-1k_time.RData")

In [ ]:
# Persist the ordered utl CellDataSet under its current variable name.
save(list = "utl.data", file = "utl-1k_time.RData")

In [ ]:


In [ ]:
# Draw the alv trajectory with the default plotting options.
# Left as a bare expression so the notebook auto-prints the result.
plot_cell_trajectory(alv.data)

In [ ]:
# Draw the utl trajectory with the default plotting options.
# Left as a bare expression so the notebook auto-prints the result.
plot_cell_trajectory(utl.data)

In [ ]: