Load packages



In [1]:

    
suppressWarnings(suppressMessages(library(gUtils)))
suppressWarnings(suppressMessages(library(BSgenome.Hsapiens.UCSC.hg19)))
Sys.setenv(DEFAULT_BSGENOME = 'BSgenome.Hsapiens.UCSC.hg19::Hsapiens')

Create GRanges for examples



In [2]:

    
## instantiate GRanges objects

gr1 = suppressWarnings(GRanges(
    seqnames = Rle(c(1, 2, 1, 3), c(1, 3, 2, 4)),
    ranges = IRanges(101:110, end = 111:120, names = head(letters, 10)),
    strand = Rle(strand(c("-", "+", "*", "+", "-")), c(1, 2, 2, 3, 2)),
    score = 1:10,
    GC = seq(1, 0, length=10)))

gr2 = GRanges(1, IRanges(c(103, 107, 113), c(105, 109, 116)), 
    strand=c('+','-','-'), 
    name=c("A","B","C"))

print('print gr1:')
print(gr1)
print('print gr2 :')
print(gr2)









    



[1] "print gr1:"
GRanges object with 10 ranges and 2 metadata columns:
    seqnames     ranges strand |     score                GC
       <Rle>  <IRanges>  <Rle> | <integer>         <numeric>
  a        1 [101, 111]      - |         1                 1
  b        2 [102, 112]      + |         2 0.888888888888889
  c        2 [103, 113]      + |         3 0.777777777777778
  d        2 [104, 114]      * |         4 0.666666666666667
  e        1 [105, 115]      * |         5 0.555555555555556
  f        1 [106, 116]      + |         6 0.444444444444444
  g        3 [107, 117]      + |         7 0.333333333333333
  h        3 [108, 118]      + |         8 0.222222222222222
  i        3 [109, 119]      - |         9 0.111111111111111
  j        3 [110, 120]      - |        10                 0
  -------
  seqinfo: 3 sequences from an unspecified genome; no seqlengths
[1] "print gr2 :"
GRanges object with 3 ranges and 1 metadata column:
      seqnames     ranges strand |        name
         <Rle>  <IRanges>  <Rle> | <character>
  [1]        1 [103, 105]      + |           A
  [2]        1 [107, 109]      - |           B
  [3]        1 [113, 116]      - |           C
  -------
  seqinfo: 1 sequence from an unspecified genome; no seqlengths

gr.match()

gr.match(grA, grB) returns a length(grA) integer vector whose item i contains the first index in grB overlapping grA[i]



In [3]:

    
### Function: 
### gr.match()
### gr.match(grA, grB) returns a length(grA) integer vector whose item i contains the first index in grB overlapping grA[i] 
### The function has been implemented to be multithreaded via `parallel::mc.cores`
### 
### Args:
### 'query' GRanges
### 'subject' GRanges
### 'ignore.strand' if FALSE, gr.match() will be strand specific. if TRUE, it is strand-agnostic
### 'max.slice' max slice of query to match at a time (default = Inf)
### 'verbose' boolean flag to set whether to give verbose output (default = FALSE)
### 'mc.cores' the number of cores to use (default = 1)
### 
### Use:
### gr.match(query, subject)
###



In [4]:

    
gr.match(gr1, gr2, ignore.strand=TRUE)









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





	1
	<NA>
	<NA>
	<NA>
	1
	2
	<NA>
	<NA>
	<NA>
	<NA>



In [5]:

    
gr.match(gr1, gr2, ignore.strand=FALSE)









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





	2
	<NA>
	<NA>
	<NA>
	1
	<NA>
	<NA>
	<NA>
	<NA>
	<NA>



In [6]:

    
gr.match(gr2, gr1, ignore.strand=TRUE)









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





	1
	1
	5



In [7]:

    
gr.match(gr2, gr1, ignore.strand=FALSE)









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





	5
	1
	5

gr.tile.map()

gr.tile.map(gr_query, gr_subject) returns a length(gr_query) list whose items are integer vectors of indices in gr_subject which overlap gr_query



In [8]:

    
## Function: 
## gr.tile.map()
## gr.tile.map(gr_query, gr_subject) returns a length(gr_query) list whose items are integer vectors of indices in gr_subject 
## which overlap gr_query
## 
## gr.tile.map() is strand-agnostic (it ignores the strand)
## 
## 'gr.tile.map()' assumes that input query and subject have no gaps (including at end) or overlaps, i.e. ignores end()
#' coordinates and only uses "starts"
##
## Use:
## gr.tile.map(query, subject, verbose = FALSE)
## 
## Refer to function 'gr.tile()'



In [9]:

    
gr.tile.map(gr2, gr1)









    



Warning message in gr.tile.map(gr2, gr1):
“Warning: Query GRanges has gaps. Unexpected behavior may follow”Warning message in gr.tile.map(gr2, gr1):
“Warning: Subject GRanges has gaps. Unexpected behavior may follow”





    





	$`1`
		
	1
	5
	6


	$`2`
		6
	$`3`
		6

grbind()

grbind() returns a concatenated GRanges



In [10]:

    
## Function: 
## grbind() returns a concatenated GRanges
## 
## Use:
## grbind(input_GRanges1, input_GRanges2, ...)
## 
## See function 'grl.bind()'



In [11]:

    
grbind(gr1, gr2)









    



Warning message in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE):
“GRanges object contains 10 out-of-bound ranges located on sequences 1,
  2, and 3. Note that only ranges located on a non-circular sequence
  whose length is not NA can be considered out-of-bound (use seqlengths()
  and isCircular() to get the lengths and circularity flags of the
  underlying sequences). You can use trim() to trim these ranges. See
  ?`trim,GenomicRanges-method` for more information.”Warning message in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE):
“GRanges object contains 3 out-of-bound ranges located on sequence 1.
  Note that only ranges located on a non-circular sequence whose length
  is not NA can be considered out-of-bound (use seqlengths() and
  isCircular() to get the lengths and circularity flags of the underlying
  sequences). You can use trim() to trim these ranges. See
  ?`trim,GenomicRanges-method` for more information.”





    





GRanges object with 13 ranges and 3 metadata columns:
       seqnames     ranges strand |     score        GC     name
          <Rle>  <IRanges>  <Rle> | <integer> <numeric> <factor>
   [1]        1 [101, 111]      - |         1 1.0000000     <NA>
   [2]        2 [102, 112]      + |         2 0.8888889     <NA>
   [3]        2 [103, 113]      + |         3 0.7777778     <NA>
   [4]        2 [104, 114]      * |         4 0.6666667     <NA>
   [5]        1 [105, 115]      * |         5 0.5555556     <NA>
   ...      ...        ...    ... .       ...       ...      ...
   [9]        3 [109, 119]      - |         9 0.1111111     <NA>
  [10]        3 [110, 120]      - |        10 0.0000000     <NA>
  [11]        1 [103, 105]      + |      <NA>      <NA>        A
  [12]        1 [107, 109]      - |      <NA>      <NA>        B
  [13]        1 [113, 116]      - |      <NA>      <NA>        C
  -------
  seqinfo: 25 sequences from an unspecified genome



In [12]:

    
grbind(gr2, gr1)  ## order-specific









    



Warning message in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE):
“GRanges object contains 3 out-of-bound ranges located on sequence 1.
  Note that only ranges located on a non-circular sequence whose length
  is not NA can be considered out-of-bound (use seqlengths() and
  isCircular() to get the lengths and circularity flags of the underlying
  sequences). You can use trim() to trim these ranges. See
  ?`trim,GenomicRanges-method` for more information.”Warning message in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE):
“GRanges object contains 10 out-of-bound ranges located on sequences 1,
  2, and 3. Note that only ranges located on a non-circular sequence
  whose length is not NA can be considered out-of-bound (use seqlengths()
  and isCircular() to get the lengths and circularity flags of the
  underlying sequences). You can use trim() to trim these ranges. See
  ?`trim,GenomicRanges-method` for more information.”





    





GRanges object with 13 ranges and 3 metadata columns:
       seqnames     ranges strand |     name     score        GC
          <Rle>  <IRanges>  <Rle> | <factor> <integer> <numeric>
   [1]        1 [103, 105]      + |        A      <NA>      <NA>
   [2]        1 [107, 109]      - |        B      <NA>      <NA>
   [3]        1 [113, 116]      - |        C      <NA>      <NA>
   [4]        1 [101, 111]      - |     <NA>         1 1.0000000
   [5]        2 [102, 112]      + |     <NA>         2 0.8888889
   ...      ...        ...    ... .      ...       ...       ...
   [9]        1 [106, 116]      + |     <NA>         6 0.4444444
  [10]        3 [107, 117]      + |     <NA>         7 0.3333333
  [11]        3 [108, 118]      + |     <NA>         8 0.2222222
  [12]        3 [109, 119]      - |     <NA>         9 0.1111111
  [13]        3 [110, 120]      - |     <NA>        10 0.0000000
  -------
  seqinfo: 25 sequences from an unspecified genome

grl.bind()

grl.bind() returns a concatenated GRangesList



In [13]:

    
## Function: 
## grl.bind() returns a concatenated GRangesList 
## 
## Use:
## grl.bind(input_GRangesList1, input_GRangesList2, ...)
## 
## See function 'grbind()'



In [14]:

    
grlist1 = grl1[1:5]  ## create two small GRangeLists as an example
grlist2 = grl2[1:5]

grl.bind(grlist1, grlist2)









    





GRangesList object of length 10:
$1 
GRanges object with 2 ranges and 1 metadata column:
      seqnames                 ranges strand |       bin
         <Rle>              <IRanges>  <Rle> | <integer>
  [1]        5 [146981323, 146981323]      - |        10
  [2]       14 [104605059, 104605059]      + |        10

$2 
GRanges object with 2 ranges and 1 metadata column:
      seqnames                 ranges strand | bin
  [1]        9 [140100229, 140100229]      - |  26
  [2]       19 [ 24309057,  24309057]      - |  26

$3 
GRanges object with 2 ranges and 1 metadata column:
      seqnames                 ranges strand | bin
  [1]        5 [132216750, 132216750]      - |  49
  [2]       17 [  4803057,   4803057]      + |  49

...
<7 more elements>
-------
seqinfo: 25 sequences from an unspecified genome

gr.dist()

gr.dist() returns a concatenated GRangesList



In [15]:

    
## Function: 
## gr.dist() returns a concatenated GRangesList 
## 
## Args: 
## 'ignore.strand' If TRUE, will ignore strand. If FALSE, operation is strand-specific. 
## 
## Use:
## gr.dist(input_GRanges1, input_GRanges2)
##



In [16]:

    
gr.dist(gr2, gr1)









    






	NA NA NA NA 0  0 NA NA NA NA
	 0 NA NA NA 0 NA NA NA NA NA
	 1 NA NA NA 0 NA NA NA NA NA



In [17]:

    
gr.dist(gr2, gr1, ignore.strand = TRUE)









    






	0 NA NA NA 0 0 NA NA NA NA
	0 NA NA NA 0 0 NA NA NA NA
	1 NA NA NA 0 0 NA NA NA NA

gr.in()

gr.in(query, subject) returns boolean vector TRUE if query range i is found in any range in subject



In [18]:

    
## Function: 
## gr.in() 
## gr.in(query, subject) returns boolean vector TRUE if query range i is found in any range in subject. 
## 
## Motivation:
## modified implementation of GenomicRanges::findOverlaps()
## 
## Args: 
## 'query' GRanges Set of GRanges to query. Refer to gr.findoverlaps() and GenomicRanges::findOverlaps()
## 'subject' GRanges Set of GRanges as 'subject' in query. Refer to gr.findoverlaps() and GenomicRanges::findOverlaps()
## 
## Use:
## gr.in(query, subject)
##
## See function 'grl.in()'



In [19]:

    
gr.in(gr1, gr2)









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





	TRUE
	FALSE
	FALSE
	FALSE
	TRUE
	TRUE
	FALSE
	FALSE
	FALSE
	FALSE

grl.in()

grl.in() calculates intersection of \code{GRangesList} with windows on genome, returning a boolean vector of match status



In [20]:

    
## Function: 
## grl.in() calculates intersection of \code{GRangesList} with windows on genome, returning a boolean vector of match status
## 
## Motivation:
## modified implementation of %in% for GRangesList
## 
## Args: 
## 'windows' GRanges pile of windows
## 'some' If TRUE, will return TRUE for GRangesList elements that intersect at least on window range (default = FALSE)
## 'only'  If TRUE, will return TRUE for  GRangesList elements only if there are no elements of query that fail to intersect with windows (default = FALSE)
## 'logical' If TRUE, will return logical otherwise will return numeric vector of number of windows overlapping each GRangeslist (default = TRUE)
## 'exact' If TRUE, will return exact intersection
##

## Use:
## gr.in(query, subject)
##
## See function 'gr.in()'



In [21]:

    
grl.in(grl.hiC[1:10], gr2)









    





	FALSE
	FALSE
	FALSE
	FALSE
	FALSE
	FALSE
	FALSE
	FALSE
	FALSE
	FALSE

gr.findoverlaps()

gr.findoverlaps(query, subject) returns intersecting GRanges, with query.id and subject.id marking sources



In [22]:

    
## Function: 
## gr.findoverlaps()
## gr.findoverlaps(query, subject) returns intersecting GRanges, with query.id and subject.id marking sources
## 
## Motivation:
## modification of {GenomicRanges::findOverlaps() with added funtionality
## 
## Args: 
## 'query' GRanges as query
## 'subject' GRanges as subject
## 'ignore.strand' Don't consider strand information during overlaps (default = TRUE)
## 'first' If TRUEm restricts to only the first match of the subject. If FALSE will return all matches. (default = FALSE)
## 'qcol' \vector of query meta-data columns to add to results (default = NULL)
## 'scol' vector of subject meta-data columns to add to results (default = NULL)
## 'type' 'type' argument as defined by \code{IRanges::findOverlaps} (\code{"any"}, \code{"start"}, \code{"end"}, \code{"within"}, \code{"equal"}). (default = 'any')
## 'by' Meta-data column to consider when performing overlaps (default = NULL)
## 'return.type'Select data format to return (supplied as character): \code{"same"}, \code{"data.table"}, \code{"GRanges"}. (default = 'same')
## 'max.chunk' Maximum number of \code{query*subject} ranges to consider at once. Lower number increases runtime but decreased memory. If \code{length(query)*length(subject)} is less than \code{max.chunk}, overlaps will run in one batch. (default = 1e3)
## 'verbose' If TRUE, will increase the verbosity. (default = FALSE)
## 'mc.cores' Number of cores to use when running in chunked mode (default = 1)
## 
## Use: 
## gr.findoverlaps(query, subject, by='column_example')
##



In [23]:

    
gr.findoverlaps(gr1, gr2)









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





GRanges object with 7 ranges and 2 metadata columns:
      seqnames     ranges strand |  query.id subject.id
         <Rle>  <IRanges>  <Rle> | <integer>  <integer>
  [1]        1 [103, 105]      * |         1          1
  [2]        1 [105, 105]      * |         5          1
  [3]        1 [107, 109]      * |         1          2
  [4]        1 [107, 109]      * |         5          2
  [5]        1 [107, 109]      * |         6          2
  [6]        1 [113, 115]      * |         5          3
  [7]        1 [113, 116]      * |         6          3
  -------
  seqinfo: 25 sequences from an unspecified genome



In [24]:

    
gr1$name = c('A', 'A', 'A', 'A', 'B', 'B', 'B', 'C', 'D', 'D')  ## add meta data column 'name'


gr.findoverlaps(gr1, gr2, by='name')









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





GRanges object with 3 ranges and 2 metadata columns:
      seqnames     ranges strand |  query.id subject.id
         <Rle>  <IRanges>  <Rle> | <integer>  <integer>
  [1]        1 [103, 105]      * |         1          1
  [2]        1 [107, 109]      * |         5          2
  [3]        1 [107, 109]      * |         6          2
  -------
  seqinfo: 25 sequences from an unspecified genome

gr.setdiff()

gr.setdiff() returns indices of query in subject (modified from GenomicRanges::setdiff())



In [25]:

    
## Function: 
## gr.setdiff() returns indices of query in subject (modified from GenomicRanges::setdiff())
## 
## Motivation:
## modification of GenomicRanges::setdiff()
## 'gr.setdiff()' is robust to common edge cases of setdiff(gr1, gr2)  where gr2 ranges are contained inside gr1's (yieldings
## setdiffs yield two output ranges for some of the input gr1 intervals.
## 
##
## Args: 
## 'query' GRanges as query
## 'subject' GRanges as subject
## 'ignore.strand' If TRUE, operations will ignore strand. Refer to 'gr.findoverlaps()'. (default = TRUE)
## 'by' Meta-data column to consider when performing overlaps. Refer to 'gr.findoverlaps()' documentation (default = NULL)
##
## Use: 
## gr.setdiff(query, subject)
##



In [26]:

    
gr.setdiff(gr1, gr2)









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





GRanges object with 7 ranges and 5 metadata columns:
      seqnames     ranges strand |  query.id subject.id     score        GC
         <Rle>  <IRanges>  <Rle> | <integer>  <integer> <integer> <numeric>
  [1]        1 [101, 102]      * |         1          1         1 1.0000000
  [2]        1 [106, 106]      * |         1          2         1 1.0000000
  [3]        1 [106, 106]      * |         5          2         5 0.5555556
  [4]        1 [106, 106]      * |         6          2         6 0.4444444
  [5]        1 [110, 111]      * |         1          3         1 1.0000000
  [6]        1 [110, 112]      * |         5          3         5 0.5555556
  [7]        1 [110, 112]      * |         6          3         6 0.4444444
             name
      <character>
  [1]           A
  [2]           A
  [3]           B
  [4]           B
  [5]           A
  [6]           B
  [7]           B
  -------
  seqinfo: 25 sequences from an unspecified genome

gr.merge()

gr.merge() merges GRanges by using coordinates as primary key



In [27]:

    
## Function: 
## gr.merge() merges GRanges by using coordinates as primary key
## 
## Args: 
## 'query' Set of GRanges to query. 
## 'subject' \Set of GRanges as 'subject' 
## 'by' Additional metadata fields to join on
## 'all' If TRUE, executes left and right joins
## 'all.query' If TRUE, executes left join (default = all)
## 'all.subject' If TRUE, executes right join (default = all)
## 
## Use: 
## gr.merge(query, subject)
##



In [28]:

    
gr1 = GRanges(              
    seqnames = Rle(c(1, 2, 1, 3), c(1, 3, 2, 4)),
    ranges = IRanges(101:110, end = 111:120, names = head(letters, 10)),
    strand = Rle(strand(c("-", "+", "*", "+", "-")), c(1, 2, 2, 3, 2)),
    score = 1:10,
    GC = seq(1, 0, length=10))

gr.merge(gr1, gr2)









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





GRanges object with 7 ranges and 5 metadata columns:
      seqnames     ranges strand |  query.id subject.id     score        GC
         <Rle>  <IRanges>  <Rle> | <integer>  <integer> <integer> <numeric>
  [1]        1 [103, 105]      * |         1          1         1 1.0000000
  [2]        1 [105, 105]      * |         5          1         5 0.5555556
  [3]        1 [107, 109]      * |         1          2         1 1.0000000
  [4]        1 [107, 109]      * |         5          2         5 0.5555556
  [5]        1 [107, 109]      * |         6          2         6 0.4444444
  [6]        1 [113, 115]      * |         5          3         5 0.5555556
  [7]        1 [113, 116]      * |         6          3         6 0.4444444
             name
      <character>
  [1]           A
  [2]           A
  [3]           B
  [4]           B
  [5]           B
  [6]           C
  [7]           C
  -------
  seqinfo: 25 sequences from an unspecified genome



In [29]:

    
gr1$name = c('A', 'A', 'A', 'A', 'B', 'B', 'B', 'C', 'D', 'D')  ## add meta data column 'name'


gr.merge(gr1, gr2, by='name')









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





GRanges object with 3 ranges and 5 metadata columns:
      seqnames     ranges strand |  query.id subject.id     score        GC
         <Rle>  <IRanges>  <Rle> | <integer>  <integer> <integer> <numeric>
  [1]        1 [103, 105]      * |         1          1         1 1.0000000
  [2]        1 [107, 109]      * |         5          2         5 0.5555556
  [3]        1 [107, 109]      * |         6          2         6 0.4444444
             name
      <character>
  [1]           A
  [2]           B
  [3]           B
  -------
  seqinfo: 25 sequences from an unspecified genome

gr.val()

gr.val(query, target) annotates input GRanges with values from another GRanges



In [30]:

    
## Function: 
## gr.val() annotates input GRanges with values from another GRanges
## 
## 'query' and 'target' can be GRangesList, in which case 'val' will refer to GRangeslist level values fields
##
## Args: 
## 'query' GRanges of query ranges whose 'val' column we will populate with aggregated values of 'target'
## 'target' GRanges of target ranges that already have "val" column populated
## 'val' If a character field: then aggregation will paste together the (unique) overlapping values, collapsing by comma. (default = NULL)
## 'mean' If FALSE, then will return sum instead of mean, only applies if target 'val' column is numeric. (default = TRUE)
## 'weighted' Calculate a weighted mean. If FALSE, calculates unweighted mean. (default = 'mean')
## 'na.rm' If TRUE, removes NA values when calulating means. Only applies if val column of target is numeric (default = FALSE)
## 'by' specifies additional "by" column of query AND target that will be used to match up query and target pairs (i.e. in addition to pure GRanges overlap). (default = NULL)
## 'by.prefix' Choose a set of 'val'fields by a shared prefix. (default = 'val')
## 'merge' If merge = FALSE then will cross every range in query with every level of "by" in target (and create data matrix), otherwise will assume query has "by" and merge only ranges that have matching "by" values in both query and target (default = FALSE)
## 'FUN' Optional different function to call than mean. Takes two arguments (value, na.rm = TRUE) if weighted = FALSE, and three (value, width, na.rm = TRUE) if weighted = TRUE. (default = NULL)
## 'default.val' If no hit in 'target' found in 'query', fill output 'val' field with this value. (default = NA)
## 'max.slice' Maximum number of query ranges to consider in one memory chunk. (default = Inf)
## 'mc.cores' Number of cores to use when running in chunked mode (default = 1)
## 'sep' Specifies character to use as separator when aggregating character "vals" from target, only applies if target is character (default = ', ')
## 'verbose'If TRUE, increase the verbosity of the output (default = FALSE)
## 
## 
## Use: 
## gr.val(query, target)
##



In [31]:

    
gr1 = GRanges(              
    seqnames = Rle(c(1, 2, 1, 3), c(1, 3, 2, 4)),
    ranges = IRanges(101:110, end = 111:120, names = head(letters, 10)),
    strand = Rle(strand(c("-", "+", "*", "+", "-")), c(1, 2, 2, 3, 2)),
    score = 1:10,
    GC = seq(1, 0, length=10))

gr.val(gr2, gr1)









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





GRanges object with 3 ranges and 2 metadata columns:
      seqnames     ranges strand |        name     value
         <Rle>  <IRanges>  <Rle> | <character> <numeric>
  [1]        1 [103, 105]      + |           A         1
  [2]        1 [107, 109]      - |           B         1
  [3]        1 [113, 116]      - |           C         1
  -------
  seqinfo: 1 sequence from an unspecified genome; no seqlengths



In [32]:

    
gr1$name = c('A', 'A', 'A', 'A', 'B', 'B', 'B', 'C', 'D', 'D')  ## add meta data column 'name'


gr.merge(gr2, gr1, by='name')









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





GRanges object with 3 ranges and 5 metadata columns:
      seqnames     ranges strand |  query.id subject.id     score        GC
         <Rle>  <IRanges>  <Rle> | <integer>  <integer> <integer> <numeric>
  [1]        1 [103, 105]      * |         1          1         1 1.0000000
  [2]        1 [107, 109]      * |         2          5         5 0.5555556
  [3]        1 [107, 109]      * |         2          6         6 0.4444444
             name
      <character>
  [1]           A
  [2]           B
  [3]           B
  -------
  seqinfo: 25 sequences from an unspecified genome

anchorlift()

anchorlift(query, subject) "lifts" all queries with respect to subject in coordinates that are within "pad"



In [33]:

    
## Function: 
## anchorlift()
##
## "lifts" all queries with respect to subject in coordinates that are within "pad"
## i.e. puts the queries into subject-centric coordinates, which is a new genome with label "Anchor" (default)
##
## Respects strand of subject (i.e. if subject strand gr is "-" then will lift all queries to the left of it
## into positive subject-centric coordinates). Keeps track of subject and query id for later deconvolution if need be.
##
## Args:
## 'query' GRanges that will be lifted around the subject
## 'subject' GRanges around which the queries will be lifted
## 'window' integer specifying how far around each subject to gather query intervals to lift (default = 1e9)
## 'by' character vector specifying additional columms (e.g. sample id) around which to restrict overlaps (via gr.findoverlaps()). Refer to `gr.findoverlaps()` documentation. (default = NULL)
## 'seqname' String specifying the name of the output sequence around which to anchor (default = "Anchor")
## 'include.values' If TRUE, include values from query and subject (default = TRUE)
## 
## Use: 
## anchorlift(query, subject)
##



In [34]:

    
anchorlift(gr1, gr2, window=100)









    



Warning message:
“"ranges" method for Hits objects is deprecated. Please use
  overlapsRanges() instead.”





    





GRanges object with 9 ranges and 6 metadata columns:
      seqnames    ranges strand | subject.id  query.id     score        GC
         <Rle> <IRanges>  <Rle> |  <integer> <integer> <integer> <numeric>
  [1]   Anchor  [-8,  2]      * |          1         1         1 1.0000000
  [2]   Anchor  [ 2, 12]      * |          2         1         1 1.0000000
  [3]   Anchor  [ 8, 18]      * |          3         1         1 1.0000000
  [4]   Anchor  [-4,  6]      * |          1         5         5 0.5555556
  [5]   Anchor  [-2,  8]      * |          2         5         5 0.5555556
  [6]   Anchor  [ 4, 14]      * |          3         5         5 0.5555556
  [7]   Anchor  [-3,  7]      * |          1         6         6 0.4444444
  [8]   Anchor  [-3,  7]      * |          2         6         6 0.4444444
  [9]   Anchor  [ 3, 13]      * |          3         6         6 0.4444444
             name      name.1
      <character> <character>
  [1]           A           A
  [2]           A           B
  [3]           A           C
  [4]           B           A
  [5]           B           B
  [6]           B           C
  [7]           B           A
  [8]           B           B
  [9]           B           C
  -------
  seqinfo: 1 sequence from an unspecified genome; no seqlengths

rrbind

rrbind() calculates intersecting/union columns of data.frame/data.table



In [35]:

    
## Function: 
## rrbind() 
##
## Motivation:
## Implementation of'rbind()', but for calculating intersecting/union columns of data.frame/data.table
## 
## Args: 
## 'union'         If TRUE, takes union of columns (default = TRUE)
#' 'as.data.table' If TRUE, returns the binded data as a data.table (default = FALSE)
## 
## Use: 
## rrbind(dt1, dt2)
##



In [36]:

    
dt1 = gr2dt(gr1)  ## example data.table dt1
dt2 = gr2dt(gr2)  ## example data.table dt2



In [37]:

    
rrbind(dt1, dt2, as.data.table=TRUE)









    





seqnames start end strand width score GC name

	1        101      111      -        11        1       1.0000000 A        
	2        102      112      +        11        2       0.8888889 A        
	2        103      113      +        11        3       0.7777778 A        
	2        104      114      *        11        4       0.6666667 A        
	1        105      115      *        11        5       0.5555556 B        
	1        106      116      +        11        6       0.4444444 B        
	3        107      117      +        11        7       0.3333333 B        
	3        108      118      +        11        8       0.2222222 C        
	3        109      119      -        11        9       0.1111111 D        
	3        110      120      -        11       10       0.0000000 D        
	1        103      105      +         3       NA              NA A        
	1        107      109      -         3       NA              NA B        
	1        113      116      -         4       NA              NA C



In [38]:

    
rrbind(dt2, dt1, as.data.table=TRUE)









    





seqnames start end strand width name score GC

	1        103      105      +         3       A        NA              NA
	1        107      109      -         3       B        NA              NA
	1        113      116      -         4       C        NA              NA
	1        101      111      -        11       A         1       1.0000000
	2        102      112      +        11       A         2       0.8888889
	2        103      113      +        11       A         3       0.7777778
	2        104      114      *        11       A         4       0.6666667
	1        105      115      *        11       B         5       0.5555556
	1        106      116      +        11       B         6       0.4444444
	3        107      117      +        11       B         7       0.3333333
	3        108      118      +        11       C         8       0.2222222
	3        109      119      -        11       D         9       0.1111111
	3        110      120      -        11       D        10       0.0000000



In [ ]:



In [ ]:

seqnames	start	end	strand	width	score	GC	name
1	101	111	-	11	1	1.0000000	A
2	102	112	+	11	2	0.8888889	A
2	103	113	+	11	3	0.7777778	A
2	104	114	*	11	4	0.6666667	A
1	105	115	*	11	5	0.5555556	B
1	106	116	+	11	6	0.4444444	B
3	107	117	+	11	7	0.3333333	B
3	108	118	+	11	8	0.2222222	C
3	109	119	-	11	9	0.1111111	D
3	110	120	-	11	10	0.0000000	D
1	103	105	+	3	NA	NA	A
1	107	109	-	3	NA	NA	B
1	113	116	-	4	NA	NA	C