diff --git a/R/findOverlaps-methods.R b/R/findOverlaps-methods.R index 7a09f86..8c59a04 100644 --- a/R/findOverlaps-methods.R +++ b/R/findOverlaps-methods.R @@ -223,6 +223,73 @@ setMethod("findOverlaps", c("GenomicRanges", "GRangesList"), } ) +setMethod("findOverlaps", c("GenomicRanges", "character"), + function(query, subject, maxgap=-1L, minoverlap=0L, + type=c("any", "start", "end", "within", "equal"), + select=c("all", "first", "last", "arbitrary"), + ignore.strand=FALSE) + { + ans <- findMatches(seqnames(query), subject) + selectHits(ans, select=match.arg(select)) + } +) + +setMethod("findOverlaps", c("character", "GenomicRanges"), + function(query, subject, maxgap=-1L, minoverlap=0L, + type=c("any", "start", "end", "within", "equal"), + select=c("all", "first", "last", "arbitrary"), + ignore.strand=FALSE) + { + ans <- findMatches(query, seqnames(subject)) + selectHits(ans, select=match.arg(select)) + } +) + +setMethod("findOverlaps", c("GenomicRangesList", "character"), + function(query, subject, maxgap=-1L, minoverlap=0L, + type=c("any", "start", "end", "within", "equal"), + select=c("all", "first", "last", "arbitrary"), + ignore.strand=FALSE) + { + sn <- unique(seqnames(query)) + + type <- match.arg(type) + if (type == "within") { + remap <- which(lengths(sn) == 1L) + ans <- findMatches(unlist(sn[remap]), subject) + } else { + ans <- findMatches(unlist(sn), subject) + remap <- rep(seq_along(sn), lengths(sn)) + } + + ans2 <- Hits(from = unname(remap[queryHits(ans)]), to = subjectHits(ans), nLnode = length(query), nRnode = length(subject)) + ans2 <- unique(ans2) + selectHits(ans2, select=match.arg(select)) + } +) + +setMethod("findOverlaps", c("character", "GenomicRangesList"), + function(query, subject, maxgap=-1L, minoverlap=0L, + type=c("any", "start", "end", "within", "equal"), + select=c("all", "first", "last", "arbitrary"), + ignore.strand=FALSE) + { + sn <- unique(seqnames(subject)) + + type <- match.arg(type) + if (type == "within") { + remap <- which(lengths(sn) == 1L) + ans <- findMatches(query, as.character(unlist(sn[remap]))) + } else { + ans <- findMatches(query, as.character(unlist(sn))) # TODO: fix bug in findMatches with seqnames as the second argument. + remap <- rep(seq_along(sn), lengths(sn)) + } + + ans2 <- Hits(from = queryHits(ans), to = unname(remap[subjectHits(ans)]), nLnode = length(query), nRnode = length(subject)) + ans2 <- unique(ans2) + selectHits(ans2, select=match.arg(select)) + } +) ### ========================================================================= ### findOverlaps-based methods diff --git a/inst/unitTests/test_findOverlaps-methods.R b/inst/unitTests/test_findOverlaps-methods.R index b479919..4aaf60e 100644 --- a/inst/unitTests/test_findOverlaps-methods.R +++ b/inst/unitTests/test_findOverlaps-methods.R @@ -332,3 +332,41 @@ test_poverlaps <- function() { GRanges("chr1:16-20")) checkIdentical(ans, Rle(c(FALSE, TRUE))) } + +test_findOverlaps_character <- function() { + gr <- GRanges(c("chr1:11-15", "chr1:5-100", "chr2:10-11")) + + ans <- findOverlaps(gr, "chr1") + checkIdentical(queryHits(ans), which(seqnames(gr)=="chr1")) + + ans <- findOverlaps(gr, c("chr2", "chr1")) + checkIdentical(as.character(seqnames(gr)[queryHits(ans)]), c("chr2", "chr1")[subjectHits(ans)]) + + # Works in the other direction. + ans <- findOverlaps("chr2", gr) + checkIdentical(subjectHits(ans), which(seqnames(gr)=="chr2")) + + # Works for GRLs. + gr <- GRanges(c("chr1:11-15", "chr1:5-100", "chr2:1-10", "chr2:10-11")) + grl <- split(gr, c(1,2,2,3)) + ans <- findOverlaps(grl, "chr2") + checkIdentical(queryHits(ans), unname(which(any(seqnames(grl) %in% "chr2")))) + + ans <- findOverlaps(grl, c("chr2", "chr1")) + checkIdentical(queryHits(ans)[subjectHits(ans)==1L], unname(which(any(seqnames(grl) %in% "chr2")))) + checkIdentical(queryHits(ans)[subjectHits(ans)==2L], unname(which(any(seqnames(grl) %in% "chr1")))) + + ans <- findOverlaps(grl, c("chr2", "chr1"), type="within") + checkIdentical(queryHits(ans)[subjectHits(ans)==1L], unname(which(all(seqnames(grl) %in% "chr2")))) + checkIdentical(queryHits(ans)[subjectHits(ans)==2L], unname(which(all(seqnames(grl) %in% "chr1")))) + + # Same result in the other direction. + ans2 <- findOverlaps(c("chr2", "chr1"), grl) + checkIdentical(subjectHits(ans2)[queryHits(ans2)==1L], unname(which(any(seqnames(grl) %in% "chr2")))) + checkIdentical(subjectHits(ans2)[queryHits(ans2)==2L], unname(which(any(seqnames(grl) %in% "chr1")))) + + ans2 <- findOverlaps(c("chr2", "chr1"), grl, type="within") + checkIdentical(subjectHits(ans2)[queryHits(ans2)==1L], unname(which(all(seqnames(grl) %in% "chr2")))) + checkIdentical(subjectHits(ans2)[queryHits(ans2)==2L], unname(which(all(seqnames(grl) %in% "chr1")))) +} + diff --git a/man/findOverlaps-methods.Rd b/man/findOverlaps-methods.Rd index be2e737..75fc438 100644 --- a/man/findOverlaps-methods.Rd +++ b/man/findOverlaps-methods.Rd @@ -54,6 +54,7 @@ \arguments{ \item{query, subject}{ A \link{GRanges} or \link{GRangesList} object. + Either argument may also be a character vector of sequance names. } \item{maxgap, minoverlap, type}{ See \code{?\link[IRanges]{findOverlaps}} in the \pkg{IRanges} package @@ -97,6 +98,16 @@ For \code{type="equal"} with GRangesList objects, \code{query[[i]]} matches \code{subject[[j]]} iff for each range in \code{query[[i]]} there is an identical range in \code{subject[[j]]}, and vice versa. + + When either the query or subject is a character vector, the values are + interpreted as the sequence names. A range is considered to \dQuote{overlap} + with a sequence name if it lies on that sequence. \code{maxgap} and + \code{minoverlap} are ignored. \code{type} is ignored for \link{GRanges}, and + only \code{type="any"} and \code{type="within"} are used for + \link{GRangesList} inputs (the latter requiring all ranges to lie on the + specified sequence to be considered as an overlap). This method allows users + to conveniently identify ranges on certain chromosomes without worrying about + whether the ranges are stored in a \link{GRanges} or \link{GRangesList}. } \value{