diff --git a/R/GPos-class.R b/R/GPos-class.R index fe36a06a..81eb2ba3 100644 --- a/R/GPos-class.R +++ b/R/GPos-class.R @@ -176,7 +176,7 @@ GPos <- function(seqnames=NULL, pos=NULL, strand=NULL, if (is(x_ranges, "IRanges")) # i.e. 'x' is not a GPos strand <- rep.int(strand, width(x_ranges)) } - if (length(mcols) == 0L && is(x, "GPos")) + if (length(mcols) == 0L && inherits(x, "GenomicRanges")) mcols <- mcols(x, use.names=FALSE) if (is.null(seqinfo)) seqinfo <- seqinfo(x) diff --git a/R/makeGRangesFromDataFrame.R b/R/makeGRangesFromDataFrame.R index 4226dd2f..a0ab8acc 100644 --- a/R/makeGRangesFromDataFrame.R +++ b/R/makeGRangesFromDataFrame.R @@ -9,7 +9,6 @@ stop("'", what, ".field' must be a character vector with no NAs") tolower(field) } - .collect_prefixes <- function(df_colnames, field) { df_colnames_nc <- nchar(df_colnames) @@ -29,6 +28,10 @@ idx2 <- which(df_colnames %in% end.field) if (length(idx1) == 1L && length(idx2) == 1L) return(list(c(start=idx1, end=idx2), "")) + if (length(idx1) == 1L && length(idx2) == 0L) + return(list(c(start=idx1, end=idx1), "")) + if (length(idx1) == 0L && length(idx2) == 1L) + return(list(c(start=idx2, end=idx2), "")) if (length(idx1) == 0L && length(idx2) == 0L) { prefixes1 <- .collect_prefixes(df_colnames, start.field) prefixes2 <- .collect_prefixes(df_colnames, end.field) @@ -74,7 +77,7 @@ .find_strand_col <- function(df_colnames, strand.field, prefix) { idx <- which(df_colnames %in% paste0(prefix, strand.field)) - if (length(idx) == 0L) + if (length(idx) == 0L) idx <- which(df_colnames %in% strand.field) if (length(idx) == 0L) return(NA_integer_) @@ -93,7 +96,7 @@ "chromosome", "chrom", "chr", "chromosome_name", "seqid"), - start.field="start", + start.field=c("start", "pos"), end.field=c("end", "stop"), strand.field="strand", ignore.strand=FALSE) @@ -144,7 +147,6 @@ ans } -### 'df' must be a data.frame or DataFrame object. makeGRangesFromDataFrame <- function(df, keep.extra.columns=FALSE, ignore.strand=FALSE, @@ -153,12 +155,14 @@ makeGRangesFromDataFrame <- function(df, "chromosome", "chrom", "chr", "chromosome_name", "seqid"), - start.field="start", + start.field=c("start", "pos"), end.field=c("end", "stop"), strand.field="strand", - starts.in.df.are.0based=FALSE) + starts.in.df.are.0based=FALSE, + as=c("auto", "GRanges", "GPos")) { - ## Check args. +### 'df' must be a data.frame or DataFrame object. + ## Check args. if (is.character(df)) # for people that provide the path to a file stop("'df' must be a data.frame or DataFrame object") if (!(is.data.frame(df) || is(df, "DataFrame"))) @@ -170,6 +174,7 @@ makeGRangesFromDataFrame <- function(df, ans_seqinfo <- normarg_seqinfo1(seqinfo) if (!isTRUEorFALSE(starts.in.df.are.0based)) stop("'starts.in.df.are.0based' must be TRUE or FALSE") + as <- match.arg(as) granges_cols <- .find_GRanges_cols(names(df), seqnames.field=seqnames.field, @@ -177,13 +182,18 @@ makeGRangesFromDataFrame <- function(df, end.field=end.field, strand.field=strand.field, ignore.strand=ignore.strand) - ## Prepare 'ans_seqnames'. ans_seqnames <- df[[granges_cols[["seqnames"]]]] ## Prepare 'ans_ranges'. ans_start <- .get_data_frame_col_as_numeric(df, granges_cols[["start"]]) ans_end <- .get_data_frame_col_as_numeric(df, granges_cols[["end"]]) + + if (identical(ans_start, ans_end) && identical(as, "auto")) + as <- "GPos" + else + as <- "GRanges" + if (starts.in.df.are.0based) ans_start <- ans_start + 1L ans_names <- rownames(df) @@ -229,8 +239,9 @@ makeGRangesFromDataFrame <- function(df, ans_seqinfo <- Seqinfo(seqlevels) } + FUN <- switch(as, GRanges = GRanges, GPos = GPos) ## Make and return the GRanges object. - GRanges(ans_seqnames, ans_ranges, strand=ans_strand, + FUN(ans_seqnames, ans_ranges, strand=ans_strand, ans_mcols, seqinfo=ans_seqinfo) } @@ -242,3 +253,10 @@ setAs("DataFrame", "GRanges", function(from) makeGRangesFromDataFrame(from, keep.extra.columns=TRUE) ) +setAs("data.frame", "GPos", + function(from) makeGRangesFromDataFrame(from, keep.extra.columns=TRUE) +) + +setAs("DataFrame", "GPos", + function(from) makeGRangesFromDataFrame(from, keep.extra.columns=TRUE) +) diff --git a/inst/unitTests/test_makeGRangesFromDataFrame.R b/inst/unitTests/test_makeGRangesFromDataFrame.R index 18830b52..3404db62 100644 --- a/inst/unitTests/test_makeGRangesFromDataFrame.R +++ b/inst/unitTests/test_makeGRangesFromDataFrame.R @@ -152,3 +152,18 @@ test_find_GRanges_cols <- function() checkIdentical(target, current) } + +test_makeGRangesFromDataFrame <- function() +{ + post <- data.frame(chr = rep(1, 3), pos = 11:13) + star <- data.frame(chr = rep(1, 3), start = 11:13) + endo <- data.frame(chr = rep(1, 3), end = 11:13) + checkTrue(validObject(makeGRangesFromDataFrame(post))) + checkTrue(validObject(makeGRangesFromDataFrame(star))) + checkTrue(validObject(makeGRangesFromDataFrame(endo))) + + target <- makeGRangesFromDataFrame(data.frame(seqnames=1:6, start=11:16)) + checkTrue(validObject(target)) + target <- makeGRangesFromDataFrame(data.frame(seqnames=1:6, end=11:16)) + checkTrue(validObject(target)) +} diff --git a/man/GenomicRangesList-class.Rd b/man/GenomicRangesList-class.Rd index e8ca4a29..0387f678 100644 --- a/man/GenomicRangesList-class.Rd +++ b/man/GenomicRangesList-class.Rd @@ -89,7 +89,7 @@ } Note that the \emph{Vector class hierarchy} has many more classes. In particular \link[S4Vectors]{Vector}, \link[S4Vectors]{List}, - \link[IRanges]{RangesList}, and \link[IRanges]{IntegerRangesList} + \link[IRanges]{IRangesList}, and \link[IRanges]{IntegerRangesList} have other subclasses not shown here. } diff --git a/man/makeGRangesFromDataFrame.Rd b/man/makeGRangesFromDataFrame.Rd index e42e2b65..6c983671 100644 --- a/man/makeGRangesFromDataFrame.Rd +++ b/man/makeGRangesFromDataFrame.Rd @@ -3,7 +3,9 @@ \alias{makeGRangesFromDataFrame} \alias{coerce,data.frame,GRanges-method} +\alias{coerce,data.frame,GPos-method} \alias{coerce,DataFrame,GRanges-method} +\alias{coerce,DataFrame,GPos-method} \title{Make a GRanges object from a data.frame or DataFrame} @@ -26,10 +28,11 @@ makeGRangesFromDataFrame(df, "chromosome", "chrom", "chr", "chromosome_name", "seqid"), - start.field="start", + start.field=c("start", "pos"), end.field=c("end", "stop"), strand.field="strand", - starts.in.df.are.0based=FALSE) + starts.in.df.are.0based=FALSE, + as=c("auto", "GRanges", "GPos")) } \arguments{ @@ -100,6 +103,12 @@ makeGRangesFromDataFrame(df, start" convention. A notorious example of such resource is the UCSC Table Browser (\url{http://genome.ucsc.edu/cgi-bin/hgTables}). } + \item{as}{ + A scalar character vector indicating the derivative of + \link{GenomicRanges} object to return, either \link{GRanges} or + \link{GPos}. By default, the derivative will be chosen based on + the data provided (\code{"auto"}). + } } \value{ @@ -177,6 +186,34 @@ makeGRangesFromDataFrame(df, seqinfo=paste0("chr", 4:1)) makeGRangesFromDataFrame(df, seqinfo=c(chrM=NA, chr1=500, chrX=100)) makeGRangesFromDataFrame(df, seqinfo=Seqinfo(paste0("chr", 4:1))) +## GPos objects are returned where appropriate +df <- data.frame(chr="chr1", pos = 11:15, score=1:5) +makeGRangesFromDataFrame(df) + +df <- data.frame(chr="chr1", start=11:15, end=11:15, + strand=c("+","-","+","*","."), score=1:5) +makeGRangesFromDataFrame(df) + +df <- data.frame(chr="chr1", start=11:15, score=1:5) +makeGRangesFromDataFrame(df) + +df <- data.frame(chr="chr1", end=11:15, score=1:5) +makeGRangesFromDataFrame(df) + +gr <- makeGRangesFromDataFrame(df, keep.extra.columns=TRUE) +gr2 <- as(df, "GPos") # equivalent to the above +stopifnot(identical(gr, gr2)) +gr2 <- GPos(df) # equivalent to the above +stopifnot(identical(gr, gr2)) + +makeGRangesFromDataFrame(df, ignore.strand=TRUE) +makeGRangesFromDataFrame(df, keep.extra.columns=TRUE, + ignore.strand=TRUE) + +makeGRangesFromDataFrame(df, seqinfo=paste0("chr", 4:1)) +makeGRangesFromDataFrame(df, seqinfo=c(chrM=NA, chr1=500, chrX=100)) +makeGRangesFromDataFrame(df, seqinfo=Seqinfo(paste0("chr", 4:1))) + ## --------------------------------------------------------------------- ## ABOUT AUTOMATIC DETECTION OF THE seqnames/start/end/strand COLUMNS ## ---------------------------------------------------------------------