Merge pull request #32 from jwfoley/devel

jwfoley · web-flow · commit 39388173bec0 · 2023-07-21T14:40:40.000+08:00
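Devel: release 0.10.1 (relicense from LGPL-3 to MIT, drop the png import, warn instead of stopping on missing or conflicting Bioanalyzer markers, return NA summary statistics for subsets with fewer than two rows, update contact email and vignette URL)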
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,17 +1,17 @@
 Package: bioanalyzeR
 Title: Analysis of Agilent electrophoresis data
-Version: 0.10.0
+Version: 0.10.1
 Authors@R: 
     person(given = "Joseph",
            family = "Foley",
            role = c("aut", "cre"),
-           email = "jwfoley@stanford.edu",
+           email = "joe@jwfoley.com",
            comment = c(ORCID = "0000-0002-1579-5286"))
 Description: This package reads raw data and metadata from Agilent automated electrophoresis systems (Bioanalyzer, TapeStation, Fragment Analyzer, ZAG DNA Analyzer, Femto Pulse) into R-friendly data frames. It estimates concentration and molarity for all possible data points. Pre-defined plotting functions use ggplot2 to create attractive graphs of the raw data, as well as the estimated molecule length and molarity, and overlay the peaks and regions of interest reported by the Agilent software. Additional functions streamline common operations like calculating the total concentration or molarity within a region or the molar ratio between two regions.
-License: LGPL-3
+License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
-Imports: XML, base64enc, png, plyr, ggplot2
+Imports: XML, base64enc, plyr, ggplot2
 Suggests: 
     argparse,
     knitr,
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Joseph William Foley
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/R/bioanalyzer.R b/R/bioanalyzer.R
@@ -63,15 +63,25 @@ read.bioanalyzer <- function(xml.file, method = "hyman", extrapolate = FALSE) {
 				stringsAsFactors = F
 			)
 			
-			# align the observation times according to the markers in this sample		
+			# align the observation times according to the markers in this sample
+			alignment.coefficient <- NA
+			alignment.offset <- NA
 			which.lower.marker <- which(peaks$peak.observations == "Lower Marker" & peaks$concentration == defined.ladder.peaks$Concentration[1]) # check the concentration too because sometimes the software annotates more than one as the same marker with no consequences, and sometimes the size is off by a tiny bit, but the concentration is hardcoded
-			stopifnot("conflicting lower markers" = length(which.lower.marker) == 1)
-			if (has.upper.marker) {
+			if (length(which.lower.marker) > 1) {
+				warning(paste0("conflicting lower markers in ", batch, " well ", well.number), call. = F)
+			} else if (length(which.lower.marker) == 0) {
+				warning(paste0("no lower marker in ", batch, " well ", well.number), call. = F)
+			} else if (has.upper.marker) { # assay design has lower and upper marker
 				which.upper.marker <- which(peaks$peak.observations == "Upper Marker" & peaks$concentration == defined.ladder.peaks$Concentration[nrow(defined.ladder.peaks)])
-				stopifnot("conflicting upper markers" = length(which.upper.marker) == 1)
-				alignment.coefficient <- diff(peaks$aligned.time[c(which.lower.marker, which.upper.marker)]) / diff(peaks$time[c(which.lower.marker, which.upper.marker)])
-				alignment.offset <- peaks$aligned.time[which.lower.marker] - alignment.coefficient * peaks$time[which.lower.marker]
-			} else {
+				if (length(which.upper.marker) > 1) {
+					warning(paste0("conflicting upper markers in ", batch, " well ", well.number), call. = F)
+				} else if (length(which.upper.marker) == 0) {
+					warning(paste0("no upper marker in ", batch, " well ", well.number), call. = F)
+				} else {
+					alignment.coefficient <- diff(peaks$aligned.time[c(which.lower.marker, which.upper.marker)]) / diff(peaks$time[c(which.lower.marker, which.upper.marker)])
+					alignment.offset <- peaks$aligned.time[which.lower.marker] - alignment.coefficient * peaks$time[which.lower.marker]
+				}
+			} else { # assay design has only lower marker not upper
 				alignment.coefficient <- peaks$aligned.time[which.lower.marker] / peaks$time[which.lower.marker]
 				alignment.offset <- 0
 			}
@@ -80,7 +90,7 @@ read.bioanalyzer <- function(xml.file, method = "hyman", extrapolate = FALSE) {
 			list(
 				data = raw.data,
 				samples = data.frame(batch, well.number, sample.name, sample.observations, sample.comment, RIN, is.ladder, stringsAsFactors = F),
-				peaks = peaks,
+				peaks = if (nrow(peaks) > 0) peaks else NULL,
 				alignment.values = c(alignment.coefficient, alignment.offset)
 			)
 		}
diff --git a/R/summary.R b/R/summary.R
@@ -8,21 +8,31 @@
 #'
 #' @export
 summarize.subset <- function(sample.frame) {
-	total.molarity <- sum(sample.frame$molarity)
-	sample.median <- round(sample.frame$length[min(which(cumsum(sample.frame$molarity) >= total.molarity / 2))])
-	sample.mean <- sum(sample.frame$molarity * sample.frame$length) / total.molarity
-	length.residuals <- sample.frame$length - sample.mean
-	sample.sd <- sqrt(sum(sample.frame$molarity * length.residuals^2) / total.molarity)
-	sample.skewness <- sum(sample.frame$molarity * length.residuals^3) / total.molarity / sample.sd^3
-	sample.kurtosis <- sum(sample.frame$molarity * length.residuals^4) / total.molarity / sample.sd^4
-	
-	c(
-		Median = sample.median,
-		Mean = sample.mean,
-		SD = sample.sd,
-		Skewness = sample.skewness,
-		Kurtosis = sample.kurtosis
-	)
+	if (nrow(sample.frame) < 2) { # fewer than two rows: return NA for every statistic instead of computing them
+		c(
+			Median = NA,
+			Mean = NA,
+			SD = NA,
+			Skewness = NA,
+			Kurtosis = NA
+		)
+	} else { # two or more rows: compute molarity-weighted statistics of the length column
+		total.molarity <- sum(sample.frame$molarity) # normalizing constant for all the weighted moments below
+		sample.median <- round(sample.frame$length[min(which(cumsum(sample.frame$molarity) >= total.molarity / 2))]) # length at the first row whose cumulative molarity reaches half the total, rounded to a whole number
+		sample.mean <- sum(sample.frame$molarity * sample.frame$length) / total.molarity # molarity-weighted mean of length
+		length.residuals <- sample.frame$length - sample.mean # deviation of each length from the weighted mean
+		sample.sd <- sqrt(sum(sample.frame$molarity * length.residuals^2) / total.molarity) # weighted standard deviation, normalized by total molarity (no degrees-of-freedom correction)
+		sample.skewness <- sum(sample.frame$molarity * length.residuals^3) / total.molarity / sample.sd^3 # weighted third moment divided by SD^3
+		sample.kurtosis <- sum(sample.frame$molarity * length.residuals^4) / total.molarity / sample.sd^4 # weighted fourth moment divided by SD^4; 3 is not subtracted, so this is not excess kurtosis
+		
+		c(
+			Median = sample.median,
+			Mean = sample.mean,
+			SD = sample.sd,
+			Skewness = sample.skewness,
+			Kurtosis = sample.kurtosis
+		)
+	}
 }
 
 
@@ -73,7 +83,7 @@ summarize.custom <- function(
 ) {
 	stopifnot("upper bound must be greater than lower bound" = upper.bound > lower.bound)
 	in.this.region <- in.custom.region(electrophoresis$data, lower.bound, upper.bound, "length")
-	result <- as.data.frame(t(simplify2array(by(electrophoresis$data[in.this.region,], electrophoresis$data$sample.index[in.this.region], summarize.subset))))
+	result <- as.data.frame(t(simplify2array(lapply(unique(electrophoresis$data$sample.index), function(index) summarize.subset(subset(electrophoresis$data, in.this.region & sample.index == index))))))
 	
 	if (lower.bound == -Inf) {
 		if (upper.bound != Inf) { # bounded only on right
diff --git a/README.md b/README.md
@@ -8,16 +8,16 @@ Install the required dependencies:
 
 Install the newest release (includes 22 MB of demo data):
 
-    > install.packages("https://github.com/jwfoley/bioanalyzeR/releases/download/v0.10.0/bioanalyzeR_0.10.0.tar.gz", repos = NULL)
+    > install.packages("https://github.com/jwfoley/bioanalyzeR/releases/download/v0.10.1/bioanalyzeR_0.10.1.tar.gz", repos = NULL)
 
 or, install the newest release without the demo data:
 
-    > install.packages("https://github.com/jwfoley/bioanalyzeR/releases/download/v0.10.0/bioanalyzeR_0.10.0-no_data.tar.gz", repos = NULL)
+    > install.packages("https://github.com/jwfoley/bioanalyzeR/releases/download/v0.10.1/bioanalyzeR_0.10.1-no_data.tar.gz", repos = NULL)
 
 
 # Documentation
 
-See the vignette [online](https://stanford.edu/~jwfoley/bioanalyzeR.html) or in R:
+See the vignette [online](https://jwfoley.com/bioanalyzeR.html) or in R:
 
     > vignette("bioanalyzeR")