Skip to content

Commit

Permalink
New functio compare_treatments
Browse files Browse the repository at this point in the history
compares multiple samples in multiple treatments, applying a user-defined function.
  • Loading branch information
zachary-foster committed Aug 17, 2017
1 parent ec38f35 commit 48c0b28
Show file tree
Hide file tree
Showing 6 changed files with 238 additions and 2 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: metacoder
Title: Tools for Parsing, Manipulating, and Graphing Hierarchical Data
Version: 0.1.3.9021
Version: 0.1.3.9022
Authors@R: c( person("Zachary", "Foster", email =
"zacharyfoster1989@gmail.com", role = c("aut", "cre")),
person("Niklaus", "Grunwald", email =
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ S3method(primersearch,character)
S3method(primersearch,taxmap)
export("%>%")
export(calc_obs_props)
export(compare_treatments)
export(contains)
export(diverging_palette)
export(drawDetails.resizingTextGrob)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
* New writer named `write_mothur_taxonomy` to create an imitation of the mothur taxonomy format.
* New writer named `write_unite_general` to create an imitation of the UNITE general FASTA release.
* New writer named `write_silva_fasta` to create an imitation of the SILVA FASTA release.
* New function names `compare_treatments` to compare multiple samples in multiple treatments, applying a user-defined function.

## metacoder 0.1.3

Expand Down
159 changes: 158 additions & 1 deletion R/calculations.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,161 @@ calc_obs_props <- function(obj, dataset, cols = NULL, keep_other_cols = TRUE) {
}

return(prop_table)
}
}






#' Compare treatments
#'
#' Apply a function to compare data, usually abundance, from pairs of
#' treatments. By default, every pairwise combination of treatments are
#' compared. A custom function can be supplied to perform the comparison.
#'
#' @param obj A taxmap object
#' @param dataset The name of a table in \code{obj} that contains data for each
#' sample in columns.
#' @param sample_ids The names of sample columns in \code{dataset}
#' @param treatments The treatment associated with each sample. Must be the same
#' order and length as \code{sample_ids}.
#' @param func The function to apply for each comparison. For each row in
#' \code{dataset}, for each combination of treatments, this function will
#' recieve the data for each treatment, passed a two character vecotors.
#' Therefor the function must take at least 2 arguments corresponding to the
#' two treatments compared. The function should return a vector or list or
#' results of a fixed length. If named, the names will be used in the output.
#' The names should be consistent as well. A simple example is
#' \code{function(x, y) mean(x) - mean(y)}. By default, the following function
#' is used:
#' \preformatted{
#' function(abund_1, abund_2) {
#' log_ratio <- log2(median(abund_1) / median(abund_2))
#' if (is.nan(log_ratio)) {
#' log_ratio <- 0
#' }
#' list(log2_median_ratio = log_ratio,
#' median_diff = median(abund_1) - median(abund_2),
#' mean_diff = mean(abund_1) - mean(abund_2),
#' wilcox_p_value = wilcox.test(abund_1, abund_2)$p.value)
#' }
#' }
#' @param combinations Which combinations of treatments to use. Must be a list
#' of vectors, each containing the names of 2 treatments to compare. By
#' default, all pairwise combinations of treatments are compared.
#' @param keep_cols If \code{TRUE}, preserve all columns not in
#' \code{sample_ids} in the output. If \code{FALSE}, dont keep other columns.
#' If a column names or indexes are supplied, only preserve those columns.
#'
#' @return A tibble
#'
#' @export
compare_treatments <- function(obj, dataset, sample_ids, treatments,
func = NULL, combinations = NULL,
keep_cols = TRUE) {
# Get abundance by sample data
abund_data <- obj$data[[dataset]]

# Define defualt function
if (is.null(func)) {
func <- function(abund_1, abund_2) {
log_ratio <- log2(median(abund_1) / median(abund_2))
if (is.nan(log_ratio)) {
log_ratio <- 0
}
list(log2_median_ratio = log_ratio,
median_diff = median(abund_1) - median(abund_2),
mean_diff = mean(abund_1) - mean(abund_2),
wilcox_p_value = wilcox.test(abund_1, abund_2)$p.value)
}
}

# Parse "keep_cols" option
kc_error_msg <- 'The "keep_cols" option must either be TRUE/FALSE or a vector of valid column names/indexes.'
if (is.logical(keep_cols)) {
if (length(keep_cols) != 1) {
stop(kc_error_msg)
} else if (keep_cols) {
keep_cols <- colnames(abund_data)[! colnames(abund_data) %in% sample_ids]
} else {
keep_cols <- c()
}
} else {
if (is.numeric(keep_cols)) {
invalid_cols <- keep_cols[! keep_cols %in% seq_len(ncol(abund_data))]
} else {
invalid_cols <- keep_cols[! keep_cols %in% colnames(abund_data)]
}
if (length(invalid_cols) > 0) {
stop(paste0(kc_error_msg,
" The following column names/indexes are not valid:\n",
" ", limited_print(invalid_cols, type = "silent")))
}
}

# Get every combination of treatments to compare
if (is.null(combinations)) {
combinations <- t(combn(unique(treatments), 2))
combinations <- lapply(seq_len(nrow(combinations)), function(i) combinations[i, ])
}

# Make function to compare one pair of treatments
one_comparison <- function(treat_1, treat_2) {
output <- lapply(seq_len(nrow(abund_data)), function(i) {
# get samples ids for each treatment
samples_1 <- sample_ids[treatments == treat_1]
samples_2 <- sample_ids[treatments == treat_2]

# get abundance data for each treatment
abund_1 <- unlist(abund_data[i, samples_1])
abund_2 <- unlist(abund_data[i, samples_2])

# Run user-supplied function on abundance info
result <- as.list(func(abund_1, abund_2))

# Complain if nothing is returned
if (length(result) == 0) {
stop(paste0("The function supplied returned nothing when given the following data:\n",
' Treatments compared: "', treat_1, '" vs "', treat_2, '"\n',
' Row index: ', i, '\n',
limited_print(abund_1, prefix = paste0(' "', treat_1, '" data: '), type = "silent"),
limited_print(abund_2, prefix = paste0(' "', treat_2, '" data: '), type = "silent")))
}

# If the output does not have names, add defaults
if (is.null(names(result))) {
if (length(result) == 1) {
names(result) <- "value"
} else {
names(result) <- paste("value_", seq_along(result))
}
}

do.call(data.frame, result)
})

# Combine the results for each row into a data frame
output <- as.data.frame(do.call(rbind, output))

# Add treatments compared
output <- cbind(data.frame(stringsAsFactors = FALSE,
treatment_1 = treat_1,
treatment_2 = treat_2),
output)

# Add in other columns if specified
output <- cbind(abund_data[, keep_cols], output)

return(output)
}

# Compare all pairs of treatments and combine
output <- lapply(seq_len(length(combinations)), function(i) {
one_comparison(combinations[[i]][1], combinations[[i]][2])
})
output <- do.call(rbind, output)

# Convert to tibble and return
dplyr::as.tbl(output)
}
58 changes: 58 additions & 0 deletions man/compare_treatments.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions scratch/issue_141--phyloseq_converter.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,22 @@ obj %>%
node_label = taxon_names)
```


##

```{r}
phy <- readRDS("scratch/example.phyloseq")
obj <- parse_phyloseq(phy)
# Convert counts to proportions
obj$data$otu_table <- calc_obs_props(obj,
dataset = "otu_table",
cols = obj$data$sam_data$sample_ids)
# Calculate difference between treatments
obj$data$diff_table <- compare_treatments(obj,
dataset = "otu_table",
sample_ids = obj$data$sam_data$sample_ids,
treatments = obj$data$sam_data$status)
```

0 comments on commit 48c0b28

Please sign in to comment.