-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcompare_alpha_diversity.R
158 lines (126 loc) · 5.59 KB
/
compare_alpha_diversity.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# @ Thomas W. Battaglia
#' Calculate alpha diversity statistics
#'
#' Compute p-values and multiple comparisons adjusted q-values for
#' two-group comparisons across multiple timepoints.
#'
#' @param physeq A phyloseq object
#' @param x Variable that describes Time
#' @param group Variable the describes different groups for comparisons
#' @param diversity Diversity metric to use for alpha diversity calculations
#' @param test_type type of test to use for significance testing
#' @param col_var Column name if diversity measurements are in mapping file
#' @param num_perm Number of permutations for non parametric tests
#' @param multiple_corrections Should multiple comparisons be corrected on pvalues
#' @param write Write table to file
#' @param filename Filename of the output results table
#' @param ... Additional arguments
#' @return A dataframe with taxa information and sample metadata
#' @export
compare_alpha_diversity <- function(physeq,
x = "Day",
group = "Treatment",
diversity = c("Observed", "Shannon", "Simpson"),
test_type = c("nonparametric", "parametric"),
col_var = "PD_whole_tree_alpha",
num_perm = 999,
multiple_corrections = T,
write = F,
filename = "results", ...){
if(class(physeq)=="phyloseq"){
message("Recognized input as phyloseq object. Proceeding with analysis.")
phylo_data <- phyloseq::plot_richness(physeq = physeq, x = x, measures = diversity, color = group)$data
}
if(class(physeq)=="data.frame"){
message("Recognized input as data-frame. Proceeding with analysis.")
phylo_data = physeq
names(phylo_data)[which(names(phylo_data)== col_var)] <- "value"
}
# Get xx levels
x_levels <- levels(phylo_data[ ,x])
# Get comparing groups
comparing_groups <- levels(phylo_data[ ,group])
# Make dataframe to store results
results <- data.frame(Group1 = as.character(),
Group2 = as.character(),
x = as.character(),
n = as.numeric(),
Group1_mean = as.numeric(),
Group1_std = as.numeric(),
Group2_mean = as.numeric(),
Group2_std = as.numeric(),
t_stat = as.numeric(),
pvalue = as.numeric())
# Create list of combinations
comb_list <- combn(comparing_groups, 2, simplify = F)
# For each combination of factors
for(comp in comb_list){
# Subset table for only comparisons on interest.
phylo_data_comsub <- phylo_data[phylo_data[ ,group] %in% comp,]
phylo_data_comsub[ ,group] <- droplevels(phylo_data_comsub[ ,group])
# Iterate over each x level. (Run through each timepoint)
for(i in x_levels){
# Subset table
data_table <- subset(phylo_data_comsub, phylo_data_comsub[ ,x] == i)
# Parametric T-test
if(test_type == "parametric"){
# Perform parametric t.test
sig_res <- try(t.test(data_table$value ~ data_table[ ,group]), silent = TRUE)
# If too little amount of samples are present for either group, result in None.
if(class(sig_res) == "try-error"){
tstat <- "NA"
pval <- 'NA'
}
else{
# If no error, assign results to variables
tstat <- sig_res[["statistic"]][[1]]
pval <- sig_res[["p.value"]][[1]]
}
}
# Non Parametric Monte Carlo Bootstrap
if(test_type == "nonparametric"){
# Perform non-parametric t.test monte carlo simulations
sig_res <- try(coin::oneway_test(data_table$value ~ data_table[ ,group], distribution = approximate(B = num_perm)), silent = TRUE)
# If too little amount of samples are present for either group, result in None.
if(class(sig_res) == "try-error"){
tstat <- "NA"
pval <- 'NA'
}
else{
# If no error, assign results to variables
tstat <- sig_res@statistic@teststatistic[[1]]
pval <- pvalue(sig_res)[[1]]
}
}
# Find which groups are being compared.
compared <- levels(data_table[ ,group])
# Find mean and standard deviations
means_out <- by(data = data_table$value, data_table[ ,group], mean)
std_out <- by(data = data_table$value, data_table[ ,group], sd)
# Place results into dataframe
results_mat <- data.frame(Group1 = compared[1],
Group2 = compared[2],
x = i,
n = nrow(data_table),
Group1_mean = means_out[1],
Group1_std = std_out[1],
Group2_mean = means_out[2],
Group2_std = std_out[2],
t_stat = tstat,
pvalue = pval,
row.names = NULL)
# Merge results with existing results
results <- rbind(results_mat, results)
} # EOF Loop through each value of x
} # EOF Comparing different groups
# Get multiple comparisons qvalue
if (multiple_corrections == T){
results$qvalue <- p.adjust(results$pvalue, method = "fdr")
}
# Write table if chosen
if (write == T){
write.csv(x = results, file = paste(filename, ".csv", sep = ""))
}
# Return Table of results
return(results)
}