-
Notifications
You must be signed in to change notification settings - Fork 18
/
GATAI.R
127 lines (108 loc) · 4.85 KB
/
GATAI.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#' @title Low level interface function with GATAI
#' @description This function interfaces with the GATAI software which removes the genes that create the \code{\link{PlotSignature}} pattern.
#' @param ExpressionSet a standard PhyloExpressionSet, DivergenceExpressionSet or PolymorphismsExpressionSet object.
#' @param singlecell_data a logical value indicating whether or not the input \code{ExpressionSet} is a single cell dataset.
#' @param gatai_results_path where shall GATAI results be stored. Default \code{gatai_results_path = tempdir()}.
#' @param result_overwrite Logical value indicating whether local results of previous GATAI run should be over-written and re-computed?
#' @param condaenv The conda environment to use. For \code{\link[reticulate]{use_condaenv}}, this can be the name, the absolute prefix path, or the absolute path to the python binary. If the name is ambiguous, the first environment is used and a warning is issued. For \code{\link[reticulate]{use_miniconda}}, the only conda installation searched is the one installed by \code{\link[reticulate]{install_miniconda}}.
#' @author Filipa Martins Costa and Hajk-Georg Drost
#' @export
GATAI <- function(ExpressionSet,
singlecell_data = FALSE,
gatai_results_path = tempdir(),
result_overwrite = FALSE,
condaenv = NULL) {
# use specific conda environment if selected
if (!is.null(condaenv)) {
reticulate::use_condaenv(condaenv = condaenv, required = TRUE)
}
# check if GATAI is properly installed
is_installed_gatai()
message("Starting GATAI optimisation ...")
# generate a tempdir version of the input ExpressionSet to run GATAI with random id attached
export_expressionset_for_gatai <- file.path(tempdir(), "expressionset.tsv")
# file path to extracted genes output file
extracted_genes_path <- file.path(gatai_results_path, "removed_genes", "extracted_genes.txt")
# check if a local version of GATAI result is already present
if(fs::file_exists(extracted_genes_path) && !result_overwrite){
message("It seems like results from a previous GATAI run were still present at '", extracted_genes_path, "'.")
message("This previous GATAI result was now imported. Please select argument 'result_overwrite = TRUE' in case you would like to re-calculate eveything with GATAI.")
removed_genes <- readr::read_tsv(
file.path(gatai_results_path, "removed_genes", "extracted_genes.txt"),
col_names = F,
show_col_types = FALSE
)
colnames(removed_genes) <- "removed_genes"
} else {
message(
"A local version of the input ExpressionSet was stored for GATAI runs at '",
export_expressionset_for_gatai,
"'."
)
# store input ExpressionSet in tempdir()
readr::write_delim(
ExpressionSet,
file = export_expressionset_for_gatai,
delim = "\t",
col_names = TRUE
)
# run GATAI
gatai <- reticulate::import("gatai")
if (!singlecell_data) {
# here we need to specify a path to tempdir()/gatai_results_path where removed genes are stored
gatai_command <- paste(
"gatai run_minimizer",
export_expressionset_for_gatai,
file.path(gatai_results_path, "removed_genes"),
"--save_stats"
)
} else {
# here we need to specify a path to tempdir()/gatai_results_path where removed genes are stored
gatai_command <- paste(
"gatai run_minimizer",
export_expressionset_for_gatai,
file.path(gatai_results_path,
"removed_genes")
, "--single_cell", "--save_stats")
}
# systems call with tryCatch
tryCatch(
{
system(gatai_command)
message("GATAI Optimization Complete.")
},
warning = function(w) {
message("Warning during GATAI optimization: ", conditionMessage(w))
},
error = function(e) {
message("Error during GATAI optimization: ", conditionMessage(e))
}
)
# print the path to GATAI results as message
message(
"GATAI results are stored at '",
file.path(gatai_results_path, "removed_genes"),
"'."
)
# read the extracted_genes.txt file
removed_genes <- readr::read_tsv(
file.path(gatai_results_path, "removed_genes", "extracted_genes.txt"),
col_names = F,
show_col_types = FALSE
)
colnames(removed_genes) <- "removed_genes"
}
# read the summary statistics file
statistics <- readr::read_tsv(
file.path(gatai_results_path, "removed_genes", "summary.txt"),
col_names = F,
show_col_types = FALSE
)
# print summary statistics as messages
colnames(statistics) <- "statistics"
message("Summary statistics:")
for (summary in statistics$statistics) {
message(summary)
}
return(removed_genes)
}