Skip to content

Commit

Permalink
Add additional input checks to check for frequently occurring input errors
Browse files Browse the repository at this point in the history
  • Loading branch information
browaeysrobin committed Aug 8, 2023
1 parent 9c58aba commit 57c41fa
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 2 deletions.
29 changes: 29 additions & 0 deletions R/muscat_de.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,20 +86,37 @@ perform_muscat_de_analysis = function(sce, sample_id, celltype_id, group_id, bat
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,celltype_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce)[,celltype_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,celltype_id]))))){
stop("All the cell type labels in SummarizedExperiment::colData(sce)[,celltype_id] should be syntactically valid R names - see make.names")
}
}

if(is.factor(SummarizedExperiment::colData(sce)[,group_id])){
is_make_names = levels(SummarizedExperiment::colData(sce)[,group_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,group_id]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,group_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce)[,group_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,group_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,group_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,group_id]))))){
stop("All the group/condition labels in SummarizedExperiment::colData(sce)[,group_id] should be syntactically valid R names - see make.names")
}
}
if(is.factor(SummarizedExperiment::colData(sce)[,sample_id])){
is_make_names = levels(SummarizedExperiment::colData(sce)[,sample_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,sample_id]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,sample_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce)[,sample_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,sample_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,sample_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,sample_id]))))){
stop("All the sample_id labels in SummarizedExperiment::colData(sce)[,sample_id] should be syntactically valid R names - see make.names")
}
}


if(!is.character(contrasts)){
stop("contrasts should be a character vector")
}
Expand Down Expand Up @@ -173,9 +190,21 @@ perform_muscat_de_analysis = function(sce, sample_id, celltype_id, group_id, bat
sid = "id", # sample IDs (ctrl/stim.1234)
drop = FALSE) # drop all other SummarizedExperiment::colData columns ----------------- change to false

# check that each sample_id belongs to exactly one group/condition (a sample_id with cells in more than one group_id triggers an error)
if (sum(table(sce$sample_id, sce$group_id) %>% apply(1, function(row_oi){sum(row_oi > 0)}) > 1) > 0){
stop("One or more of your sample_ids belongs to more than one group/condition of interest. Please make sure that all sample_ids are uniquely divided over your groups/conditions.")
}

pb = muscat::aggregateData(sce,
assay = assay_oi_pb, fun = fun_oi_pb,
by = c("cluster_id", "sample_id"))

if(assay_oi_pb == "counts"){
libsizes = colSums(SummarizedExperiment::assay(pb))
if (!isTRUE(all(libsizes == floor(libsizes)))) {
warning("non-integer library sizes: are you sure you are working with raw counts?")
}
}

# prepare the experiment info (ei) table if batches present
if(length(batches) > 1){
Expand Down
122 changes: 122 additions & 0 deletions R/pipeline_wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,42 @@ get_abundance_expression_info = function(sce, sample_id, group_id, celltype_id,

requireNamespace("dplyr")
requireNamespace("ggplot2")

# the celltype, group and sample labels (factor levels, or the unique values if not a factor) must be syntactically valid R names - stop with an informative error otherwise
if(is.factor(SummarizedExperiment::colData(sce)[,celltype_id])){
is_make_names = levels(SummarizedExperiment::colData(sce)[,celltype_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,celltype_id]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,celltype_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce)[,celltype_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,celltype_id]))))){
stop("All the cell type labels in SummarizedExperiment::colData(sce)[,celltype_id] should be syntactically valid R names - see make.names")
}
}

if(is.factor(SummarizedExperiment::colData(sce)[,group_id])){
is_make_names = levels(SummarizedExperiment::colData(sce)[,group_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,group_id]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,group_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce)[,group_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,group_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,group_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,group_id]))))){
stop("All the group/condition labels in SummarizedExperiment::colData(sce)[,group_id] should be syntactically valid R names - see make.names")
}
}
if(is.factor(SummarizedExperiment::colData(sce)[,sample_id])){
is_make_names = levels(SummarizedExperiment::colData(sce)[,sample_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,sample_id]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,sample_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce)[,sample_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,sample_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,sample_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,sample_id]))))){
stop("All the sample_id labels in SummarizedExperiment::colData(sce)[,sample_id] should be syntactically valid R names - see make.names")
}
}

### Receiver abundance plots

Expand Down Expand Up @@ -218,6 +254,76 @@ get_abundance_expression_info_separate = function(sce_receiver, sce_sender, samp
requireNamespace("dplyr")
requireNamespace("ggplot2")

# receiver object: the celltype, group and sample labels (factor levels, or the unique values if not a factor) must be syntactically valid R names - stop with an informative error otherwise
if(is.factor(SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver])){
is_make_names = levels(SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver]) == make.names(levels(SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver]))){
stop("The levels of the factor SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver])) == make.names(unique(sort(SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver]))))){
stop("All the cell type labels in SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver] should be syntactically valid R names - see make.names")
}
}

if(is.factor(SummarizedExperiment::colData(sce_receiver)[,group_id])){
is_make_names = levels(SummarizedExperiment::colData(sce_receiver)[,group_id]) == make.names(levels(SummarizedExperiment::colData(sce_receiver)[,group_id]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce_receiver)[,group_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce_receiver)[,group_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce_receiver)[,group_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce_receiver)[,group_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce_receiver)[,group_id]))))){
stop("All the group/condition labels in SummarizedExperiment::colData(sce_receiver)[,group_id] should be syntactically valid R names - see make.names")
}
}
if(is.factor(SummarizedExperiment::colData(sce_receiver)[,sample_id])){
is_make_names = levels(SummarizedExperiment::colData(sce_receiver)[,sample_id]) == make.names(levels(SummarizedExperiment::colData(sce_receiver)[,sample_id]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce_receiver)[,sample_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce_receiver)[,sample_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce_receiver)[,sample_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce_receiver)[,sample_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce_receiver)[,sample_id]))))){
stop("All the sample_id labels in SummarizedExperiment::colData(sce_receiver)[,sample_id] should be syntactically valid R names - see make.names")
}
}
# sender object: the celltype, group and sample labels (factor levels, or the unique values if not a factor) must be syntactically valid R names - stop with an informative error otherwise
if(is.factor(SummarizedExperiment::colData(sce_sender)[,celltype_id_sender])){
is_make_names = levels(SummarizedExperiment::colData(sce_sender)[,celltype_id_sender]) == make.names(levels(SummarizedExperiment::colData(sce_sender)[,celltype_id_sender]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce_sender)[,celltype_id_sender]))){
stop("The levels of the factor SummarizedExperiment::colData(sce_sender)[,celltype_id_sender] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce_sender)[,celltype_id_sender])) == make.names(unique(sort(SummarizedExperiment::colData(sce_sender)[,celltype_id_sender])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce_sender)[,celltype_id_sender]))))){
stop("All the cell type labels in SummarizedExperiment::colData(sce_sender)[,celltype_id_sender] should be syntactically valid R names - see make.names")
}
}

if(is.factor(SummarizedExperiment::colData(sce_sender)[,group_id])){
is_make_names = levels(SummarizedExperiment::colData(sce_sender)[,group_id]) == make.names(levels(SummarizedExperiment::colData(sce_sender)[,group_id]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce_sender)[,group_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce_sender)[,group_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce_sender)[,group_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce_sender)[,group_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce_sender)[,group_id]))))){
stop("All the group/condition labels in SummarizedExperiment::colData(sce_sender)[,group_id] should be syntactically valid R names - see make.names")
}
}
if(is.factor(SummarizedExperiment::colData(sce_sender)[,sample_id])){
is_make_names = levels(SummarizedExperiment::colData(sce_sender)[,sample_id]) == make.names(levels(SummarizedExperiment::colData(sce_sender)[,sample_id]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce_sender)[,sample_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce_sender)[,sample_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce_sender)[,sample_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce_sender)[,sample_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce_sender)[,sample_id]))))){
stop("All the sample_id labels in SummarizedExperiment::colData(sce_sender)[,sample_id] should be syntactically valid R names - see make.names")
}
}
### Receiver plots and info

metadata_abundance = SummarizedExperiment::colData(sce_receiver)[,c(sample_id, group_id, celltype_id_receiver)] %>% tibble::as_tibble()
Expand Down Expand Up @@ -567,18 +673,34 @@ get_DE_info = function(sce, sample_id, group_id, celltype_id, batches, covariate
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,celltype_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce)[,celltype_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,celltype_id]))))){
stop("All the cell type labels in SummarizedExperiment::colData(sce)[,celltype_id] should be syntactically valid R names - see make.names")
}
}

if(is.factor(SummarizedExperiment::colData(sce)[,group_id])){
is_make_names = levels(SummarizedExperiment::colData(sce)[,group_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,group_id]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,group_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce)[,group_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,group_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,group_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,group_id]))))){
stop("All the group/condition labels in SummarizedExperiment::colData(sce)[,group_id] should be syntactically valid R names - see make.names")
}
}
if(is.factor(SummarizedExperiment::colData(sce)[,sample_id])){
is_make_names = levels(SummarizedExperiment::colData(sce)[,sample_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,sample_id]))
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,sample_id]))){
stop("The levels of the factor SummarizedExperiment::colData(sce)[,sample_id] should be a syntactically valid R names - see make.names")
}
} else{
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,sample_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,sample_id])))
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,sample_id]))))){
stop("All the sample_id labels in SummarizedExperiment::colData(sce)[,sample_id] should be syntactically valid R names - see make.names")
}
}

if(!is.character(contrasts_oi)){
Expand Down
3 changes: 2 additions & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ When applying MultiNicheNet on datasets with many samples and cell types, it is

## Frequently recurring questions and issues

* Even though it is stated in the vignettes, many reported issues arise because names of celltypes, groups/conditions, and/or samples are not syntactically valid. Before reporting your issue, make sure you satisfy this condition and other conditions described in the vignettes.
* Even though it is stated in the vignettes, many reported issues arise because names of celltypes, groups/conditions, and/or samples are not syntactically valid. Before reporting your issue, make sure you satisfy this condition and other conditions described in the vignettes. In the latest version of MultiNicheNet, input checks are run to check this and give an understandable error message.
* It is required that each sample is uniquely assigned to only one condition/group of interest. See the vignettes about paired and multifactorial analysis to see how to define your analysis input when you have multiple samples and conditions per patient. In the latest version of MultiNicheNet, input checks are run to check this and give an understandable error message.
* We strongly recommend having at least 4 samples in each of the groups/conditions you want to compare. With fewer samples, the benefits of performing a pseudobulk-based DE analysis are less clear and non-multi-sample tools for differential cell-cell communication might be better alternatives.

## References
Expand Down
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,14 @@ plots; and 2) interpreting the results and generating visualizations.
arise because names of celltypes, groups/conditions, and/or samples
are not syntactically valid. Before reporting your issue, make sure
you satisfy this condition and other conditions described in the
vignettes.
vignettes. In the latest version of MultiNicheNet, input checks are
run to check this and give an understandable error message.
- It is required that each sample is uniquely assigned to only one
condition/group of interest. See the vignettes about paired and
multifactorial analysis to see how to define your analysis input
when you have multiple samples and conditions per patient. In the
latest version of MultiNicheNet, input checks are run to check this
and give an understandable error message.
- We strongly recommend having at least 4 samples in each of the
groups/conditions you want to compare. With fewer samples, the
benefits of performing a pseudobulk-based DE analysis are less clear
Expand Down
Binary file modified tests/testthat/Rplots.pdf
Binary file not shown.

0 comments on commit 57c41fa

Please sign in to comment.