Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge fast processes #259

Merged
merged 26 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .nf-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,4 @@ template:
skip_features:
- fastqc
- is_nfcore
version: 1.9.3
version: 1.10.0dev
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## v1.10.0dev

### Changes

1. Merged the following processes to improve the efficiency of the pipeline:
   - Index creation for output VCFs is now done by the processes that create these VCFs, instead of by separate indexing modules
   - The two filter modules used by `--filter` have been merged into a single process
   - BED filtering and intersecting with the regions of interest have been merged into a single process

## v1.9.3 Nifty Nieuwkerke - [January 23 2025]

1. Fix db postprocess in vcf2db module
Expand Down
72 changes: 24 additions & 48 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,11 @@ process {
].join(" ")
}

withName: "^.*CRAM_PREPARE_SAMTOOLS_BEDTOOLS:FILTER_BEDS\$" {
withName: "^.*CRAM_PREPARE_SAMTOOLS_BEDTOOLS:PROCESS_BEDS\$" {
ext.prefix = { "${meta.id}.filter"}
ext.args = "-vE \"LOW_COVERAGE|NO_COVERAGE${params.keep_alt_contigs ? "" : "|alt|random|decoy|Un"}\""
ext.args2 = "-d 150"
}

withName: "^.*CRAM_PREPARE_SAMTOOLS_BEDTOOLS:BEDTOOLS_INTERSECT\$" {
ext.prefix = {"${meta.id}.intersect"}
ext.args = "-sorted"
ext.args3 = "-sorted"
}

/*
Expand Down Expand Up @@ -87,7 +83,7 @@ process {

withName: "^.*CRAM_CALL_GATK4:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" {
ext.prefix = { "${meta.id}.${meta.caller}.g" }
ext.args = '--allow-overlaps --output-type z'
ext.args = '--allow-overlaps --output-type z --write-index=tbi'
}

withName: "^.*CRAM_CALL_GATK4:BCFTOOLS_STATS\$" {
Expand All @@ -108,7 +104,7 @@ process {

withName: "^.*BAM_CALL_ELPREP:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" {
ext.prefix = { "${meta.id}.${meta.caller}.g" }
ext.args = '--allow-overlaps --output-type z'
ext.args = '--allow-overlaps --output-type z --write-index=tbi'
}

withName: "^.*BAM_CALL_ELPREP:BCFTOOLS_STATS\$" {
Expand Down Expand Up @@ -168,7 +164,7 @@ process {

withName: "^.*GVCF_JOINT_GENOTYPE_GATK4:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" {
ext.prefix = { "${meta.id}.concat" }
ext.args = "--allow-overlaps --output-type z"
ext.args = "--allow-overlaps --output-type z --write-index=tbi"
}

/*
Expand All @@ -194,45 +190,29 @@ process {
}

withName: "^.*BAM_CALL_VARDICTJAVA:VCF_CONCAT_BCFTOOLS:BCFTOOLS_CONCAT\$" {
ext.args = '--allow-overlaps --output-type z'
ext.args = '--allow-overlaps --output-type z --write-index=tbi'
ext.prefix = {"${meta.id}.concat"}
}

withName: "^.*BAM_CALL_VARDICTJAVA:TABIX_VCFANNO\$" {
ext.prefix = {"${meta.id}.vcfanno"}
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
FILTER
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

withName: "^.*VCF_FILTER_BCFTOOLS:FILTER_1\$" {
ext.prefix = { "${meta.id}.filtered1" }
ext.args = {
meta.caller == "vardict" ?
"-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}' --output-type z":
meta.caller == "haplotypecaller" ?
"--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'":
meta.caller == "elprep" ?
"--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'":
""
}
}

withName: "^.*VCF_FILTER_BCFTOOLS:FILTER_2\$" {
ext.args = {
meta.caller == "vardict" ?
"--soft-filter 'LowFreqBias' --mode '+' -e 'FORMAT/AF[0:*] < 0.02 && FORMAT/VD[0] < 30 && INFO/SBF < 0.1 && INFO/NM >= 2.0' --output-type z" :
meta.caller == "haplotypecaller" ?
'--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' :
meta.caller == "elprep" ?
'--output-type z --soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' :
""

}
ext.prefix = {"${meta.id}.filtered"}
withName: "^.*VCF_FILTER_BCFTOOLS:BCFTOOLS_FILTER\$" {
ext.prefix = { "${meta.id}.filtered" }
ext.args = { [
meta.caller == "vardict" ? "-i 'QUAL >= 0${params.only_pass ? " && FILTER=\"PASS\"" : ""}'" : "",
meta.caller == "haplotypecaller" ? "--soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'" : "",
meta.caller == "elprep" ? "--output-type z --soft-filter 'GATKCutoffSNP' -e 'TYPE=\"snp\" && (MQRankSum < -12.5 || ReadPosRankSum < -8.0 || QD < 2.0 || FS > 60.0 || MQ < 30.0)' -m '+'" : ""
].findAll { arg -> arg != "" }.join(" ") }
ext.args2 = { [
meta.caller == "vardict" ? "--soft-filter 'LowFreqBias' --mode '+' -e 'FORMAT/AF[0:*] < 0.02 && FORMAT/VD[0] < 30 && INFO/SBF < 0.1 && INFO/NM >= 2.0'" : "",
meta.caller == "haplotypecaller" ? '--soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' : "",
meta.caller == "elprep" ? '--soft-filter \'GATKCutoffIndel\' -e \'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 )\' -m \'+\'' : "",
"--output-type z --write-index=tbi"
].findAll { arg -> arg != "" }.join(" ") }
}

/*
Expand All @@ -243,7 +223,7 @@ process {

withName: "^.*GERMLINE:BCFTOOLS_NORM\$" {
ext.prefix = {"${meta.id}.normalized"}
ext.args = "-m-"
ext.args = "-m- --output-type z --write-index=tbi"
}

/*
Expand All @@ -264,7 +244,7 @@ process {

withName: "^.*VCF_PED_RTGTOOLS:BCFTOOLS_ANNOTATE\$" {
ext.prefix = { "${meta.id}.${meta.caller}.ped.annotated" }
ext.args = "--output-type z"
ext.args = "--output-type z --write-index=tbi"
}

/*
Expand Down Expand Up @@ -301,20 +281,16 @@ process {
].join(' ').trim()}
}

withName: "^.*VCF_ANNOTATION:VCF_ANNOTATE_ENSEMBLVEP:BCFTOOLS_CONCAT\$" {
withName: "^.*VCF_ANNOTATE_ENSEMBLVEP:BCFTOOLS_CONCAT\$" {
ext.prefix = {"${meta.id}_concat"}
ext.args = "--allow-overlaps --output-type z"
}

withName: "^.*VCF_ANNOTATION:VCF_ANNOTATE_ENSEMBLVEP:BCFTOOLS_SORT\$" {
withName: "^.*VCF_ANNOTATE_ENSEMBLVEP:BCFTOOLS_SORT\$" {
ext.prefix = {"${meta.id}.sorted"}
ext.args = "--write-index=tbi --output-type z"
}

withName: "^.*VCF_ANNOTATION:BGZIP_ANNOTATED_VCFS\$" {
ext.prefix = {"${meta.id}.vcfanno"}
}


/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
VALIDATION
Expand Down
19 changes: 2 additions & 17 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,6 @@
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"bedtools/intersect": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"bedtools/merge": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
Expand Down Expand Up @@ -85,7 +80,7 @@
},
"ensemblvep/vep": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"git_sha": "ef36baef619ebe8a244fee313d44eba571ba73b4",
"installed_by": ["modules"],
"patch": "modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff"
},
Expand Down Expand Up @@ -191,16 +186,6 @@
"installed_by": ["modules"],
"patch": "modules/nf-core/somalier/relate/somalier-relate.diff"
},
"tabix/bgzip": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"tabix/bgziptabix": {
"branch": "master",
"git_sha": "f448e846bdadd80fc8be31fbbc78d9f5b5131a45",
"installed_by": ["modules"]
},
"tabix/tabix": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
Expand All @@ -225,7 +210,7 @@
},
"vcfanno": {
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"git_sha": "b1137e22798227331c9a9a12bd92bd6e865865c5",
"installed_by": ["modules"]
}
}
Expand Down
87 changes: 87 additions & 0 deletions modules/local/bcftools/filter/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
 * BCFTOOLS_FILTER
 *
 * Runs `bcftools filter` on a VCF, optionally chaining up to two further
 * `bcftools filter` invocations through shell pipes so that several filter
 * passes happen inside one task.
 *
 * Configuration (all via `task.ext`):
 *   ext.args    arguments of the first filter pass (reads the input file)
 *   ext.args2   arguments of an optional second pass (reads the pipe)
 *   ext.args3   arguments of an optional third pass (reads the pipe)
 *   ext.prefix  basename of the output file (defaults to meta.id)
 *
 * `--output` is appended after the last configured pass, so the output file
 * extension (and, in the stub, the index type) is derived from the arguments
 * of that last pass only.
 */
process BCFTOOLS_FILTER {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0':
        'biocontainers/bcftools:1.20--h8b25389_0' }"

    input:
    // `tbi` is staged next to the VCF; the command below never names it explicitly
    tuple val(meta), path(vcf), path(tbi)

    output:
    tuple val(meta), path("*.${extension}"), emit: vcf
    tuple val(meta), path("*.tbi")         , emit: tbi, optional: true
    tuple val(meta), path("*.csi")         , emit: csi, optional: true
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def args2  = task.ext.args2  ?: ''
    def args3  = task.ext.args3  ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Arguments of the pass that writes the final file: the last non-empty one
    def final_args = args3 ?: args2 ?: args

    // bcftools output-type code -> file extension, checked in this order;
    // anything unrecognised falls back to plain "vcf"
    def extension_by_type = [b: "bcf.gz", u: "bcf", z: "vcf.gz", v: "vcf"]
    def detected_type = extension_by_type.keySet().find { code ->
        final_args.contains("--output-type ${code}") || final_args.contains("-O${code}")
    }
    // `extension` is deliberately not declared with `def`: the output block reads it
    extension = detected_type ? extension_by_type[detected_type] : "vcf"

    if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"

    // One pipe segment per optional pass; an unset pass contributes an empty string
    def piped_passes = [args2, args3].collect { pass_args ->
        pass_args ? "| bcftools filter --threads ${task.cpus} ${pass_args}" : ""
    }

    """
    bcftools filter \\
        --threads ${task.cpus} \\
        $args \\
        $vcf \\
        ${piped_passes[0]} \\
        ${piped_passes[1]} \\
        --output ${prefix}.${extension}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    def args   = task.ext.args   ?: ''
    def args2  = task.ext.args2  ?: ''
    def args3  = task.ext.args3  ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Same rule as in the script section: only the last configured pass matters
    def final_args = args3 ?: args2 ?: args

    def extension_by_type = [b: "bcf.gz", u: "bcf", z: "vcf.gz", v: "vcf"]
    def detected_type = extension_by_type.keySet().find { code ->
        final_args.contains("--output-type ${code}") || final_args.contains("-O${code}")
    }
    extension = detected_type ? extension_by_type[detected_type] : "vcf"

    // An explicit tbi request wins; any other form of --write-index / -W yields csi
    def wants_tbi   = final_args.contains("--write-index=tbi") || final_args.contains("-W=tbi")
    def wants_index = final_args.contains("--write-index") || final_args.contains("-W")
    def index_type  = wants_tbi ? "tbi" : wants_index ? "csi" : ""

    // Compressed outputs are mocked with a real (empty) gzip stream; an index
    // placeholder is only created alongside a compressed output
    def is_compressed = extension.endsWith(".gz")
    def create_cmd    = is_compressed ? "echo '' | gzip >" : "touch"
    def create_index  = is_compressed && index_type ? "touch ${prefix}.${extension}.${index_type}" : ""

    if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"

    """
    ${create_cmd} ${prefix}.${extension}
    ${create_index}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """
}
Original file line number Diff line number Diff line change
@@ -1,27 +1,26 @@
nextflow_process {

name "Test Process BEDTOOLS_INTERSECT"
name "Test Process BCFTOOLS_FILTER"
script "../main.nf"
process "BEDTOOLS_INTERSECT"
config "./nextflow.config"
process "BCFTOOLS_FILTER"

tag "modules"
tag "modules_nfcore"
tag "bedtools"
tag "bedtools/intersect"
tag "modules_local"
tag "bcftools"
tag "bcftools/filter"

test("sarscov2 - bed - bed") {
test("sarscov2 - 1 filter") {

config "./one_filter.config"

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)
[id:"vcf_test"],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
[]
]

input[1] = [[:], []]
"""
}
}
Expand All @@ -35,18 +34,18 @@ nextflow_process {

}

test("sarscov2 - bam - bam") {
test("sarscov2 - 2 filters") {

config "./two_filters.config"

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/baits.bed', checkIfExists: true)
[id:"vcf_test"],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
[]
]

input[1] = [[:], []]
"""
}
}
Expand All @@ -60,20 +59,18 @@ nextflow_process {

}

test("sarscov2 - bed - stub") {
test("sarscov2 - 3 filters") {

options "-stub"
config "./three_filters.config"

when {
process {
"""
input[0] = [
[ id:'test' ],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)
[id:"vcf_test"],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
[]
]

input[1] = [[:], []]
"""
}
}
Expand Down
Loading
Loading