diff --git a/assets/samplesheet_full.csv b/assets/samplesheet_full.csv new file mode 100644 index 00000000..36211ac7 --- /dev/null +++ b/assets/samplesheet_full.csv @@ -0,0 +1,5 @@ +sample,fastq_1,fastq_2,strandedness +GM12878,s3://nf-core-awsmegatests/rnaseq/input_data/SRX1603629_T1_1.fastq.gz,s3://nf-core-awsmegatests/rnaseq/input_data/SRX1603629_T1_2.fastq.gz,reverse +GM12878,s3://nf-core-awsmegatests/rnaseq/input_data/SRX1603630_T1_1.fastq.gz,s3://nf-core-awsmegatests/rnaseq/input_data/SRX1603630_T1_2.fastq.gz,reverse +K562,s3://nf-core-awsmegatests/rnaseq/input_data/SRX1603392_T1_1.fastq.gz,s3://nf-core-awsmegatests/rnaseq/input_data/SRX1603392_T1_2.fastq.gz,reverse +K562,s3://nf-core-awsmegatests/rnaseq/input_data/SRX1603393_T1_1.fastq.gz,s3://nf-core-awsmegatests/rnaseq/input_data/SRX1603393_T1_2.fastq.gz,reverse diff --git a/assets/samplesheet_test.csv b/assets/samplesheet_test.csv index 7131bd5d..fc6dfa21 100644 --- a/assets/samplesheet_test.csv +++ b/assets/samplesheet_test.csv @@ -1,2 +1,4 @@ sample,fastq_1,fastq_2,strandedness GM12878,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/fastq/test.rnaseq_1.fastq.gz,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/fastq/test.rnaseq_2.fastq.gz,reverse +TEST1,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/fastq/test.rnaseq_1.fastq.gz,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/fastq/test.rnaseq_2.fastq.gz,reverse +TEST2,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/fastq/test.rnaseq_1.fastq.gz,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/fastq/test.rnaseq_2.fastq.gz,reverse diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 7634b236..91327e5c 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -1,7 +1,4 @@ -#!/usr/bin/env 
python - -# TODO nf-core: Update the script to check the samplesheet -# This script is based on the example at: https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv +#!/usr/bin/env python3 import os import sys @@ -10,7 +7,7 @@ def parse_args(args=None): - Description = "Reformat nf-core/rnavar samplesheet file and check its contents." + Description = "Reformat nf-core/rnaseq samplesheet file and check its contents." Epilog = "Example usage: python check_samplesheet.py " parser = argparse.ArgumentParser(description=Description, epilog=Epilog) @@ -29,63 +26,66 @@ def make_dir(path): def print_error(error, context="Line", context_str=""): - error_str = "ERROR: Please check samplesheet -> {}".format(error) + error_str = f"ERROR: Please check samplesheet -> {error}" if context != "" and context_str != "": - error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format( - error, context.strip(), context_str.strip() - ) + error_str = f"ERROR: Please check samplesheet -> {error}\n{context.strip()}: '{context_str.strip()}'" print(error_str) sys.exit(1) -# TODO nf-core: Update the check_samplesheet function def check_samplesheet(file_in, file_out): """ This function checks that the samplesheet follows the following structure: - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, + sample,fastq_1,fastq_2,strandedness + SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz,forward + SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz,forward + SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,,forward For an example see: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv + https://github.com/nf-core/test-datasets/blob/rnaseq/samplesheet/v3.1/samplesheet_test.csv """ sample_mapping_dict = {} - with 
open(file_in, "r") as fin: + with open(file_in, "r", encoding='utf-8-sig') as fin: ## Check header - MIN_COLS = 2 - # TODO nf-core: Update the column names for the input samplesheet - HEADER = ["sample", "fastq_1", "fastq_2"] + MIN_COLS = 3 + HEADER = ["sample", "fastq_1", "fastq_2", "strandedness"] header = [x.strip('"') for x in fin.readline().strip().split(",")] if header[: len(HEADER)] != HEADER: - print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) + print( + f"ERROR: Please check samplesheet header -> {','.join(header)} != {','.join(HEADER)}" + ) sys.exit(1) ## Check sample entries for line in fin: lspl = [x.strip().strip('"') for x in line.strip().split(",")] - # Check valid number of columns per row + ## Check valid number of columns per row if len(lspl) < len(HEADER): print_error( - "Invalid number of columns (minimum = {})!".format(len(HEADER)), + f"Invalid number of columns (minimum = {len(HEADER)})!", "Line", line, ) + num_cols = len([x for x in lspl if x]) if num_cols < MIN_COLS: print_error( - "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), + f"Invalid number of populated columns (minimum = {MIN_COLS})!", "Line", line, ) ## Check sample name entries - sample, fastq_1, fastq_2 = lspl[: len(HEADER)] - sample = sample.replace(" ", "_") + sample, fastq_1, fastq_2, strandedness = lspl[: len(HEADER)] + if sample.find(" ") != -1: + print( + f"WARNING: Spaces have been replaced by underscores for sample: {sample}" + ) + sample = sample.replace(" ", "_") if not sample: print_error("Sample entry has not been specified!", "Line", line) @@ -101,16 +101,32 @@ def check_samplesheet(file_in, file_out): line, ) + ## Check strandedness + strandednesses = ["unstranded", "forward", "reverse"] + if strandedness: + if strandedness not in strandednesses: + print_error( + f"Strandedness must be one of '{', '.join(strandednesses)}'!", + "Line", + line, + ) + else: + print_error( + f"Strandedness has 
not been specified! Must be one of {', '.join(strandednesses)}.", + "Line", + line, + ) + ## Auto-detect paired-end/single-end - sample_info = [] ## [single_end, fastq_1, fastq_2] + sample_info = [] ## [single_end, fastq_1, fastq_2, strandedness] if sample and fastq_1 and fastq_2: ## Paired-end short reads - sample_info = ["0", fastq_1, fastq_2] + sample_info = ["0", fastq_1, fastq_2, strandedness] elif sample and fastq_1 and not fastq_2: ## Single-end short reads - sample_info = ["1", fastq_1, fastq_2] + sample_info = ["1", fastq_1, fastq_2, strandedness] else: print_error("Invalid combination of columns provided!", "Line", line) - ## Create sample mapping dictionary = { sample: [ single_end, fastq_1, fastq_2 ] } + ## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2, strandedness ]]} if sample not in sample_mapping_dict: sample_mapping_dict[sample] = [sample_info] else: @@ -124,17 +140,38 @@ def check_samplesheet(file_in, file_out): out_dir = os.path.dirname(file_out) make_dir(out_dir) with open(file_out, "w") as fout: - fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2"]) + "\n") + fout.write( + ",".join(["sample", "single_end", "fastq_1", "fastq_2", "strandedness"]) + + "\n" + ) for sample in sorted(sample_mapping_dict.keys()): - ## Check that multiple runs of the same sample are of the same datatype - if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): - print_error("Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample)) + ## Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + if not all( + x[0] == sample_mapping_dict[sample][0][0] + for x in sample_mapping_dict[sample] + ): + print_error( + f"Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end!", + "Sample", + sample, + ) + + ## Check that multiple runs of the same sample are of the same strandedness + if not all( + x[-1] == sample_mapping_dict[sample][0][-1] + for x in sample_mapping_dict[sample] + ): + print_error( + f"Multiple runs of a sample must have the same strandedness!", + "Sample", + sample, + ) for idx, val in enumerate(sample_mapping_dict[sample]): - fout.write(",".join(["{}_T{}".format(sample, idx + 1)] + val) + "\n") + fout.write(",".join([f"{sample}_T{idx+1}"] + val) + "\n") else: - print_error("No entries to process!", "Samplesheet: {}".format(file_in)) + print_error("No entries to process!", "Samplesheet", file_in) def main(args=None): diff --git a/conf/modules.config b/conf/modules.config index 865c1b44..3c32bd82 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -112,12 +112,19 @@ params { publish_dir = 'variant_calling' publish_files = ['vcf.gz':'', 'vcf.gz.tbi':''] } + 'gatk_indexfeaturefile' { + args = '' + suffix = '.haplotypecaller' + publish_by_meta = true + publish_dir = 'variant_calling' + publish_files = ['vcf.gz':'', 'vcf.gz.tbi':''] + } 'gatk_variantfilter' { args = '' suffix = '.filtered' publish_by_meta = true publish_dir = 'variant_filtering' - publish_files = ['vcf':'', 'vcf.idx':''] + publish_files = ['vcf.gz':'', 'vcf.gz.tbi':''] } 'fastqc' { args = "--quiet" diff --git a/conf/test.config b/conf/test.config index c56b48c1..8e85b5ac 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,7 +20,8 @@ params { max_time = 6.h // Input data - input = 'assets/samplesheet_test.csv' + //input = 'assets/samplesheet_test.csv' + input = 'assets/samplesheet_full.csv' // Genome references fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' diff --git a/modules.json b/modules.json index 5b2e7d52..c2cfbdf4 100644 --- a/modules.json +++ b/modules.json @@ -24,12 +24,21 @@ "gatk4/createsequencedictionary": { "git_sha": 
"3b600af50eae8264960df817277cfe303d2acd47" }, + "gatk4/indexfeaturefile": { + "git_sha": "1a4c7cec1b9d82fdaa15897d8e9a9e9a4767444d" + }, "gatk4/intervallisttools": { "git_sha": "3b600af50eae8264960df817277cfe303d2acd47" }, + "gatk4/mergevcfs": { + "git_sha": "3b600af50eae8264960df817277cfe303d2acd47" + }, "gatk4/splitncigarreads": { "git_sha": "3b600af50eae8264960df817277cfe303d2acd47" }, + "gatk4/variantfiltration": { + "git_sha": "3b600af50eae8264960df817277cfe303d2acd47" + }, "gffread": { "git_sha": "49da8642876ae4d91128168cd0db4f1c858d7792" }, diff --git a/modules/local/gatk4/applybqsr/functions.nf b/modules/local/gatk4/applybqsr/functions.nf deleted file mode 100644 index da9da093..00000000 --- a/modules/local/gatk4/applybqsr/functions.nf +++ /dev/null @@ -1,68 +0,0 @@ -// -// Utility functions used in nf-core DSL2 module files -// - -// -// Extract name of software tool from process name using $task.process -// -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -// -// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules -// -def initOptions(Map args) { - def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.args3 = args.args3 ?: '' - options.publish_by_meta = args.publish_by_meta ?: [] - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -// -// Tidy up and join elements of a list to return a path string -// -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes - return paths.join('/') -} - -// -// Function to save/publish module results -// -def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def 
ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? path : '' - path_list.add(path) - } - } - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } - } -} diff --git a/modules/local/gatk4/applybqsr/main.nf b/modules/local/gatk4/applybqsr/main.nf deleted file mode 100644 index 8e10a5f2..00000000 --- a/modules/local/gatk4/applybqsr/main.nf +++ /dev/null @@ -1,46 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -options = initOptions(params.options) - -process GATK4_APPLYBQSR { - tag "$meta.id" - label 'process_low' - publishDir "${params.outdir}", - mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - - conda (params.enable_conda ? 
"bioconda::gatk4=4.2.0.0" : null) - if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" - } else { - container "quay.io/biocontainers/gatk4:4.2.0.0--0" - } - - input: - tuple val(meta), path(bam), path(bai), path(bqsr_table), path(intervals) - path fasta - path fastaidx - path dict - - output: - tuple val(meta), path("*.bam"), emit: bam - path "*.version.txt" , emit: version - - script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - def interval = intervals ? "-L ${intervals}" : "" - """ - gatk ApplyBQSR \\ - -R $fasta \\ - -I $bam \\ - --bqsr-recal-file $bqsr_table \\ - $interval \\ - -O ${prefix}.bam \\ - $options.args - - echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//' > ${software}.version.txt - """ -} diff --git a/modules/local/gatk4/applybqsr/meta.yml b/modules/local/gatk4/applybqsr/meta.yml deleted file mode 100644 index 9bf12f09..00000000 --- a/modules/local/gatk4/applybqsr/meta.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: gatk4_applybqsr -description: Apply base quality score recalibration (BQSR) to a bam file -keywords: - - bqsr - - bam -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: BAM file from alignment - pattern: "*.{bam}" - - bqsr_table: - type: file - description: Recalibration table from gatk4_baserecalibrator - - fasta: - type: file - description: The reference fasta file - - fastaidx: - type: file - description: Index of reference fasta file - - dict: - type: file - description: GATK sequence dictionary - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - bam: - type: file - description: Recalibrated BAM file - pattern: "*.{bam}" - -authors: - - "@yocra3" diff --git a/modules/local/gatk4/haplotypecaller/functions.nf b/modules/local/gatk4/haplotypecaller/functions.nf index da9da093..85628ee0 100644 --- a/modules/local/gatk4/haplotypecaller/functions.nf +++ b/modules/local/gatk4/haplotypecaller/functions.nf @@ -9,6 +9,13 @@ def getSoftwareName(task_process) { return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() } +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + // // Function to initialise default values and to generate a Groovy Map of available options for nf-core modules // @@ -37,32 +44,35 @@ def getPathFromList(path_list) { // Function to save/publish module results // def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? 
ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? path : '' - path_list.add(path) + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] } + path = path instanceof String ? 
path : '' + path_list.add(path) } } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" } } diff --git a/modules/local/gatk4/haplotypecaller/main.nf b/modules/local/gatk4/haplotypecaller/main.nf index da90ba4b..fc0ee0e9 100644 --- a/modules/local/gatk4/haplotypecaller/main.nf +++ b/modules/local/gatk4/haplotypecaller/main.nf @@ -1,21 +1,21 @@ // Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' params.options = [:] options = initOptions(params.options) process GATK4_HAPLOTYPECALLER { tag "$meta.id" - label 'process_low' + label 'process_medium' publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) + conda (params.enable_conda ? 
"bioconda::gatk4=4.2.3.0" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" + container "https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0" } else { - container "quay.io/biocontainers/gatk4:4.2.0.0--0" + container "quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0" } input: @@ -28,33 +28,35 @@ process GATK4_HAPLOTYPECALLER { val no_intervals output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path(interval), path("*.vcf"), emit: interval_vcf - path "*.version.txt" , emit: version + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path(interval), path("*.vcf.gz") , emit: interval_vcf + path "versions.yml" , emit: versions script: - def software = getSoftwareName(task.process) def prefix = options.suffix ? "${interval.baseName}_${meta.id}${options.suffix}" : "${interval.baseName}_${meta.id}" - def avail_mem = 3 + def avail_mem = 3 if (!task.memory) { log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.giga } - def intervalsOptions = no_intervals ? "" : "-L ${interval}" - def dbsnpOptions = dbsnp ? "-D ${dbsnp}" : "" - //TODO allow ploidy argument here since we allow it for the cnv callers? or is this covered with options? Might unintuitive to use + def interval_option = no_intervals ? "" : "-L ${interval}" + def dbsnp_option = dbsnp ? "-D ${dbsnp}" : "" """ gatk \\ --java-options "-Xmx${avail_mem}g" \\ HaplotypeCaller \\ -R $fasta \\ -I $bam \\ - ${dbsnpOptions} \\ - ${intervalsOptions} \\ - -O ${prefix}.vcf \\ - $options.args + ${dbsnp_option} \\ + ${interval_option} \\ + -O ${prefix}.vcf.gz \\ + $options.args \\ + --tmp-dir . 
- gatk --version | grep Picard | sed "s/Picard Version: //g" > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS """ } diff --git a/modules/local/gatk4/mergevcfs/functions.nf b/modules/local/gatk4/mergevcfs/functions.nf deleted file mode 100644 index da9da093..00000000 --- a/modules/local/gatk4/mergevcfs/functions.nf +++ /dev/null @@ -1,68 +0,0 @@ -// -// Utility functions used in nf-core DSL2 module files -// - -// -// Extract name of software tool from process name using $task.process -// -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -// -// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules -// -def initOptions(Map args) { - def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.args3 = args.args3 ?: '' - options.publish_by_meta = args.publish_by_meta ?: [] - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -// -// Tidy up and join elements of a list to return a path string -// -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes - return paths.join('/') -} - -// -// Function to save/publish module results -// -def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? 
ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? path : '' - path_list.add(path) - } - } - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } - } -} diff --git a/modules/local/gatk4/splitncigarreads/functions.nf b/modules/local/gatk4/splitncigarreads/functions.nf deleted file mode 100644 index da9da093..00000000 --- a/modules/local/gatk4/splitncigarreads/functions.nf +++ /dev/null @@ -1,68 +0,0 @@ -// -// Utility functions used in nf-core DSL2 module files -// - -// -// Extract name of software tool from process name using $task.process -// -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -// -// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules -// -def initOptions(Map args) { - def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.args3 = args.args3 ?: '' - options.publish_by_meta = args.publish_by_meta ?: [] - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -// -// Tidy up and join elements of a list to return a path string -// -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries - paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace 
and trailing slashes - return paths.join('/') -} - -// -// Function to save/publish module results -// -def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? path : '' - path_list.add(path) - } - } - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } - } -} diff --git a/modules/local/gatk4/splitncigarreads/meta.yml b/modules/local/gatk4/splitncigarreads/meta.yml deleted file mode 100644 index e433cbf6..00000000 --- a/modules/local/gatk4/splitncigarreads/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: gatk4_splitncigarreads -description: Splits reads that contain Ns in their cigar string -keywords: - - vcf - - merge -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. 
- homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - bam: - type: list - description: BAM/SAM/CRAM file containing reads - pattern: "*.{bam,sam,cram}" - - fasta: - type: tuple of files - description: | - Tuple of fasta file (first), sequence dict (second) and fasta index (third) - pattern: ["*.fasta", "*.dict", "*.fai"] -output: - - bam: - type: file - description: Output file with split reads (BAM/SAM/CRAM) - pattern: "*.{bam,sam,cram}" - - version: - type: file - description: File containing software version - pattern: "*.version.txt" -authors: - - "@kevinmenden" diff --git a/modules/local/gatk4/variantfiltration/functions.nf b/modules/local/gatk4/variantfiltration/functions.nf deleted file mode 100644 index da9da093..00000000 --- a/modules/local/gatk4/variantfiltration/functions.nf +++ /dev/null @@ -1,68 +0,0 @@ -// -// Utility functions used in nf-core DSL2 module files -// - -// -// Extract name of software tool from process name using $task.process -// -def getSoftwareName(task_process) { - return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() -} - -// -// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules -// -def initOptions(Map args) { - def Map options = [:] - options.args = args.args ?: '' - options.args2 = args.args2 ?: '' - options.args3 = args.args3 ?: '' - options.publish_by_meta = args.publish_by_meta ?: [] - options.publish_dir = args.publish_dir ?: '' - options.publish_files = args.publish_files - options.suffix = args.suffix ?: '' - return options -} - -// -// Tidy up and join elements of a list to return a path string -// -def getPathFromList(path_list) { - def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries 
- paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes - return paths.join('/') -} - -// -// Function to save/publish module results -// -def saveFiles(Map args) { - if (!args.filename.endsWith('.version.txt')) { - def ioptions = initOptions(args.options) - def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_meta) { - def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta - for (key in key_list) { - if (args.meta && key instanceof String) { - def path = key - if (args.meta.containsKey(key)) { - path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] - } - path = path instanceof String ? path : '' - path_list.add(path) - } - } - } - if (ioptions.publish_files instanceof Map) { - for (ext in ioptions.publish_files) { - if (args.filename.endsWith(ext.key)) { - def ext_list = path_list.collect() - ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/$args.filename" - } - } - } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/$args.filename" - } - } -} diff --git a/modules/nf-core/modules/gatk4/indexfeaturefile/functions.nf b/modules/nf-core/modules/gatk4/indexfeaturefile/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/nf-core/modules/gatk4/indexfeaturefile/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// 
+def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/local/gatk4/splitncigarreads/main.nf b/modules/nf-core/modules/gatk4/indexfeaturefile/main.nf similarity index 52% rename from modules/local/gatk4/splitncigarreads/main.nf rename to modules/nf-core/modules/gatk4/indexfeaturefile/main.nf index d1c328ea..8f40a3e3 100644 --- a/modules/local/gatk4/splitncigarreads/main.nf +++ b/modules/nf-core/modules/gatk4/indexfeaturefile/main.nf @@ -1,12 +1,12 @@ // Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' params.options = [:] options = initOptions(params.options) -process GATK4_SPLITNCIGARREADS { +process GATK4_INDEXFEATUREFILE { tag "$meta.id" - label 'process_medium' + label 'process_low' publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } @@ -19,25 +19,22 @@ process GATK4_SPLITNCIGARREADS { } input: - tuple val(meta), path(bam) - path(fasta) - path(fai) - path(dict) + tuple val(meta), path(feature_file) output: - tuple val(meta), path('*.bam'), path('*.bai'), emit: bam - path '*.version.txt' , emit: version + tuple val(meta), path("*.{tbi,idx}"), emit: index + path "versions.yml" , emit: versions script: - def software = getSoftwareName(task.process) - def prefix = options.suffix ? 
"${meta.id}${options.suffix}" : "${meta.id}" """ - gatk SplitNCigarReads \\ - -R $fasta \\ - -I $bam \\ - -O ${prefix}.bam \\ - $options.args + gatk \\ + IndexFeatureFile \\ + $options.args \\ + -I $feature_file - echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//' > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS """ } diff --git a/modules/nf-core/modules/gatk4/indexfeaturefile/meta.yml b/modules/nf-core/modules/gatk4/indexfeaturefile/meta.yml new file mode 100644 index 00000000..eebe6b85 --- /dev/null +++ b/modules/nf-core/modules/gatk4/indexfeaturefile/meta.yml @@ -0,0 +1,42 @@ +name: gatk4_indexfeaturefile +description: Creates an index for a feature file, e.g. VCF or BED file. +keywords: + - index + - feature +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ['BSD-3-clause'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - feature_file: + type: file + description: VCF/BED file + pattern: "*.{vcf,vcf.gz,bed,bed.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: Index for VCF/BED file + pattern: "*.{tbi,idx}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@santiagorevale" diff --git a/modules/nf-core/modules/gatk4/mergevcfs/functions.nf b/modules/nf-core/modules/gatk4/mergevcfs/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/nf-core/modules/gatk4/mergevcfs/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow 
+ if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/local/gatk4/mergevcfs/main.nf b/modules/nf-core/modules/gatk4/mergevcfs/main.nf similarity index 66% rename from modules/local/gatk4/mergevcfs/main.nf rename to modules/nf-core/modules/gatk4/mergevcfs/main.nf index 8b9747bc..28073fcb 100644 --- a/modules/local/gatk4/mergevcfs/main.nf +++ b/modules/nf-core/modules/gatk4/mergevcfs/main.nf @@ -1,5 +1,5 @@ // Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' params.options = [:] options = initOptions(params.options) @@ -11,11 +11,11 @@ process GATK4_MERGEVCFS { mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) + conda (params.enable_conda ? 
"bioconda::gatk4=4.2.3.0" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" + container "https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0" } else { - container "quay.io/biocontainers/gatk4:4.2.0.0--0" + container "quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0" } input: @@ -24,11 +24,10 @@ process GATK4_MERGEVCFS { val use_ref_dict output: - tuple val(meta), path('*.vcf.gz'), path('*.vcf.gz.tbi'), emit: vcf - path '*.version.txt' , emit: version + tuple val(meta), path('*.vcf.gz'), emit: vcf + path "versions.yml" , emit: versions script: - def software = getSoftwareName(task.process) def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" // Make list of VCFs to merge @@ -44,6 +43,9 @@ process GATK4_MERGEVCFS { $ref \\ $options.args - echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//' > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS """ } diff --git a/modules/local/gatk4/mergevcfs/meta.yml b/modules/nf-core/modules/gatk4/mergevcfs/meta.yml similarity index 90% rename from modules/local/gatk4/mergevcfs/meta.yml rename to modules/nf-core/modules/gatk4/mergevcfs/meta.yml index 14b28fa0..597f9ec6 100644 --- a/modules/local/gatk4/mergevcfs/meta.yml +++ b/modules/nf-core/modules/gatk4/mergevcfs/meta.yml @@ -12,6 +12,7 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 + licence: ['Apache-2.0'] input: - meta: type: map @@ -34,9 +35,9 @@ output: type: file description: merged vcf file pattern: "*.vcf.gz" - - version: + - versions: type: file - description: File containing software version - pattern: 
"*.version.txt" + description: File containing software versions + pattern: "versions.yml" authors: - "@kevinmenden" diff --git a/modules/nf-core/modules/gatk4/variantfiltration/functions.nf b/modules/nf-core/modules/gatk4/variantfiltration/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/nf-core/modules/gatk4/variantfiltration/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if 
(ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/local/gatk4/variantfiltration/main.nf b/modules/nf-core/modules/gatk4/variantfiltration/main.nf similarity index 56% rename from modules/local/gatk4/variantfiltration/main.nf rename to modules/nf-core/modules/gatk4/variantfiltration/main.nf index d442aba8..c2c7a0b1 100644 --- a/modules/local/gatk4/variantfiltration/main.nf +++ b/modules/nf-core/modules/gatk4/variantfiltration/main.nf @@ -1,5 +1,5 @@ // Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' params.options = [:] options = initOptions(params.options) @@ -11,34 +11,36 @@ process GATK4_VARIANTFILTRATION { mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } - conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null) + conda (params.enable_conda ? 
"bioconda::gatk4=4.2.3.0" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0" + container "https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0" } else { - container "quay.io/biocontainers/gatk4:4.2.0.0--0" + container "quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0" } input: - tuple val(meta), path(vcf), path (tbi) + tuple val(meta), path(vcf), path(vcf_tbi) path fasta path fai path dict output: - tuple val(meta), path("*.vcf"), emit: vcf - path "*.version.txt" , emit: version - + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions script: - def software = getSoftwareName(task.process) def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" """ gatk VariantFiltration \\ -R $fasta \\ -V $vcf \\ - -O ${prefix}.vcf \\ + -O ${prefix}.vcf.gz \\ $options.args - echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//' > ${software}.version.txt + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS """ } diff --git a/modules/local/gatk4/variantfiltration/meta.yml b/modules/nf-core/modules/gatk4/variantfiltration/meta.yml similarity index 91% rename from modules/local/gatk4/variantfiltration/meta.yml rename to modules/nf-core/modules/gatk4/variantfiltration/meta.yml index d7f72582..6d4983a6 100644 --- a/modules/local/gatk4/variantfiltration/meta.yml +++ b/modules/nf-core/modules/gatk4/variantfiltration/meta.yml @@ -12,6 +12,7 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 + licence: ['Apache-2.0'] input: - meta: type: map @@ -39,9 +40,9 @@ output: type: file description: 
filtered VCF file pattern: "*.filtered.{vcf}" - - version: + - versions: type: file - description: File containing software version - pattern: "*.version.txt" + description: File containing software versions + pattern: "versions.yml" authors: - "@kevinmenden" diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index c1b07196..7edb4f75 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -26,6 +26,7 @@ workflow INPUT_CHECK { def create_fastq_channels(LinkedHashMap row) { def meta = [:] meta.id = row.sample + meta.sample = row.sample meta.single_end = row.single_end.toBoolean() def array = [] diff --git a/workflows/rnavar.nf b/workflows/rnavar.nf index c930aaa4..1a9cba60 100644 --- a/workflows/rnavar.nf +++ b/workflows/rnavar.nf @@ -79,6 +79,7 @@ prepareToolIndices = params.aligner def publish_genome_options = params.save_reference ? [publish_dir: 'genome'] : [publish_files: false] def publish_index_options = params.save_reference ? 
[publish_dir: 'genome/index'] : [publish_files: false] +def untar_options = [publish_files: false] if (!params.save_reference) modules['star_genomegenerate']['publish_files'] = false @@ -109,14 +110,16 @@ include { GATK4_BASERECALIBRATOR } from '../modules/nf-core/modules/gatk4/baser include { GATK4_BEDTOINTERVALLIST } from '../modules/nf-core/modules/gatk4/bedtointervallist/main' addParams(options: modules['gatk_bedtointervallist']) include { GATK4_HAPLOTYPECALLER } from '../modules/local/gatk4/haplotypecaller/main' addParams(options: modules['gatk_haplotypecaller']) include { GATK4_INTERVALLISTTOOLS } from '../modules/nf-core/modules/gatk4/intervallisttools/main' addParams(options: modules['gatk_intervallisttools']) -include { GATK4_MERGEVCFS } from '../modules/local/gatk4/mergevcfs/main' addParams(options: modules['gatk_mergevcfs']) -include { GATK4_VARIANTFILTRATION } from '../modules/local/gatk4/variantfiltration/main' addParams(options: modules['gatk_variantfilter']) +include { GATK4_MERGEVCFS } from '../modules/nf-core/modules/gatk4/mergevcfs/main' addParams(options: modules['gatk_mergevcfs']) +include { GATK4_INDEXFEATUREFILE } from '../modules/nf-core/modules/gatk4/indexfeaturefile/main' addParams(options: modules['gatk_indexfeaturefile']) +include { GATK4_VARIANTFILTRATION } from '../modules/nf-core/modules/gatk4/variantfiltration/main' addParams(options: modules['gatk_variantfilter']) include { SAMTOOLS_INDEX } from '../modules/nf-core/modules/samtools/index/main' addParams(options: modules['samtools_index_genome']) include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' addParams( genome_options: publish_genome_options, index_options: publish_index_options, - star_index_options: modules['star_genomegenerate'] + star_index_options: modules['star_genomegenerate'], + star_untar_options: untar_options ) include { ALIGN_STAR } from '../subworkflows/nf-core/align_star' addParams( align_options: modules['star_align'], @@ -288,6 +291,73 @@ 
workflow RNAVAR { bam_recalibrated_qc = RECALIBRATE.out.qc ch_versions = ch_versions.mix(RECALIBRATE.out.versions.first().ifEmpty(null)) + // MODULE: IntervalListTools from GATK4 + ch_interval_list_split = Channel.empty() + + if (!params.skip_intervallisttools) { + GATK4_INTERVALLISTTOOLS(ch_interval_list) + ch_interval_list_split = GATK4_INTERVALLISTTOOLS.out.interval_list.map{ meta, bed -> [bed] }.flatten() + } + else ch_interval_list_split = ch_interval_list + + // MODULE: HaplotypeCaller from GATK4 + interval_flag = params.no_intervals + haplotypecaller_vcf = Channel.empty() + + haplotypecaller_interval_bam = bam_recalibrated.combine(ch_interval_list_split) + .map{ meta, bam, bai, interval_list -> + new_meta = meta.clone() + new_meta.id = meta.id + "_" + interval_list.baseName + [new_meta, bam, bai, interval_list]} + + GATK4_HAPLOTYPECALLER( + haplotypecaller_interval_bam, + params.dbsnp_vcf, + params.dbsnp_vcf_index, + PREPARE_GENOME.out.dict, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai, + interval_flag + ) + + haplotypecaller_raw = GATK4_HAPLOTYPECALLER.out.vcf + .map{ meta, vcf -> + meta.id = meta.sample + [meta, vcf]} + .groupTuple() + + ch_versions = ch_versions.mix(GATK4_HAPLOTYPECALLER.out.versions.first().ifEmpty(null)) + use_ref_dict = true + + GATK4_MERGEVCFS( + haplotypecaller_raw, + PREPARE_GENOME.out.dict, + use_ref_dict + ) + haplotypecaller_vcf = GATK4_MERGEVCFS.out.vcf + ch_versions = ch_versions.mix(GATK4_MERGEVCFS.out.versions.first().ifEmpty(null)) + + GATK4_INDEXFEATUREFILE( + haplotypecaller_vcf + ) + haplotypecaller_vcf_tbi = haplotypecaller_vcf.join(GATK4_INDEXFEATUREFILE.out.index, by: [0]) + ch_versions = ch_versions.mix(GATK4_INDEXFEATUREFILE.out.versions.first().ifEmpty(null)) + + // MODULE: VariantFiltration from GATK4 + if (!params.skip_variantfiltration) { + + GATK4_VARIANTFILTRATION( + haplotypecaller_vcf_tbi, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai, + PREPARE_GENOME.out.dict + ) + + filtered_vcf = 
GATK4_VARIANTFILTRATION.out.vcf + filtered_vcf_tbi = GATK4_VARIANTFILTRATION.out.tbi + ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION.out.versions.first().ifEmpty(null)) + } + } CUSTOM_DUMPSOFTWAREVERSIONS (