#!/usr/bin/env nextflow /* ======================================================================================== nf-core/atacseq ======================================================================================== nf-core/atacseq Analysis Pipeline. #### Homepage / Documentation https://github.com/nf-core/atacseq ---------------------------------------------------------------------------------------- */ def helpMessage() { log.info nfcoreHeader() log.info""" Usage: The typical command for running the pipeline is as follows: nextflow run nf-core/atacseq --input design.csv --genome GRCh37 -profile docker Mandatory arguments: --input [file] Comma-separated file containing information about the samples in the experiment (see docs/usage.md) (Default: './design.csv') --fasta [file] Path to Fasta reference. Not mandatory when using reference in iGenomes config via --genome (Default: false) --gtf [file] Path to GTF file. Not mandatory when using reference in iGenomes config via --genome (Default: false) -profile [str] Configuration profile to use. Can use multiple (comma separated) Available: conda, docker, singularity, awsbatch, test Generic --single_end [bool] Specifies that the input is single-end reads (Default: false) --seq_center [str] Sequencing center information to be added to read group of BAM files (Default: false) --fragment_size [int] Estimated fragment size used to extend single-end reads (Default: 0) --fingerprint_bins [int] Number of genomic bins to use when calculating fingerprint plot (Default: 500000) References If not specified in the configuration file or you wish to overwrite any of the references --genome [str] Name of iGenomes reference (Default: false) --bwa_index [file] Full path to directory containing BWA index including base name i.e. /path/to/index/genome.fa (Default: false) --gene_bed [file] Path to BED file containing gene intervals (Default: false) --tss_bed [file] Path to BED file containing transcription start sites (Default: false) --macs_gsize [str] Effective genome size parameter required by MACS2. If using iGenomes config, values have only been provided when --genome is set as GRCh37, GRCm38, hg19, mm10, BDGP6 and WBcel235 (Default: false) --blacklist [file] Path to blacklist regions (.BED format), used for filtering alignments (Default: false) --mito_name [str] Name of Mitochondrial chomosome in genome fasta (e.g. chrM). Reads aligning to this contig are filtered out (Default: false) --save_reference [bool] If generated by the pipeline save the BWA index in the results directory (Default: false) Trimming --clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) (Default: 0) --clip_r2 [int] Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only) (Default: 0) --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed (Default: 0) --three_prime_clip_r2 [int] Instructs Trim Galore to remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed (Default: 0) --trim_nextseq [int] Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails (Default: 0) --skip_trimming [bool] Skip the adapter trimming step (Default: false) --save_trimmed [bool] Save the trimmed FastQ files in the results directory (Default: false) Alignments --bwa_min_score [int] Don’t output BWA MEM alignments with score lower than this parameter (Default: false) --keep_mito [bool] Reads mapping to mitochondrial contig are not filtered from alignments (Default: false) --keep_dups [bool] Duplicate reads are not filtered from alignments (Default: false) --keep_multi_map [bool] Reads mapping to multiple locations are not filtered from alignments (Default: false) --skip_merge_replicates [bool] Do not perform alignment merging and downstream analysis by merging replicates i.e. only do this by merging resequenced libraries (Default: false) --save_align_intermeds [bool] Save the intermediate BAM files from the alignment step - not done by default (Default: false) Peaks --narrow_peak [bool] Run MACS2 in narrowPeak mode (Default: false) --broad_cutoff [float] Specifies broad cutoff value for MACS2. Only used when --narrow_peak isnt specified (Default: 0.1) --macs_fdr [float] Minimum FDR (q-value) cutoff for peak detection, --macs_fdr and --macs_pvalue are mutually exclusive (Default: false) --macs_pvalue [float] p-value cutoff for peak detection, --macs_fdr and --macs_pvalue are mutually exclusive (Default: false) --min_reps_consensus [int] Number of biological replicates required from a given condition for a peak to contribute to a consensus peak (Default: 1) --save_macs_pileup [bool] Instruct MACS2 to create bedGraph files normalised to signal per million reads (Default: false) --skip_peak_qc [bool] Skip MACS2 peak QC plot generation (Default: false) --skip_peak_annotation [bool] Skip annotation of MACS2 and consensus peaks with HOMER (Default: false) --skip_consensus_peaks [bool] Skip consensus peak generation (Default: false) Differential analysis --deseq2_vst [bool] Use vst transformation instead of rlog with DESeq2 (Default: false) --skip_diff_analysis [bool] Skip differential accessibility analysis (Default: false) QC --skip_fastqc [bool] Skip FastQC (Default: false) --skip_picard_metrics [bool] Skip Picard CollectMultipleMetrics (Default: false) --skip_preseq [bool] Skip Preseq (Default: false) --skip_plot_profile [bool] Skip deepTools plotProfile (Default: false) --skip_plot_fingerprint [bool] Skip deepTools plotFingerprint (Default: false) --skip_ataqv [bool] Skip Ataqv (Default: false) --skip_igv [bool] Skip IGV (Default: false) --skip_multiqc [bool] Skip MultiQC (Default: false) Other --outdir [file] The output directory where the results will be saved (Default: './results') --publish_dir_mode [str] Mode for publishing results in the output directory. Available: symlink, rellink, link, copy, copyNoFollow, move (Default: copy) --email [email] Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits (Default: false) --email_on_fail [email] Same as --email, except only send mail if the workflow is not successful (Default: false) --max_multiqc_email_size [str] Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) -name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic (Default: false) AWSBatch --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch --awsregion [str] The AWS Region for your AWS Batch job to run on --awscli [str] Path to the AWS CLI tool """.stripIndent() } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- SET UP CONFIGURATION VARIABLES -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // Show help message if (params.help) { helpMessage() exit 0 } // Has the run name been specified by the user? // this has the bonus effect of catching both -name and --name custom_runName = params.name if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { custom_runName = workflow.runName } //////////////////////////////////////////////////// /* -- DEFAULT PARAMETER VALUES -- */ //////////////////////////////////////////////////// // Check if genome exists in the config file if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}" } // Configurable variables params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false params.bwa_index = params.genome ? params.genomes[ params.genome ].bwa ?: false : false params.gtf = params.genome ? params.genomes[ params.genome ].gtf ?: false : false params.gene_bed = params.genome ? params.genomes[ params.genome ].bed12 ?: false : false params.mito_name = params.genome ? params.genomes[ params.genome ].mito_name ?: false : false params.macs_gsize = params.genome ? params.genomes[ params.genome ].macs_gsize ?: false : false params.blacklist = params.genome ? params.genomes[ params.genome ].blacklist ?: false : false params.anno_readme = params.genome ? params.genomes[ params.genome ].readme ?: false : false // Global variables def PEAK_TYPE = params.narrow_peak ? 'narrowPeak' : 'broadPeak' //////////////////////////////////////////////////// /* -- CONFIG FILES -- */ //////////////////////////////////////////////////// // Pipeline config ch_multiqc_config = file("$baseDir/assets/multiqc_config.yaml", checkIfExists: true) ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() ch_output_docs = file("$baseDir/docs/output.md", checkIfExists: true) ch_output_docs_images = file("$baseDir/docs/images/", checkIfExists: true) // JSON files required by BAMTools for alignment filtering if (params.single_end) { ch_bamtools_filter_config = file(params.bamtools_filter_se_config, checkIfExists: true) } else { ch_bamtools_filter_config = file(params.bamtools_filter_pe_config, checkIfExists: true) } // Header files for MultiQC ch_mlib_peak_count_header = file("$baseDir/assets/multiqc/mlib_peak_count_header.txt", checkIfExists: true) ch_mlib_frip_score_header = file("$baseDir/assets/multiqc/mlib_frip_score_header.txt", checkIfExists: true) ch_mlib_peak_annotation_header = file("$baseDir/assets/multiqc/mlib_peak_annotation_header.txt", checkIfExists: true) ch_mlib_deseq2_pca_header = file("$baseDir/assets/multiqc/mlib_deseq2_pca_header.txt", checkIfExists: true) ch_mlib_deseq2_clustering_header = file("$baseDir/assets/multiqc/mlib_deseq2_clustering_header.txt", checkIfExists: true) ch_mrep_peak_count_header = file("$baseDir/assets/multiqc/mrep_peak_count_header.txt", checkIfExists: true) ch_mrep_frip_score_header = file("$baseDir/assets/multiqc/mrep_frip_score_header.txt", checkIfExists: true) ch_mrep_peak_annotation_header = file("$baseDir/assets/multiqc/mrep_peak_annotation_header.txt", checkIfExists: true) ch_mrep_deseq2_pca_header = file("$baseDir/assets/multiqc/mrep_deseq2_pca_header.txt", checkIfExists: true) ch_mrep_deseq2_clustering_header = file("$baseDir/assets/multiqc/mrep_deseq2_clustering_header.txt", checkIfExists: true) //////////////////////////////////////////////////// /* -- VALIDATE INPUTS -- */ //////////////////////////////////////////////////// if (params.input) { ch_input = file(params.input, checkIfExists: true) } else { exit 1, 'Samples design file not specified!' } if (params.gtf) { ch_gtf = file(params.gtf, checkIfExists: true) } else { exit 1, 'GTF annotation file not specified!' } if (params.gene_bed) { ch_gene_bed = file(params.gene_bed, checkIfExists: true) } if (params.tss_bed) { ch_tss_bed = file(params.tss_bed, checkIfExists: true) } if (params.blacklist) { ch_blacklist = Channel.fromPath(params.blacklist, checkIfExists: true) } else { ch_blacklist = Channel.empty() } if (params.fasta) { lastPath = params.fasta.lastIndexOf(File.separator) bwa_base = params.fasta.substring(lastPath+1) ch_fasta = file(params.fasta, checkIfExists: true) } else { exit 1, 'Fasta file not specified!' } if (params.bwa_index) { lastPath = params.bwa_index.lastIndexOf(File.separator) bwa_dir = params.bwa_index.substring(0,lastPath+1) bwa_base = params.bwa_index.substring(lastPath+1) Channel .fromPath(bwa_dir, checkIfExists: true) .set { ch_bwa_index } } // Save AWS IGenomes file containing annotation version if (params.anno_readme && file(params.anno_readme).exists()) { file("${params.outdir}/genome/").mkdirs() file(params.anno_readme).copyTo("${params.outdir}/genome/") } //////////////////////////////////////////////////// /* -- AWS -- */ //////////////////////////////////////////////////// if (workflow.profile.contains('awsbatch')) { // AWSBatch sanity checking if (!params.awsqueue || !params.awsregion) exit 1, 'Specify correct --awsqueue and --awsregion parameters on AWSBatch!' // Check outdir paths to be S3 buckets if running on AWSBatch // related: https://github.com/nextflow-io/nextflow/issues/813 if (!params.outdir.startsWith('s3:')) exit 1, 'Outdir not on S3 - specify S3 Bucket to run on AWSBatch!' // Prevent trace files to be stored on S3 since S3 does not support rolling files. if (params.tracedir.startsWith('s3:')) exit 1, 'Specify a local tracedir or run without trace! S3 cannot be used for tracefiles.' } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- HEADER LOG INFO -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // Header log info log.info nfcoreHeader() def summary = [:] summary['Run Name'] = custom_runName ?: workflow.runName summary['Data Type'] = params.single_end ? 'Single-End' : 'Paired-End' summary['Design File'] = params.input summary['Genome'] = params.genome ?: 'Not supplied' summary['Fasta File'] = params.fasta summary['GTF File'] = params.gtf if (params.gene_bed) summary['Gene BED File'] = params.gene_bed if (params.tss_bed) summary['TSS BED File'] = params.tss_bed if (params.bwa_index) summary['BWA Index'] = params.bwa_index if (params.blacklist) summary['Blacklist BED'] = params.blacklist if (params.mito_name) summary['Mitochondrial Contig'] = params.mito_name if (params.bwa_min_score) summary['BWA Min Score'] = params.bwa_min_score summary['MACS2 Genome Size'] = params.macs_gsize ?: 'Not supplied' summary['Min Consensus Reps'] = params.min_reps_consensus if (params.macs_gsize) summary['MACS2 Narrow Peaks'] = params.narrow_peak ? 'Yes' : 'No' if (!params.narrow_peak) summary['MACS2 Broad Cutoff'] = params.broad_cutoff if (params.macs_fdr) summary['MACS2 FDR'] = params.macs_fdr if (params.macs_pvalue) summary['MACS2 P-value'] = params.macs_pvalue if (params.skip_trimming) { summary['Trimming Step'] = 'Skipped' } else { summary['Trim R1'] = "$params.clip_r1 bp" summary['Trim R2'] = "$params.clip_r2 bp" summary["Trim 3' R1"] = "$params.three_prime_clip_r1 bp" summary["Trim 3' R2"] = "$params.three_prime_clip_r2 bp" summary['NextSeq Trim'] = "$params.trim_nextseq bp" } if (params.seq_center) summary['Sequencing Center'] = params.seq_center if (params.single_end) summary['Fragment Size'] = "$params.fragment_size bp" summary['Fingerprint Bins'] = params.fingerprint_bins if (params.keep_mito) summary['Keep Mitochondrial'] = 'Yes' if (params.keep_dups) summary['Keep Duplicates'] = 'Yes' if (params.keep_multi_map) summary['Keep Multi-mapped'] = 'Yes' summary['Save Genome Index'] = params.save_reference ? 'Yes' : 'No' if (params.save_trimmed) summary['Save Trimmed'] = 'Yes' if (params.save_align_intermeds) summary['Save Intermeds'] = 'Yes' if (params.save_macs_pileup) summary['Save MACS2 Pileup'] = 'Yes' if (params.skip_merge_replicates) summary['Skip Merge Replicates'] = 'Yes' if (params.skip_peak_qc) summary['Skip MACS2 Peak QC'] = 'Yes' if (params.skip_peak_annotation) summary['Skip Peak Annotation'] = 'Yes' if (params.skip_consensus_peaks) summary['Skip Consensus Peaks'] = 'Yes' if (params.deseq2_vst) summary['Use DESeq2 vst Transform'] = 'Yes' if (params.skip_diff_analysis) summary['Skip Differential Analysis'] = 'Yes' if (params.skip_fastqc) summary['Skip FastQC'] = 'Yes' if (params.skip_picard_metrics) summary['Skip Picard Metrics'] = 'Yes' if (params.skip_preseq) summary['Skip Preseq'] = 'Yes' if (params.skip_plot_profile) summary['Skip plotProfile'] = 'Yes' if (params.skip_plot_fingerprint) summary['Skip plotFingerprint'] = 'Yes' if (params.skip_ataqv) summary['Skip Ataqv'] = 'Yes' if (params.skip_igv) summary['Skip IGV'] = 'Yes' if (params.skip_multiqc) summary['Skip MultiQC'] = 'Yes' summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" summary['Output Dir'] = params.outdir summary['Launch Dir'] = workflow.launchDir summary['Working Dir'] = workflow.workDir summary['Script Dir'] = workflow.projectDir summary['User'] = workflow.userName if (workflow.profile.contains('awsbatch')) { summary['AWS Region'] = params.awsregion summary['AWS Queue'] = params.awsqueue summary['AWS CLI'] = params.awscli } summary['Config Profile'] = workflow.profile if (params.config_profile_description) summary['Config Description'] = params.config_profile_description if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact if (params.config_profile_url) summary['Config URL'] = params.config_profile_url if (params.email || params.email_on_fail) { summary['E-mail Address'] = params.email summary['E-mail on failure'] = params.email_on_fail summary['MultiQC Max Size'] = params.max_multiqc_email_size } log.info summary.collect { k,v -> "${k.padRight(21)}: $v" }.join('\n') log.info "-\033[2m--------------------------------------------------\033[0m-" // Check the hostnames against configured profiles checkHostname() // Show a big warning message if we're not running MACS if (!params.macs_gsize) { def warnstring = params.genome ? "supported for '${params.genome}'" : 'supplied' log.warn "=================================================================\n" + " WARNING! MACS genome size parameter not $warnstring.\n" + " Peak calling, annotation and differential analysis will be skipped.\n" + " Please specify value for '--macs_gsize' to run these steps.\n" + "=======================================================================" } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- PARSE DESIGN FILE -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* * PREPROCESSING: Reformat design file and check validitiy */ process CHECK_DESIGN { tag "$design" publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode input: path design from ch_input output: path '*.csv' into ch_design_reads_csv script: // This script is bundled with the pipeline, in nf-core/atacseq/bin/ """ check_design.py $design design_reads.csv """ } /* * Create channels for input fastq files */ if (params.single_end) { ch_design_reads_csv .splitCsv(header:true, sep:',') .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true) ] ] } .into { ch_raw_reads_fastqc; ch_raw_reads_trimgalore; design_replicates_exist; design_multiple_samples } } else { ch_design_reads_csv .splitCsv(header:true, sep:',') .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true), file(row.fastq_2, checkIfExists: true) ] ] } .into { ch_raw_reads_fastqc; ch_raw_reads_trimgalore; design_replicates_exist; design_multiple_samples } } // Boolean value for replicates existing in design replicatesExist = design_replicates_exist .map { it -> it[0].split('_')[-2].replaceAll('R','').toInteger() } .flatten() .max() .val > 1 // Boolean value for multiple groups existing in design multipleGroups = design_multiple_samples .map { it -> it[0].split('_')[0..-3].join('_') } .flatten() .unique() .count() .val > 1 /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- PREPARE ANNOTATION FILES -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* * PREPROCESSING: Build BWA index */ if (!params.bwa_index) { process BWA_INDEX { tag "$fasta" label 'process_high' publishDir path: { params.save_reference ? "${params.outdir}/genome" : params.outdir }, saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode input: path fasta from ch_fasta output: path 'BWAIndex' into ch_bwa_index script: """ bwa index -a bwtsw $fasta mkdir BWAIndex && mv ${fasta}* BWAIndex """ } } /* * PREPROCESSING: Generate gene BED file */ // If --gtf is supplied along with --genome // Make gene bed from supplied --gtf instead of using iGenomes one automatically def MAKE_BED = false if (!params.gene_bed) { MAKE_BED = true } else if (params.genome && params.gtf) { if (params.genomes[ params.genome ].gtf != params.gtf) { MAKE_BED = true } } if (MAKE_BED) { process MAKE_GENE_BED { tag "$gtf" label 'process_low' publishDir "${params.outdir}/genome", mode: params.publish_dir_mode input: path gtf from ch_gtf output: path '*.bed' into ch_gene_bed script: // This script is bundled with the pipeline, in nf-core/atacseq/bin/ """ gtf2bed $gtf > ${gtf.baseName}.bed """ } } /* * PREPROCESSING: Generate TSS BED file */ if (!params.tss_bed) { process MAKE_TSS_BED { tag "$bed" publishDir "${params.outdir}/genome", mode: params.publish_dir_mode input: path bed from ch_gene_bed output: path '*.bed' into ch_tss_bed script: """ cat $bed | awk -v FS='\t' -v OFS='\t' '{ if(\$6=="+") \$3=\$2+1; else \$2=\$3-1; print \$1, \$2, \$3, \$4, \$5, \$6;}' > ${bed.baseName}.tss.bed """ } } /* * PREPROCESSING: Prepare genome intervals for filtering */ process MAKE_GENOME_FILTER { tag "$fasta" publishDir "${params.outdir}/genome", mode: params.publish_dir_mode input: path fasta from ch_fasta path blacklist from ch_blacklist.ifEmpty([]) output: path "$fasta" // FASTA FILE FOR IGV path '*.fai' // FAI INDEX FOR REFERENCE GENOME path '*.bed' into ch_genome_filter_regions // BED FILE WITHOUT BLACKLIST REGIONS & MITOCHONDRIAL CONTIG FOR FILTERING path '*.txt' into ch_genome_autosomes // TEXT FILE CONTAINING LISTING OF AUTOSOMAL CHROMOSOMES FOR ATAQV path '*.sizes' into ch_genome_sizes_mlib_bigwig, // CHROMOSOME SIZES FILE FOR BEDTOOLS ch_genome_sizes_mrep_bigwig script: blacklist_filter = params.blacklist ? "sortBed -i $blacklist -g ${fasta}.sizes | complementBed -i stdin -g ${fasta}.sizes" : "awk '{print \$1, '0' , \$2}' OFS='\t' ${fasta}.sizes" name_filter = params.mito_name ? "| awk '\$1 !~ /${params.mito_name}/ {print \$0}'": '' mito_filter = params.keep_mito ? '' : name_filter """ samtools faidx $fasta get_autosomes.py ${fasta}.fai ${fasta}.autosomes.txt cut -f 1,2 ${fasta}.fai > ${fasta}.sizes $blacklist_filter $mito_filter > ${fasta}.include_regions.bed """ } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- FASTQ QC -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* * STEP 1: FastQC */ process FASTQC { tag "$name" label 'process_medium' publishDir "${params.outdir}/fastqc", mode: params.publish_dir_mode, saveAs: { filename -> filename.endsWith('.zip') ? "zips/$filename" : filename } when: !params.skip_fastqc input: tuple val(name), path(reads) from ch_raw_reads_fastqc output: path '*.{zip,html}' into ch_fastqc_reports_mqc script: // Added soft-links to original fastqs for consistent naming in MultiQC if (params.single_end) { """ [ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz fastqc -q -t $task.cpus ${name}.fastq.gz """ } else { """ [ ! -f ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz [ ! -f ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz fastqc -q -t $task.cpus ${name}_1.fastq.gz fastqc -q -t $task.cpus ${name}_2.fastq.gz """ } } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- ADAPTER TRIMMING -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* * STEP 2: Trim Galore! */ if (params.skip_trimming) { ch_trimmed_reads = ch_raw_reads_trimgalore ch_trimgalore_results_mqc = Channel.empty() ch_trimgalore_fastqc_reports_mqc = Channel.empty() } else { process TRIMGALORE { tag "$name" label 'process_high' publishDir "${params.outdir}/trim_galore", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('.html')) "fastqc/$filename" else if (filename.endsWith('.zip')) "fastqc/zips/$filename" else if (filename.endsWith('trimming_report.txt')) "logs/$filename" else params.save_trimmed ? filename : null } input: tuple val(name), path(reads) from ch_raw_reads_trimgalore output: tuple val(name), path('*.fq.gz') into ch_trimmed_reads path '*.txt' into ch_trimgalore_results_mqc path '*.{zip,html}' into ch_trimgalore_fastqc_reports_mqc script: // Calculate number of --cores for TrimGalore based on value of task.cpus // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019 // See: https://github.com/nf-core/atacseq/pull/65 def cores = 1 if (task.cpus) { cores = (task.cpus as int) - 4 if (params.single_end) cores = (task.cpus as int) - 3 if (cores < 1) cores = 1 if (cores > 4) cores = 4 } // Added soft-links to original fastqs for consistent naming in MultiQC c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '' tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' nextseq = params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : '' // Added soft-links to original fastqs for consistent naming in MultiQC if (params.single_end) { """ [ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz trim_galore --cores $cores --fastqc --gzip $c_r1 $tpc_r1 $nextseq ${name}.fastq.gz """ } else { """ [ ! -f ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz [ ! -f ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz trim_galore --cores $cores --paired --fastqc --gzip $c_r1 $c_r2 $tpc_r1 $tpc_r2 $nextseq ${name}_1.fastq.gz ${name}_2.fastq.gz """ } } } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- ALIGN -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* * STEP 3.1: Map read(s) with bwa mem */ process BWA_MEM { tag "$name" label 'process_high' input: tuple val(name), path(reads) from ch_trimmed_reads path index from ch_bwa_index.collect() output: tuple val(name), path('*.bam') into ch_bwa_bam script: prefix = "${name}.Lb" rg = "\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'" if (params.seq_center) { rg = "\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\\tCN:${params.seq_center}\'" } score = params.bwa_min_score ? "-T ${params.bwa_min_score}" : '' """ bwa mem \\ -t $task.cpus \\ -M \\ -R $rg \\ $score \\ ${index}/${bwa_base} \\ $reads \\ | samtools view -@ $task.cpus -b -h -F 0x0100 -O BAM -o ${prefix}.bam - """ } /* * STEP 3.2: Convert BAM to coordinate sorted BAM */ process SORT_BAM { tag "$name" label 'process_medium' if (params.save_align_intermeds) { publishDir path: "${params.outdir}/bwa/library", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('.flagstat')) "samtools_stats/$filename" else if (filename.endsWith('.idxstats')) "samtools_stats/$filename" else if (filename.endsWith('.stats')) "samtools_stats/$filename" else filename } } input: tuple val(name), path(bam) from ch_bwa_bam output: tuple val(name), path('*.sorted.{bam,bam.bai}') into ch_sort_bam_merge path '*.{flagstat,idxstats,stats}' into ch_sort_bam_flagstat_mqc script: prefix = "${name}.Lb" """ samtools sort -@ $task.cpus -o ${prefix}.sorted.bam -T $name $bam samtools index ${prefix}.sorted.bam samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats """ } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- MERGE LIBRARY BAM -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* * STEP 4.1: Merge BAM files for all libraries from same sample replicate */ ch_sort_bam_merge .map { it -> [ it[0].split('_')[0..-2].join('_'), it[1] ] } .groupTuple(by: [0]) .map { it -> [ it[0], it[1].flatten() ] } .set { ch_sort_bam_merge } process MERGED_LIB_BAM { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('.flagstat')) "samtools_stats/$filename" else if (filename.endsWith('.idxstats')) "samtools_stats/$filename" else if (filename.endsWith('.stats')) "samtools_stats/$filename" else if (filename.endsWith('.metrics.txt')) "picard_metrics/$filename" else params.save_align_intermeds ? filename : null } input: tuple val(name), path(bams) from ch_sort_bam_merge output: tuple val(name), path("*${prefix}.sorted.{bam,bam.bai}") into ch_mlib_bam_filter, ch_mlib_bam_preseq, ch_mlib_bam_ataqv path '*.{flagstat,idxstats,stats}' into ch_mlib_bam_stats_mqc path '*.txt' into ch_mlib_bam_metrics_mqc script: prefix = "${name}.mLb.mkD" bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() def avail_mem = 3 if (!task.memory) { log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.toGiga() } if (bam_files.size() > 1) { """ picard -Xmx${avail_mem}g MergeSamFiles \\ ${'INPUT='+bam_files.join(' INPUT=')} \\ OUTPUT=${name}.sorted.bam \\ SORT_ORDER=coordinate \\ VALIDATION_STRINGENCY=LENIENT \\ TMP_DIR=tmp samtools index ${name}.sorted.bam picard -Xmx${avail_mem}g MarkDuplicates \\ INPUT=${name}.sorted.bam \\ OUTPUT=${prefix}.sorted.bam \\ ASSUME_SORTED=true \\ REMOVE_DUPLICATES=false \\ METRICS_FILE=${prefix}.MarkDuplicates.metrics.txt \\ VALIDATION_STRINGENCY=LENIENT \\ TMP_DIR=tmp samtools index ${prefix}.sorted.bam samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats """ } else { """ picard -Xmx${avail_mem}g MarkDuplicates \\ INPUT=${bam_files[0]} \\ OUTPUT=${prefix}.sorted.bam \\ ASSUME_SORTED=true \\ REMOVE_DUPLICATES=false \\ METRICS_FILE=${prefix}.MarkDuplicates.metrics.txt \\ VALIDATION_STRINGENCY=LENIENT \\ TMP_DIR=tmp samtools index ${prefix}.sorted.bam samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats """ } } /* * STEP 4.2: Filter BAM file at merged library-level */ process MERGED_LIB_BAM_FILTER { tag "$name" label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: params.publish_dir_mode, saveAs: { filename -> if (params.single_end || params.save_align_intermeds) { if (filename.endsWith('.flagstat')) "samtools_stats/$filename" else if (filename.endsWith('.idxstats')) "samtools_stats/$filename" else if (filename.endsWith('.stats')) "samtools_stats/$filename" else if (filename.endsWith('.sorted.bam')) filename else if (filename.endsWith('.sorted.bam.bai')) filename else null } } input: tuple val(name), path(bam) from ch_mlib_bam_filter path bed from ch_genome_filter_regions.collect() path bamtools_filter_config from ch_bamtools_filter_config output: tuple val(name), path('*.{bam,bam.bai}') into ch_mlib_filter_bam tuple val(name), path('*.flagstat') into ch_mlib_filter_bam_flagstat path '*.{idxstats,stats}' into ch_mlib_filter_bam_stats_mqc script: prefix = params.single_end ? "${name}.mLb.clN" : "${name}.mLb.flT" filter_params = params.single_end ? '-F 0x004' : '-F 0x004 -F 0x0008 -f 0x001' dup_params = params.keep_dups ? '' : '-F 0x0400' multimap_params = params.keep_multi_map ? '' : '-q 1' blacklist_params = params.blacklist ? "-L $bed" : '' name_sort_bam = params.single_end ? '' : "samtools sort -n -@ $task.cpus -o ${prefix}.bam -T $prefix ${prefix}.sorted.bam" """ samtools view \\ $filter_params \\ $dup_params \\ $multimap_params \\ $blacklist_params \\ -b ${bam[0]} \\ | bamtools filter \\ -out ${prefix}.sorted.bam \\ -script $bamtools_filter_config samtools index ${prefix}.sorted.bam samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats $name_sort_bam """ } /* * STEP 4.3: Remove orphan reads from paired-end BAM file */ if (params.single_end) { ch_mlib_filter_bam .into { ch_mlib_rm_orphan_bam_metrics; ch_mlib_rm_orphan_bam_bigwig; ch_mlib_rm_orphan_bam_macs; ch_mlib_rm_orphan_bam_plotfingerprint; ch_mlib_rm_orphan_bam_mrep; ch_mlib_name_bam_mlib_counts; ch_mlib_name_bam_mrep_counts } ch_mlib_filter_bam_flagstat .into { ch_mlib_rm_orphan_flagstat_bigwig; ch_mlib_rm_orphan_flagstat_macs; ch_mlib_rm_orphan_flagstat_mqc } ch_mlib_filter_bam_stats_mqc .set { ch_mlib_rm_orphan_stats_mqc } } else { process MERGED_LIB_BAM_REMOVE_ORPHAN { tag "$name" label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('.flagstat')) "samtools_stats/$filename" else if (filename.endsWith('.idxstats')) "samtools_stats/$filename" else if (filename.endsWith('.stats')) "samtools_stats/$filename" else if (filename.endsWith('.sorted.bam')) filename else if (filename.endsWith('.sorted.bam.bai')) filename else null } input: tuple val(name), path(bam) from ch_mlib_filter_bam output: tuple val(name), path('*.sorted.{bam,bam.bai}') into ch_mlib_rm_orphan_bam_metrics, ch_mlib_rm_orphan_bam_bigwig, ch_mlib_rm_orphan_bam_macs, ch_mlib_rm_orphan_bam_plotfingerprint, ch_mlib_rm_orphan_bam_mrep tuple val(name), path("${prefix}.bam") into ch_mlib_name_bam_mlib_counts, ch_mlib_name_bam_mrep_counts tuple val(name), path('*.flagstat') into ch_mlib_rm_orphan_flagstat_bigwig, ch_mlib_rm_orphan_flagstat_macs, ch_mlib_rm_orphan_flagstat_mqc path '*.{idxstats,stats}' into ch_mlib_rm_orphan_stats_mqc script: // This script is bundled with the pipeline, in nf-core/atacseq/bin/ prefix = "${name}.mLb.clN" """ bampe_rm_orphan.py ${bam[0]} ${prefix}.bam --only_fr_pairs samtools sort -@ $task.cpus -o ${prefix}.sorted.bam -T $prefix ${prefix}.bam samtools index ${prefix}.sorted.bam samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats """ } } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- MERGE LIBRARY BAM POST-ANALYSIS -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* * STEP 5.1: Preseq analysis after merging libraries and before filtering */ process MERGED_LIB_PRESEQ { tag "$name" label 'process_low' label 'error_ignore' publishDir "${params.outdir}/bwa/mergedLibrary/preseq", mode: params.publish_dir_mode when: !params.skip_preseq input: tuple val(name), path(bam) from ch_mlib_bam_preseq output: path '*.ccurve.txt' into ch_mlib_preseq_mqc path '*.log' script: prefix = "${name}.mLb.mkD" pe = params.single_end ? '' : '-pe' """ preseq lc_extrap \\ -output ${prefix}.ccurve.txt \\ -verbose \\ -bam \\ $pe \\ -seed 1 \\ ${bam[0]} cp .command.err ${prefix}.command.log """ } /* * STEP 5.2: Picard CollectMultipleMetrics after merging libraries and filtering */ process MERGED_LIB_PICARD_METRICS { tag "$name" label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('_metrics')) "picard_metrics/$filename" else if (filename.endsWith('.pdf')) "picard_metrics/pdf/$filename" else null } when: !params.skip_picard_metrics input: tuple val(name), path(bam) from ch_mlib_rm_orphan_bam_metrics path fasta from ch_fasta output: path '*_metrics' into ch_mlib_collectmetrics_mqc path '*.pdf' script: prefix = "${name}.mLb.clN" def avail_mem = 3 if (!task.memory) { log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.toGiga() } """ picard -Xmx${avail_mem}g CollectMultipleMetrics \\ INPUT=${bam[0]} \\ OUTPUT=${prefix}.CollectMultipleMetrics \\ REFERENCE_SEQUENCE=$fasta \\ VALIDATION_STRINGENCY=LENIENT \\ TMP_DIR=tmp """ } /* * STEP 5.3: Read depth normalised bigWig */ process MERGED_LIB_BIGWIG { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/bigwig", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('scale_factor.txt')) "scale/$filename" else if (filename.endsWith('.bigWig')) filename else null } input: tuple val(name), path(bam), path(flagstat) from ch_mlib_rm_orphan_bam_bigwig.join(ch_mlib_rm_orphan_flagstat_bigwig, by: [0]) path sizes from ch_genome_sizes_mlib_bigwig.collect() output: tuple val(name), path('*.bigWig') into ch_mlib_bigwig_plotprofile path '*igv.txt' into ch_mlib_bigwig_igv path '*scale_factor.txt' script: prefix = "${name}.mLb.clN" pe_fragment = params.single_end ? '' : '-pc' extend = (params.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' """ SCALE_FACTOR=\$(grep 'mapped (' $flagstat | awk '{print 1000000/\$1}') echo \$SCALE_FACTOR > ${prefix}.scale_factor.txt genomeCoverageBed -ibam ${bam[0]} -bg -scale \$SCALE_FACTOR $pe_fragment $extend | sort -T '.' -k1,1 -k2,2n > ${prefix}.bedGraph bedGraphToBigWig ${prefix}.bedGraph $sizes ${prefix}.bigWig find * -type f -name "*.bigWig" -exec echo -e "bwa/mergedLibrary/bigwig/"{}"\\t0,0,178" \\; > ${prefix}.bigWig.igv.txt """ } /* * STEP 5.4: Generate gene body coverage plot with deepTools plotProfile and plotHeatmap */ process MERGED_LIB_PLOTPROFILE { tag "$name" label 'process_high' publishDir "${params.outdir}/bwa/mergedLibrary/deepTools/plotProfile", mode: params.publish_dir_mode when: !params.skip_plot_profile input: tuple val(name), path(bigwig) from ch_mlib_bigwig_plotprofile path bed from ch_gene_bed output: path '*.plotProfile.tab' into ch_mlib_plotprofile_mqc path '*.{gz,pdf,mat.tab}' script: prefix = "${name}.mLb.clN" """ computeMatrix scale-regions \\ --regionsFileName $bed \\ --scoreFileName $bigwig \\ --outFileName ${prefix}.computeMatrix.mat.gz \\ --outFileNameMatrix ${prefix}.computeMatrix.vals.mat.tab \\ --regionBodyLength 1000 \\ --beforeRegionStartLength 3000 \\ --afterRegionStartLength 3000 \\ --skipZeros \\ --smartLabels \\ --numberOfProcessors $task.cpus plotProfile --matrixFile ${prefix}.computeMatrix.mat.gz \\ --outFileName ${prefix}.plotProfile.pdf \\ --outFileNameData ${prefix}.plotProfile.tab plotHeatmap --matrixFile ${prefix}.computeMatrix.mat.gz \\ --outFileName ${prefix}.plotHeatmap.pdf \\ --outFileNameMatrix ${prefix}.plotHeatmap.mat.tab """ } /* * STEP 5.5: deepTools plotFingerprint */ process MERGED_LIB_PLOTFINGERPRINT { tag "$name" label 'process_high' publishDir "${params.outdir}/bwa/mergedLibrary/deepTools/plotFingerprint", mode: params.publish_dir_mode when: !params.skip_plot_fingerprint input: tuple val(name), path(bam) from ch_mlib_rm_orphan_bam_plotfingerprint output: path '*.raw.txt' into ch_mlib_plotfingerprint_mqc path '*.{txt,pdf}' script: prefix = "${name}.mLb.clN" extend = (params.single_end && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : '' """ plotFingerprint \\ --bamfiles ${bam[0]} \\ --plotFile ${prefix}.plotFingerprint.pdf \\ $extend \\ --labels $prefix \\ --outRawCounts ${prefix}.plotFingerprint.raw.txt \\ --outQualityMetrics ${prefix}.plotFingerprint.qcmetrics.txt \\ --skipZeros \\ --numberOfProcessors $task.cpus \\ --numberOfSamples $params.fingerprint_bins """ } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- MERGE LIBRARY PEAK ANALYSIS -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* * STEP 6.1: Call peaks with MACS2 and calculate FRiP score */ process MERGED_LIB_MACS2 { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('.tsv')) "qc/$filename" else if (filename.endsWith('.igv.txt')) null else filename } when: params.macs_gsize input: tuple val(name), path(bam), path(flagstat) from ch_mlib_rm_orphan_bam_macs.join(ch_mlib_rm_orphan_flagstat_macs, by: [0]) path mlib_peak_count_header from ch_mlib_peak_count_header path mlib_frip_score_header from ch_mlib_frip_score_header output: tuple val(name), path("*$PEAK_TYPE") into ch_mlib_macs_homer, ch_mlib_macs_qc, ch_mlib_macs_consensus, ch_mlib_macs_ataqv path '*igv.txt' into ch_mlib_macs_igv path '*_mqc.tsv' into ch_mlib_macs_mqc path '*.{bed,xls,gappedPeak,bdg}' script: prefix = "${name}.mLb.clN" broad = params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" format = params.single_end ? 'BAM' : 'BAMPE' pileup = params.save_macs_pileup ? '-B --SPMR' : '' fdr = params.macs_fdr ? "--qvalue ${params.macs_fdr}" : '' pvalue = params.macs_pvalue ? "--pvalue ${params.macs_pvalue}" : '' """ macs2 callpeak \\ -t ${bam[0]} \\ $broad \\ -f $format \\ -g $params.macs_gsize \\ -n $prefix \\ $pileup \\ $fdr \\ $pvalue \\ --keep-dup all \\ --nomodel cat ${prefix}_peaks.${PEAK_TYPE} | wc -l | awk -v OFS='\t' '{ print "${name}", \$1 }' | cat $mlib_peak_count_header - > ${prefix}_peaks.count_mqc.tsv READS_IN_PEAKS=\$(intersectBed -a ${bam[0]} -b ${prefix}_peaks.${PEAK_TYPE} -bed -c -f 0.20 | awk -F '\t' '{sum += \$NF} END {print sum}') grep 'mapped (' $flagstat | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${name}", a/\$1}' | cat $mlib_frip_score_header - > ${prefix}_peaks.FRiP_mqc.tsv find * -type f -name "*.${PEAK_TYPE}" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/"{}"\\t0,0,178" \\; > ${prefix}_peaks.igv.txt """ } /* * STEP 6.2: Annotate peaks with HOMER */ process MERGED_LIB_MACS2_ANNOTATE { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}", mode: params.publish_dir_mode when: params.macs_gsize && !params.skip_peak_annotation input: tuple val(name), path(peak) from ch_mlib_macs_homer path fasta from ch_fasta path gtf from ch_gtf output: path '*.txt' into ch_mlib_macs_annotate script: prefix = "${name}.mLb.clN" """ annotatePeaks.pl \\ $peak \\ $fasta \\ -gid \\ -gtf $gtf \\ -cpu $task.cpus \\ > ${prefix}_peaks.annotatePeaks.txt """ } /* * STEP 6.3: Aggregated QC plots for peaks, FRiP and peak-to-gene annotation */ process MERGED_LIB_MACS2_QC { label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/qc", mode: params.publish_dir_mode when: params.macs_gsize && !params.skip_peak_annotation && !params.skip_peak_qc input: path peaks from ch_mlib_macs_qc.collect{ it[1] } path annos from ch_mlib_macs_annotate.collect() path mlib_peak_annotation_header from ch_mlib_peak_annotation_header output: path '*.tsv' into ch_mlib_peak_qc_mqc path '*.{txt,pdf}' script: // This script is bundled with the pipeline, in nf-core/atacseq/bin/ suffix = 'mLb.clN' """ plot_macs_qc.r \\ -i ${peaks.join(',')} \\ -s ${peaks.join(',').replaceAll(".${suffix}_peaks.${PEAK_TYPE}","")} \\ -o ./ \\ -p macs_peak.${suffix} plot_homer_annotatepeaks.r \\ -i ${annos.join(',')} \\ -s ${annos.join(',').replaceAll(".${suffix}_peaks.annotatePeaks.txt","")} \\ -o ./ \\ -p macs_annotatePeaks.${suffix} cat $mlib_peak_annotation_header macs_annotatePeaks.${suffix}.summary.txt > macs_annotatePeaks.${suffix}.summary_mqc.tsv """ } /* * STEP 6.4: Consensus peaks across samples, create boolean filtering file, SAF file for featureCounts and UpSetR plot for intersection */ process MERGED_LIB_CONSENSUS { label 'process_long' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('.igv.txt')) null else filename } when: params.macs_gsize && (replicatesExist || multipleGroups) && !params.skip_consensus_peaks input: path peaks from ch_mlib_macs_consensus.collect{ it[1] } output: path '*.bed' into ch_mlib_macs_consensus_bed path '*.saf' into ch_mlib_macs_consensus_saf path '*.boolean.txt' into ch_mlib_macs_consensus_bool path '*igv.txt' into ch_mlib_macs_consensus_igv path '*.intersect.{txt,plot.pdf}' script: // scripts are bundled with the pipeline, in nf-core/atacseq/bin/ suffix = 'mLb.clN' prefix = "consensus_peaks.${suffix}" mergecols = params.narrow_peak ? (2..10).join(',') : (2..9).join(',') collapsecols = params.narrow_peak ? (['collapse']*9).join(',') : (['collapse']*8).join(',') expandparam = params.narrow_peak ? '--is_narrow_peak' : '' """ sort -T '.' -k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\ | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt macs2_merged_expand.py \\ ${prefix}.txt \\ ${peaks.collect{it.toString()}.sort().join(',').replaceAll("_peaks.${PEAK_TYPE}","")} \\ ${prefix}.boolean.txt \\ --min_replicates $params.min_reps_consensus \\ $expandparam awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2, \$3, \$4, "0", "+" }' ${prefix}.boolean.txt > ${prefix}.bed echo -e "GeneID\tChr\tStart\tEnd\tStrand" > ${prefix}.saf awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$4, \$1, \$2, \$3, "+" }' ${prefix}.boolean.txt >> ${prefix}.saf sed -i 's/.${suffix}//g' ${prefix}.boolean.intersect.txt plot_peak_intersect.r -i ${prefix}.boolean.intersect.txt -o ${prefix}.boolean.intersect.plot.pdf find * -type f -name "${prefix}.bed" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/"{}"\\t0,0,0" \\; > ${prefix}.bed.igv.txt """ } /* * STEP 6.5: Annotate consensus peaks with HOMER, and add annotation to boolean output file */ process MERGED_LIB_CONSENSUS_ANNOTATE { label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus", mode: params.publish_dir_mode when: params.macs_gsize && (replicatesExist || multipleGroups) && !params.skip_consensus_peaks && !params.skip_peak_annotation input: path bed from ch_mlib_macs_consensus_bed path bool from ch_mlib_macs_consensus_bool path fasta from ch_fasta path gtf from ch_gtf output: path '*.annotatePeaks.txt' script: prefix = 'consensus_peaks.mLb.clN' """ annotatePeaks.pl \\ $bed \\ $fasta \\ -gid \\ -gtf $gtf \\ -cpu $task.cpus \\ > ${prefix}.annotatePeaks.txt cut -f2- ${prefix}.annotatePeaks.txt | awk 'NR==1; NR > 1 {print \$0 | "sort -T '.' -k1,1 -k2,2n"}' | cut -f6- > tmp.txt paste $bool tmp.txt > ${prefix}.boolean.annotatePeaks.txt """ } /* * STEP 6.6: Count reads in consensus peaks with featureCounts */ process MERGED_LIB_CONSENSUS_COUNTS { label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus", mode: params.publish_dir_mode when: params.macs_gsize && (replicatesExist || multipleGroups) && !params.skip_consensus_peaks input: path bams from ch_mlib_name_bam_mlib_counts.collect{ it[1] } path saf from ch_mlib_macs_consensus_saf.collect() output: path '*featureCounts.txt' into ch_mlib_macs_consensus_counts path '*featureCounts.txt.summary' into ch_mlib_macs_consensus_counts_mqc script: prefix = 'consensus_peaks.mLb.clN' bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() pe_params = params.single_end ? '' : '-p --donotsort' """ featureCounts \\ -F SAF \\ -O \\ --fracOverlap 0.2 \\ -T $task.cpus \\ $pe_params \\ -a $saf \\ -o ${prefix}.featureCounts.txt \\ ${bam_files.join(' ')} """ } /* * STEP 6.7: Differential analysis with DESeq2 */ process MERGED_LIB_CONSENSUS_DESEQ2 { label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/deseq2", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('.igv.txt')) null else filename } when: params.macs_gsize && replicatesExist && multipleGroups && !params.skip_consensus_peaks && !params.skip_diff_analysis input: path counts from ch_mlib_macs_consensus_counts path mlib_deseq2_pca_header from ch_mlib_deseq2_pca_header path mlib_deseq2_clustering_header from ch_mlib_deseq2_clustering_header output: path '*.tsv' into ch_mlib_macs_consensus_deseq_mqc path '*igv.txt' into ch_mlib_macs_consensus_deseq_comp_igv path '*.{RData,results.txt,pdf,log}' path 'sizeFactors' path '*vs*/*.{pdf,txt}' path '*vs*/*.bed' script: prefix = 'consensus_peaks.mLb.clN' bam_ext = params.single_end ? '.mLb.clN.sorted.bam' : '.mLb.clN.bam' vst = params.deseq2_vst ? '--vst TRUE' : '' """ featurecounts_deseq2.r \\ --featurecount_file $counts \\ --bam_suffix '$bam_ext' \\ --outdir ./ \\ --outprefix $prefix \\ --outsuffix .mLb.clN \\ --cores $task.cpus \\ $vst \\ cat $mlib_deseq2_pca_header ${prefix}.pca.vals.txt > ${prefix}.pca.vals_mqc.tsv cat $mlib_deseq2_clustering_header ${prefix}.sample.dists.txt > ${prefix}.sample.dists_mqc.tsv find * -type f -name "*.FDR0.05.results.bed" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/deseq2/"{}"\\t255,0,0" \\; > ${prefix}.igv.txt """ } /* * STEP 6.8: Run ataqv on BAM file and corresponding peaks */ process MERGED_LIB_ATAQV { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/ataqv/${PEAK_TYPE}", mode: params.publish_dir_mode when: !params.skip_ataqv input: tuple val(name), path(bam), path(peak) from ch_mlib_bam_ataqv.join(ch_mlib_macs_ataqv, by: [0]) path autosomes from ch_genome_autosomes.collect() path tss_bed from ch_tss_bed output: path '*.json' into ch_mlib_ataqv script: suffix = 'mLb.clN' peak_param = params.macs_gsize ? "--peak-file ${peak}" : '' mito_param = params.mito_name ? "--mitochondrial-reference-name ${params.mito_name}" : '' """ ataqv \\ --threads $task.cpus \\ $peak_param \\ --tss-file $tss_bed \\ --metrics-file ${name}.${suffix}.ataqv.json \\ --name $name \\ --ignore-read-groups \\ --autosomal-reference-file $autosomes \\ $mito_param \\ NA \\ ${bam[0]} """ } /* * STEP 6.9: Run ataqv mkarv on all JSON files to render web app */ process MERGED_LIB_ATAQV_MKARV { label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/ataqv/${PEAK_TYPE}", mode: params.publish_dir_mode when: !params.skip_ataqv input: path json from ch_mlib_ataqv.collect() output: path 'html' script: """ mkarv \\ --concurrency $task.cpus \\ --force \\ ./html/ \\ ${json.join(' ')} """ } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- MERGE REPLICATE BAM -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* * STEP 7: Merge library BAM files across all replicates */ ch_mlib_rm_orphan_bam_mrep .map { it -> [ it[0].split('_')[0..-2].join('_'), it[1] ] } .groupTuple(by: [0]) .map { it -> [ it[0], it[1].flatten() ] } .set { ch_mlib_rm_orphan_bam_mrep } process MERGED_REP_BAM { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedReplicate", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('.flagstat')) "samtools_stats/$filename" else if (filename.endsWith('.idxstats')) "samtools_stats/$filename" else if (filename.endsWith('.stats')) "samtools_stats/$filename" else if (filename.endsWith('.metrics.txt')) "picard_metrics/$filename" else filename } input: tuple val(name), path(bams) from ch_mlib_rm_orphan_bam_mrep output: tuple val(name), path("*${prefix}.sorted.{bam,bam.bai}") into ch_mrep_bam_bigwig, ch_mrep_bam_macs tuple val(name), path('*.flagstat') into ch_mrep_bam_flagstat_bigwig, ch_mrep_bam_flagstat_macs, ch_mrep_bam_flagstat_mqc path '*.{idxstats,stats}' into ch_mrep_bam_stats_mqc path '*.txt' into ch_mrep_bam_metrics_mqc when: !params.skip_merge_replicates && replicatesExist script: prefix = "${name}.mRp.clN" bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() def avail_mem = 3 if (!task.memory) { log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.toGiga() } if (bam_files.size() > 1) { """ picard -Xmx${avail_mem}g MergeSamFiles \\ ${'INPUT='+bam_files.join(' INPUT=')} \\ OUTPUT=${name}.sorted.bam \\ SORT_ORDER=coordinate \\ VALIDATION_STRINGENCY=LENIENT \\ TMP_DIR=tmp samtools index ${name}.sorted.bam picard -Xmx${avail_mem}g MarkDuplicates \\ INPUT=${name}.sorted.bam \\ OUTPUT=${prefix}.sorted.bam \\ ASSUME_SORTED=true \\ REMOVE_DUPLICATES=true \\ METRICS_FILE=${prefix}.MarkDuplicates.metrics.txt \\ VALIDATION_STRINGENCY=LENIENT \\ TMP_DIR=tmp samtools index ${prefix}.sorted.bam samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats """ } else { """ ln -s ${bams[0]} ${prefix}.sorted.bam ln -s ${bams[1]} ${prefix}.sorted.bam.bai touch ${prefix}.MarkDuplicates.metrics.txt samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats """ } } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- MERGE REPLICATE BAM POST-ANALYSIS -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* * STEP 8.1: Read depth normalised bigWig */ process MERGED_REP_BIGWIG { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedReplicate/bigwig", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('scale_factor.txt')) "scale/$filename" else if (filename.endsWith('.bigWig')) filename else null } when: !params.skip_merge_replicates && replicatesExist input: tuple val(name), path(bam), path(flagstat) from ch_mrep_bam_bigwig.join(ch_mrep_bam_flagstat_bigwig, by: [0]) path sizes from ch_genome_sizes_mrep_bigwig.collect() output: tuple val(name), path('*.bigWig') into ch_mrep_bigwig path '*igv.txt' into ch_mrep_bigwig_igv path '*scale_factor.txt' script: prefix = "${name}.mRp.clN" pe_fragment = params.single_end ? '' : '-pc' extend = (params.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' """ SCALE_FACTOR=\$(grep 'mapped (' $flagstat | awk '{print 1000000/\$1}') echo \$SCALE_FACTOR > ${prefix}.scale_factor.txt genomeCoverageBed -ibam ${bam[0]} -bg -scale \$SCALE_FACTOR $pe_fragment $extend | sort -T '.' -k1,1 -k2,2n > ${prefix}.bedGraph bedGraphToBigWig ${prefix}.bedGraph $sizes ${prefix}.bigWig find * -type f -name "*.bigWig" -exec echo -e "bwa/mergedReplicate/bigwig/"{}"\\t0,0,178" \\; > ${prefix}.bigWig.igv.txt """ } /* * STEP 8.2: Call peaks with MACS2 and calculate FRiP score */ process MERGED_REP_MACS2 { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedReplicate/macs/${PEAK_TYPE}", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('.tsv')) "qc/$filename" else if (filename.endsWith('.igv.txt')) null else filename } when: !params.skip_merge_replicates && replicatesExist && params.macs_gsize input: tuple val(name), path(bam), path(flagstat) from ch_mrep_bam_macs.join(ch_mrep_bam_flagstat_macs, by: [0]) path mrep_peak_count_header from ch_mrep_peak_count_header path mrep_frip_score_header from ch_mrep_frip_score_header output: tuple val(name), path("*$PEAK_TYPE") into ch_mrep_macs_homer, ch_mrep_macs_qc, ch_mrep_macs_consensus path '*igv.txt' into ch_mrep_macs_igv path '*_mqc.tsv' into ch_mrep_macs_mqc path '*.{bed,xls,gappedPeak,bdg}' script: prefix = "${name}.mRp.clN" broad = params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" format = params.single_end ? 'BAM' : 'BAMPE' pileup = params.save_macs_pileup ? '-B --SPMR' : '' """ macs2 callpeak \\ -t ${bam[0]} \\ $broad \\ -f $format \\ -g $params.macs_gsize \\ -n $prefix \\ $pileup \\ --keep-dup all \\ --nomodel cat ${prefix}_peaks.${PEAK_TYPE} | wc -l | awk -v OFS='\t' '{ print "${name}", \$1 }' | cat $mrep_peak_count_header - > ${prefix}_peaks.count_mqc.tsv READS_IN_PEAKS=\$(intersectBed -a ${bam[0]} -b ${prefix}_peaks.${PEAK_TYPE} -bed -c -f 0.20 | awk -F '\t' '{sum += \$NF} END {print sum}') grep 'mapped (' $flagstat | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${name}", a/\$1}' | cat $mrep_frip_score_header - > ${prefix}_peaks.FRiP_mqc.tsv find * -type f -name "*.${PEAK_TYPE}" -exec echo -e "bwa/mergedReplicate/macs/${PEAK_TYPE}/"{}"\\t0,0,178" \\; > ${prefix}_peaks.igv.txt """ } /* * STEP 8.3: Annotate peaks with HOMER */ process MERGED_REP_MACS2_ANNOTATE { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedReplicate/macs/${PEAK_TYPE}", mode: params.publish_dir_mode when: !params.skip_merge_replicates && replicatesExist && params.macs_gsize && !params.skip_peak_annotation input: tuple val(name), path(peak) from ch_mrep_macs_homer path fasta from ch_fasta path gtf from ch_gtf output: path '*.txt' into ch_mrep_macs_annotate script: prefix = "${name}.mRp.clN" """ annotatePeaks.pl \\ $peak \\ $fasta \\ -gid \\ -gtf $gtf \\ -cpu $task.cpus \\ > ${prefix}_peaks.annotatePeaks.txt """ } /* * STEP 8.4: Aggregated QC plots for peaks, FRiP and peak-to-gene annotation */ process MERGED_REP_MACS2_QC { label 'process_medium' publishDir "${params.outdir}/bwa/mergedReplicate/macs/${PEAK_TYPE}/qc", mode: params.publish_dir_mode when: !params.skip_merge_replicates && replicatesExist && params.macs_gsize && !params.skip_peak_qc && !params.skip_peak_annotation input: path peaks from ch_mrep_macs_qc.collect{ it[1] } path annos from ch_mrep_macs_annotate.collect() path mrep_peak_annotation_header from ch_mrep_peak_annotation_header output: path '*.tsv' into ch_mrep_peak_qc_mqc path '*.{txt,pdf}' script: // This script is bundled with the pipeline, in nf-core/atacseq/bin/ suffix = 'mRp.clN' """ plot_macs_qc.r \\ -i ${peaks.join(',')} \\ -s ${peaks.join(',').replaceAll(".${suffix}_peaks.${PEAK_TYPE}","")} \\ -o ./ \\ -p macs_peak.${suffix} plot_homer_annotatepeaks.r \\ -i ${annos.join(',')} \\ -s ${annos.join(',').replaceAll(".${suffix}_peaks.annotatePeaks.txt","")} \\ -o ./ \\ -p macs_annotatePeaks.${suffix} cat $mrep_peak_annotation_header macs_annotatePeaks.${suffix}.summary.txt > macs_annotatePeaks.${suffix}.summary_mqc.tsv """ } /* * STEP 8.5: Consensus peaks across samples, create boolean filtering file, SAF file for featureCounts and UpSetR plot for intersection */ process MERGED_REP_CONSENSUS { label 'process_long' publishDir "${params.outdir}/bwa/mergedReplicate/macs/${PEAK_TYPE}/consensus", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('.igv.txt')) null else filename } when: !params.skip_merge_replicates && replicatesExist && params.macs_gsize && multipleGroups && !params.skip_consensus_peaks input: path peaks from ch_mrep_macs_consensus.collect{ it[1] } output: path '*.bed' into ch_mrep_macs_consensus_bed path '*.saf' into ch_mrep_macs_consensus_saf path '*.boolean.txt' into ch_mrep_macs_consensus_bool path '*igv.txt' into ch_mrep_macs_consensus_igv path '*.intersect.{txt,plot.pdf}' script: // scripts are bundled with the pipeline, in nf-core/atacseq/bin/ suffix = 'mRp.clN' prefix = "consensus_peaks.${suffix}" mergecols = params.narrow_peak ? (2..10).join(',') : (2..9).join(',') collapsecols = params.narrow_peak ? (['collapse']*9).join(',') : (['collapse']*8).join(',') expandparam = params.narrow_peak ? '--is_narrow_peak' : '' """ sort -T '.' -k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\ | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt macs2_merged_expand.py \\ ${prefix}.txt \\ ${peaks.collect{it.toString()}.sort().join(',').replaceAll("_peaks.${PEAK_TYPE}","")} \\ ${prefix}.boolean.txt \\ $expandparam awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2, \$3, \$4, "0", "+" }' ${prefix}.boolean.txt > ${prefix}.bed echo -e "GeneID\tChr\tStart\tEnd\tStrand" > ${prefix}.saf awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$4, \$1, \$2, \$3, "+" }' ${prefix}.boolean.txt >> ${prefix}.saf sed -i 's/.${suffix}//g' ${prefix}.boolean.intersect.txt plot_peak_intersect.r -i ${prefix}.boolean.intersect.txt -o ${prefix}.boolean.intersect.plot.pdf find * -type f -name "${prefix}.bed" -exec echo -e "bwa/mergedReplicate/macs/${PEAK_TYPE}/consensus/"{}"\\t0,0,0" \\; > ${prefix}.bed.igv.txt """ } /* * STEP 8.6: Annotate consensus peaks with HOMER, and add annotation to boolean output file */ process MERGED_REP_CONSENSUS_ANNOTATE { label 'process_medium' publishDir "${params.outdir}/bwa/mergedReplicate/macs/${PEAK_TYPE}/consensus", mode: params.publish_dir_mode when: !params.skip_merge_replicates && replicatesExist && params.macs_gsize && multipleGroups && !params.skip_consensus_peaks && !params.skip_peak_annotation input: path bed from ch_mrep_macs_consensus_bed path bool from ch_mrep_macs_consensus_bool path fasta from ch_fasta path gtf from ch_gtf output: path '*.annotatePeaks.txt' script: prefix = 'consensus_peaks.mRp.clN' """ annotatePeaks.pl \\ $bed \\ $fasta \\ -gid \\ -gtf $gtf \\ -cpu $task.cpus \\ > ${prefix}.annotatePeaks.txt cut -f2- ${prefix}.annotatePeaks.txt | awk 'NR==1; NR > 1 {print \$0 | "sort -T '.' -k1,1 -k2,2n"}' | cut -f6- > tmp.txt paste $bool tmp.txt > ${prefix}.boolean.annotatePeaks.txt """ } /* * STEP 8.7: Count reads in consensus peaks with featureCounts */ process MERGED_REP_CONSENSUS_COUNTS { label 'process_medium' publishDir "${params.outdir}/bwa/mergedReplicate/macs/${PEAK_TYPE}/consensus", mode: params.publish_dir_mode when: !params.skip_merge_replicates && replicatesExist && params.macs_gsize && multipleGroups && !params.skip_consensus_peaks input: path bams from ch_mlib_name_bam_mrep_counts.collect{ it[1] } path saf from ch_mrep_macs_consensus_saf.collect() output: path '*featureCounts.txt' into ch_mrep_macs_consensus_counts path '*featureCounts.txt.summary' into ch_mrep_macs_consensus_counts_mqc script: prefix = 'consensus_peaks.mRp.clN' bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() pe_params = params.single_end ? '' : '-p --donotsort' """ featureCounts \\ -F SAF \\ -O \\ --fracOverlap 0.2 \\ -T $task.cpus \\ $pe_params \\ -a $saf \\ -o ${prefix}.featureCounts.txt \\ ${bam_files.join(' ')} """ } /* * STEP 8.8: Differential analysis with DESeq2 */ process MERGED_REP_CONSENSUS_DESEQ2 { label 'process_medium' publishDir "${params.outdir}/bwa/mergedReplicate/macs/${PEAK_TYPE}/consensus/deseq2", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('.igv.txt')) null else filename } when: !params.skip_merge_replicates && replicatesExist && params.macs_gsize && multipleGroups && !params.skip_consensus_peaks && !params.skip_diff_analysis input: path counts from ch_mrep_macs_consensus_counts path mrep_deseq2_pca_header from ch_mrep_deseq2_pca_header path mrep_deseq2_clustering_header from ch_mrep_deseq2_clustering_header output: path '*.tsv' into ch_mrep_macs_consensus_deseq_mqc path '*igv.txt' into ch_mrep_macs_consensus_deseq_comp_igv path '*.{RData,results.txt,pdf,log}' path 'sizeFactors' path '*vs*/*.{pdf,txt}' path '*vs*/*.bed' script: prefix = 'consensus_peaks.mRp.clN' bam_ext = params.single_end ? '.mLb.clN.sorted.bam' : '.mLb.clN.bam' vst = params.deseq2_vst ? '--vst TRUE' : '' """ featurecounts_deseq2.r \\ --featurecount_file $counts \\ --bam_suffix '$bam_ext' \\ --outdir ./ \\ --outprefix $prefix \\ --outsuffix .mRp.clN \\ --cores $task.cpus \\ $vst cat $mrep_deseq2_pca_header ${prefix}.pca.vals.txt > ${prefix}.pca.vals_mqc.tsv cat $mrep_deseq2_clustering_header ${prefix}.sample.dists.txt > ${prefix}.sample.dists_mqc.tsv find * -type f -name "*.FDR0.05.results.bed" -exec echo -e "bwa/mergedReplicate/macs/${PEAK_TYPE}/consensus/deseq2/"{}"\\t255,0,0" \\; > ${prefix}.igv.txt """ } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- IGV -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* * STEP 9: Create IGV session file */ process IGV { publishDir "${params.outdir}/igv/${PEAK_TYPE}", mode: params.publish_dir_mode when: !params.skip_igv input: path fasta from ch_fasta path bigwigs from ch_mlib_bigwig_igv.collect().ifEmpty([]) path peaks from ch_mlib_macs_igv.collect().ifEmpty([]) path consensus_peaks from ch_mlib_macs_consensus_igv.collect().ifEmpty([]) path differential_peaks from ch_mlib_macs_consensus_deseq_comp_igv.collect().ifEmpty([]) path rbigwigs from ch_mrep_bigwig_igv.collect().ifEmpty([]) path rpeaks from ch_mrep_macs_igv.collect().ifEmpty([]) path rconsensus_peaks from ch_mrep_macs_consensus_igv.collect().ifEmpty([]) path rdifferential_peaks from ch_mrep_macs_consensus_deseq_comp_igv.collect().ifEmpty([]) output: path '*.{txt,xml}' script: // scripts are bundled with the pipeline, in nf-core/atacseq/bin/ """ cat *.txt > igv_files.txt igv_files_to_session.py igv_session.xml igv_files.txt ../../genome/${fasta.getName()} --path_prefix '../../' """ } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ /* -- MULTIQC -- */ /* -- -- */ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* * Parse software version numbers */ process get_software_versions { publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.indexOf('.csv') > 0) filename else null } output: path 'software_versions_mqc.yaml' into ch_software_versions_mqc path 'software_versions.csv' script: """ echo $workflow.manifest.version > v_pipeline.txt echo $workflow.nextflow.version > v_nextflow.txt fastqc --version > v_fastqc.txt trim_galore --version > v_trim_galore.txt echo \$(bwa 2>&1) > v_bwa.txt samtools --version > v_samtools.txt bedtools --version > v_bedtools.txt echo \$(bamtools --version 2>&1) > v_bamtools.txt echo \$(plotFingerprint --version 2>&1) > v_deeptools.txt || true picard MarkDuplicates --version &> v_picard.txt || true echo \$(R --version 2>&1) > v_R.txt python -c "import pysam; print(pysam.__version__)" > v_pysam.txt echo \$(macs2 --version 2>&1) > v_macs2.txt touch v_homer.txt echo \$(ataqv --version 2>&1) > v_ataqv.txt echo \$(featureCounts -v 2>&1) > v_featurecounts.txt preseq &> v_preseq.txt multiqc --version > v_multiqc.txt scrape_software_versions.py &> software_versions_mqc.yaml """ } Channel.from(summary.collect{ [it.key, it.value] }) .map { k,v -> "