publish fastq stats by default (for now)

TORCH-Consortium · Jun 22, 2024 · af0b380 · af0b380
1 parent 7f313be
commit af0b380
Show file tree

Hide file tree

Showing 3 changed files with 28 additions and 30 deletions.
diff --git a/bin/fastq_stats.py b/bin/fastq_stats.py
@@ -2,11 +2,9 @@
 
 import ast
 import argparse
-import re
 
 import pandas as pd
 
-re_mapped_p = re.compile(r'\d* mapped \((.*)%\)')
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Process the sample stats')

diff --git a/default_params.config b/default_params.config
@@ -1,4 +1,4 @@
-// ##### BASIC INPUT ##### 
+// ##### BASIC INPUT #####
 
 // The input CSV sample file (the study id cannot start with 'XBS_REF_')
 //NOTE: The samplesheet should have the following fields [study, sample, library, attempt, flowcell, lane, index_sequence, r1, r2]
@@ -16,12 +16,12 @@ vcf_name = "joint"
 // NOTE: Got little genetic diversity in your dataset? (e.g. clonal or <20 samples) - use the EXIT-RIF GVCF file to include additional samples.
 
 use_ref_gvcf = true
-ref_gvcf =  "${projectDir}/resources/ref_gvcfs/LineagesAndOutgroupV2.g.vcf.gz" 
+ref_gvcf =  "${projectDir}/resources/ref_gvcfs/LineagesAndOutgroupV2.g.vcf.gz"
 ref_gvcf_tbi =  "${projectDir}/resources/ref_gvcfs/LineagesAndOutgroupV2.g.vcf.gz.tbi"
 
 // ##### The following sections generally do not require adjusting. #####
 
-//  ##### QC THRESHOLDS ##### 
+//  ##### QC THRESHOLDS #####
 
 //The median coverage required to process the sample
 median_coverage_cutoff = 10
@@ -41,7 +41,7 @@ site_representation_cutoff = 0.95
 
 strand_bias_cutoff = 0.05
 
-// ##### Partial workflows ##### 
+// ##### Partial workflows #####
 
 // Set this to true if you'd like to only validate input fastqs and check their FASTQC reports
 only_validate_fastqs = false // OR true
@@ -62,7 +62,7 @@ skip_base_recalibration = true
 skip_minor_variants_gatk = true
 
 //=========================================
-// 
+//
 //=========================================
 
 // Use this flag to disable downstream phylogenetic analysis of the merged GVCF
@@ -78,7 +78,7 @@ iqtree_fast_ml_only= false
 iqtree_fast_bootstrapped_phylogeny= false
 iqtree_accurate_ml_only= false
 
-// ##### SPECIFIC PATHS AND PARAMETERS ##### 
+// ##### SPECIFIC PATHS AND PARAMETERS #####
 
 //NOTE: It is best not to change these parameters and to rely upon the provided reference files
 ref_fasta_basename = "NC-000962-3-H37Rv"
@@ -173,7 +173,7 @@ SAMPLESHEET_VALIDATION {
 
 FASTQ_STATS {
     results_dir = "${params.outdir}/QC_statistics/per_sample/fastq_stats/"
-    should_publish = false
+    should_publish = true
 }
 
 
@@ -307,16 +307,16 @@ LOFREQ_CALL__NTM {
 
     region = "1472307-1472307"
     arguments = " -m 60 -Q 20 -a 1 "
-    
-    should_publish = false    
+
+    should_publish = false
 }
 
 LOFREQ_INDELQUAL {
     results_dir = "${params.outdir}/vcf_files/per_sample/minor_variants/"
 
     arguments = "-m 60"
-    
-    should_publish = false    
+
+    should_publish = false
 }
 
 SAMTOOLS_INDEX__LOFREQ {
@@ -328,7 +328,7 @@ LOFREQ_CALL {
     results_dir = "${params.outdir}/vcf_files/per_sample/minor_variants/"
     should_publish = false
 
-    //NOTE: Curretly using default p-value for filtering. Use '-a 1' to get all minor variants 
+    //NOTE: Currently using default p-value for filtering. Use '-a 1' to get all minor variants
     arguments = "-m 60 --call-indels"
 }
 
@@ -413,18 +413,18 @@ NTMPROFILER_COLLATE {
 
 GATK_COMBINE_GVCFS {
     results_dir = "${params.outdir}/vcf_files/cohort/raw_variant_files/combined"
-    
+
     arguments = " -G StandardAnnotation -G AS_StandardAnnotation "
-    
+
     should_publish = false
 }
 
 GATK_GENOTYPE_GVCFS {
     results_dir = "${params.outdir}/vcf_files/cohort/raw_variant_files/"
 
     arguments = " -G StandardAnnotation -G AS_StandardAnnotation --sample-ploidy 1 "
-    
-    should_publish = false    
+
+    should_publish = false
 }
 
 
@@ -433,7 +433,7 @@ SNPEFF {
 
     arguments = " -nostats -ud 100 Mycobacterium_tuberculosis_h37rv "
 
-    should_publish = false    
+    should_publish = false
 }
 
 
@@ -474,16 +474,16 @@ GATK_SELECT_VARIANTS__SNP {
     results_dir = "${params.outdir}/vcf_files/cohort/snp_variant_files/"
 
     arguments = " --remove-unused-alternates --exclude-non-variants "
-    
-    should_publish = false        
+
+    should_publish = false
 }
 
 GATK_SELECT_VARIANTS__INDEL {
     results_dir = "${params.outdir}/vcf_files/cohort/indel_variant_files/"
 
     arguments = " --remove-unused-alternates --exclude-non-variants --select-type-to-include MNP --select-type-to-include MIXED"
-   
-    should_publish = false 
+
+    should_publish = false
 }
 
 
@@ -682,7 +682,7 @@ GATK_VARIANTS_TO_TABLE {
     results_dir = "${params.outdir}/vcf_files/cohort/multiple_alignment_files/"
 
     arguments = " -GF GT "
-    
+
     should_publish = false
 }
 
@@ -700,7 +700,7 @@ SNPDISTS {
 IQTREE {
     results_dir = "${params.outdir}/analyses/phylogeny/"
 
-    //NOTE: The arguments of IQTREE are decided within the process 
+    //NOTE: The arguments of IQTREE are decided within the process
     // as per the discussion here https://github.com/TORCH-Consortium/MAGMA/discussions/164#discussioncomment-6839547
 }
 

diff --git a/modules/fastq_utils/validator.nf b/modules/fastq_utils/validator.nf
@@ -12,12 +12,12 @@ process FASTQ_VALIDATOR {
         val ready
 
     output:
-        tuple val(sampleName), path("*.check.*tsv") 
+        tuple val(sampleName), path("*.check.*tsv")
         path("*.check.*tsv")                          , emit: check_result
         tuple val(sampleName), path(sampleReads)      , emit: passed_reads
 
     shell:
-       
+
         '''
         !{params.fastq_validator_path} !{sampleReads} \\
             2>!{sampleName}.command.log || true
@@ -42,10 +42,10 @@ process FASTQ_VALIDATOR {
 
         '''
 
-    stub: 
+    stub:
 
         """
-        touch ${sampleName}.check.tsv 
-        """ 
+        touch ${sampleName}.check.tsv
+        """
 
 }