Skip to content

Commit

Permalink
publish fastq stats by default (for now)
Browse files Browse the repository at this point in the history
  • Loading branch information
abhi18av committed Jun 22, 2024
1 parent 7f313be commit af0b380
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 30 deletions.
2 changes: 0 additions & 2 deletions bin/fastq_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@

import ast
import argparse
import re

import pandas as pd

re_mapped_p = re.compile(r'\d* mapped \((.*)%\)')

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Process the sample stats')
Expand Down
46 changes: 23 additions & 23 deletions default_params.config
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// ##### BASIC INPUT #####
// ##### BASIC INPUT #####

// The input CSV sample file (the study id cannot start with 'XBS_REF_')
//NOTE: The samplesheet should have the following fields [study, sample, library, attempt, flowcell, lane, index_sequence, r1, r2]
Expand All @@ -16,12 +16,12 @@ vcf_name = "joint"
// NOTE: Got little genetic diversity in your dataset? (e.g. clonal or <20 samples) - use the EXIT-RIF GVCF file to include additional samples.

use_ref_gvcf = true
ref_gvcf = "${projectDir}/resources/ref_gvcfs/LineagesAndOutgroupV2.g.vcf.gz"
ref_gvcf = "${projectDir}/resources/ref_gvcfs/LineagesAndOutgroupV2.g.vcf.gz"
ref_gvcf_tbi = "${projectDir}/resources/ref_gvcfs/LineagesAndOutgroupV2.g.vcf.gz.tbi"

// ##### The following sections generally do not require adjusting. #####

// ##### QC THRESHOLDS #####
// ##### QC THRESHOLDS #####

//The median coverage required to process the sample
median_coverage_cutoff = 10
Expand All @@ -41,7 +41,7 @@ site_representation_cutoff = 0.95

strand_bias_cutoff = 0.05

// ##### Partial workflows #####
// ##### Partial workflows #####

// Set this to true if you'd like to only validate input fastqs and check their FASTQC reports
only_validate_fastqs = false // OR true
Expand All @@ -62,7 +62,7 @@ skip_base_recalibration = true
skip_minor_variants_gatk = true

//=========================================
//
//
//=========================================

// Use this flag to disable downstream phylogenetic analysis of the merged GVCF
Expand All @@ -78,7 +78,7 @@ iqtree_fast_ml_only= false
iqtree_fast_bootstrapped_phylogeny= false
iqtree_accurate_ml_only= false

// ##### SPECIFIC PATHS AND PARAMETERS #####
// ##### SPECIFIC PATHS AND PARAMETERS #####

//NOTE: It is best not to change these parameters and to rely upon the provided reference files
ref_fasta_basename = "NC-000962-3-H37Rv"
Expand Down Expand Up @@ -173,7 +173,7 @@ SAMPLESHEET_VALIDATION {

FASTQ_STATS {
results_dir = "${params.outdir}/QC_statistics/per_sample/fastq_stats/"
should_publish = false
should_publish = true
}


Expand Down Expand Up @@ -307,16 +307,16 @@ LOFREQ_CALL__NTM {

region = "1472307-1472307"
arguments = " -m 60 -Q 20 -a 1 "
should_publish = false

should_publish = false
}

LOFREQ_INDELQUAL {
results_dir = "${params.outdir}/vcf_files/per_sample/minor_variants/"

arguments = "-m 60"
should_publish = false

should_publish = false
}

SAMTOOLS_INDEX__LOFREQ {
Expand All @@ -328,7 +328,7 @@ LOFREQ_CALL {
results_dir = "${params.outdir}/vcf_files/per_sample/minor_variants/"
should_publish = false

//NOTE: Currently using default p-value for filtering. Use '-a 1' to get all minor variants
//NOTE: Currently using default p-value for filtering. Use '-a 1' to get all minor variants
arguments = "-m 60 --call-indels"
}

Expand Down Expand Up @@ -413,18 +413,18 @@ NTMPROFILER_COLLATE {

GATK_COMBINE_GVCFS {
results_dir = "${params.outdir}/vcf_files/cohort/raw_variant_files/combined"

arguments = " -G StandardAnnotation -G AS_StandardAnnotation "

should_publish = false
}

GATK_GENOTYPE_GVCFS {
results_dir = "${params.outdir}/vcf_files/cohort/raw_variant_files/"

arguments = " -G StandardAnnotation -G AS_StandardAnnotation --sample-ploidy 1 "
should_publish = false

should_publish = false
}


Expand All @@ -433,7 +433,7 @@ SNPEFF {

arguments = " -nostats -ud 100 Mycobacterium_tuberculosis_h37rv "

should_publish = false
should_publish = false
}


Expand Down Expand Up @@ -474,16 +474,16 @@ GATK_SELECT_VARIANTS__SNP {
results_dir = "${params.outdir}/vcf_files/cohort/snp_variant_files/"

arguments = " --remove-unused-alternates --exclude-non-variants "
should_publish = false

should_publish = false
}

GATK_SELECT_VARIANTS__INDEL {
results_dir = "${params.outdir}/vcf_files/cohort/indel_variant_files/"

arguments = " --remove-unused-alternates --exclude-non-variants --select-type-to-include MNP --select-type-to-include MIXED"
should_publish = false

should_publish = false
}


Expand Down Expand Up @@ -682,7 +682,7 @@ GATK_VARIANTS_TO_TABLE {
results_dir = "${params.outdir}/vcf_files/cohort/multiple_alignment_files/"

arguments = " -GF GT "

should_publish = false
}

Expand All @@ -700,7 +700,7 @@ SNPDISTS {
IQTREE {
results_dir = "${params.outdir}/analyses/phylogeny/"

//NOTE: The arguments of IQTREE are decided within the process
//NOTE: The arguments of IQTREE are decided within the process
// as per the discussion here https://github.com/TORCH-Consortium/MAGMA/discussions/164#discussioncomment-6839547
}

Expand Down
10 changes: 5 additions & 5 deletions modules/fastq_utils/validator.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ process FASTQ_VALIDATOR {
val ready

output:
tuple val(sampleName), path("*.check.*tsv")
tuple val(sampleName), path("*.check.*tsv")
path("*.check.*tsv") , emit: check_result
tuple val(sampleName), path(sampleReads) , emit: passed_reads

shell:

'''
!{params.fastq_validator_path} !{sampleReads} \\
2>!{sampleName}.command.log || true
Expand All @@ -42,10 +42,10 @@ process FASTQ_VALIDATOR {
'''

stub:
stub:

"""
touch ${sampleName}.check.tsv
"""
touch ${sampleName}.check.tsv
"""

}

0 comments on commit af0b380

Please sign in to comment.