Skip to content

Commit

Permalink
iterate to unify the fastq validation and stats
Browse files (browse the repository at this point in the history)
  • Loading branch information
abhi18av committed Jun 22, 2024
1 parent af0b380 commit 3d63947
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 14 deletions.
4 changes: 2 additions & 2 deletions bin/fastq_cohort_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
vcf_name = args['joint_vcf_name']

# Check for files matching *check.passed*
passed_files = glob.glob("*check.passed*")
passed_files = glob.glob("fastq_validation/*check.passed*")
if passed_files:
with open(f"{vcf_name}.fastqs.passed.tsv", "w") as outfile:
for fname in passed_files:
Expand All @@ -31,7 +31,7 @@
open(f"{vcf_name}.fastqs.failed.tsv", 'a').close()

# Check for files matching *check.failed*
failed_files = glob.glob("*check.failed*")
failed_files = glob.glob("fastq_validation/*check.failed*")
if failed_files:
with open(f"{vcf_name}.fastqs.failed.tsv", "w") as outfile:
for fname in failed_files:
Expand Down
2 changes: 1 addition & 1 deletion modules/fastq_utils/validator.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ process FASTQ_VALIDATOR {
output:
tuple val(sampleName), path("*.check.*tsv")
path("*.check.*tsv") , emit: check_result
tuple val(sampleName), path(sampleReads) , emit: passed_reads
tuple val(sampleName), path(sampleReads) , emit: reads

shell:

Expand Down
3 changes: 2 additions & 1 deletion modules/utils/fastq_cohort_validation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ process UTILS_FASTQ_COHORT_VALIDATION {
publishDir params.results_dir, mode: params.save_mode, enabled: params.should_publish

input:
path("*")
path("fastq_validation/*")
path("fastq_stats/*")

output:
path("*.fastqs.passed.tsv"), emit: passed_fastqs
Expand Down
16 changes: 8 additions & 8 deletions modules/utils/fastq_stats.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ process UTILS_FASTQ_STATS {
path("*fastq_stats.csv")

script:
"""
seqkit stats -a -T *fastq* > ${sampleName}.seqkit.txt

"""
seqkit stats -a -T *fastq* > ${sampleName}.seqkit.txt
cat ${sampleName}.seqkit.txt | csvtk space2tab | csvtk tab2csv > ${sampleName}.seqkit_stats.final.csv
md5sum *fastq* > ${sampleName}.md5sum.txt
md5sum *fastq* > ${sampleName}.md5sum.txt
cat ${sampleName}.md5sum.txt | csvtk space2tab | csvtk tab2csv | csvtk add-header -n md5sum,file > ${sampleName}.md5sum_stats.csv
du -shL *fastq* > ${sampleName}.du.txt
du -shL *fastq* > ${sampleName}.du.txt
cat ${sampleName}.du.txt | csvtk tab2csv | csvtk add-header -n size,file > ${sampleName}.du_stats.csv
Expand All @@ -32,10 +32,10 @@ process UTILS_FASTQ_STATS {
"""

stub:
stub:

"""
touch ${sampleName}.check.tsv
"""
touch ${sampleName}.check.tsv
"""

}
4 changes: 2 additions & 2 deletions workflows/validate_fastqs_wf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ workflow VALIDATE_FASTQS_WF {

FASTQ_VALIDATOR( reads_ch, ready )

UTILS_FASTQ_STATS( FASTQ_VALIDATOR.out.passed_reads )
UTILS_FASTQ_STATS( FASTQ_VALIDATOR.out.reads )

UTILS_FASTQ_COHORT_VALIDATION( FASTQ_VALIDATOR.out.check_result.collect() )
UTILS_FASTQ_COHORT_VALIDATION( FASTQ_VALIDATOR.out.check_result.collect(), UTILS_FASTQ_STATS.out.collect() )

emit:

Expand Down

0 comments on commit 3d63947

Please sign in to comment.