Skip to content

Commit

Permalink
accommodate the sample maps [ci skip]
Browse files Browse the repository at this point in the history
  • Loading branch information
abhi18av committed Jun 23, 2024
1 parent d399c71 commit 4234d08
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 94 deletions.
68 changes: 16 additions & 52 deletions bin/fastq_cohort_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,35 +17,6 @@

# Name of the joint VCF for this cohort; used as the filename prefix for the
# passed/failed fastq TSV outputs written later in this script.
vcf_name = args['joint_vcf_name']

# # Check for files matching *check.passed*
# passed_files = glob.glob("fastq_validation/*check.passed*")
# passed_data = [] # Store contents of passed files
# if passed_files:
# for fname in passed_files:
# with open(fname) as infile:
# passed_data.append(infile.read())
# with open(f"{vcf_name}.fastqs.passed.tsv", "w") as outfile:
# for data in passed_data:
# outfile.write(data)
# else:
# print("No samples passed!")
#
# # Create the failed file anyhow, since this is an optional output
# open(f"{vcf_name}.fastqs.failed.tsv", 'a').close()
#
# # Check for files matching *check.failed*
# failed_files = glob.glob("fastq_validation/*check.failed*")
# failed_data = [] # Store contents of failed files
# if failed_files:
# for fname in failed_files:
# with open(fname) as infile:
# failed_data.append(infile.read())
# with open(f"{vcf_name}.fastqs.failed.tsv", "w") as outfile:
# for data in failed_data:
# outfile.write(data)
# else:
# print("No samples failed!")

# ============================================
# Parse the validation reports for exact sample names which passed/failed
# ============================================
Expand All @@ -64,44 +35,37 @@

# For the current sample `k`, attach the per-fastq validation report entry and
# derive an overall "fastqs_approved" flag.
# NOTE(review): `k` is the loop variable of an enclosing iteration over
# magma_analysis_dict whose header lies outside this diff hunk — confirm
# against the full file. Indentation reconstructed; the scraped diff stripped it.

# R1 fastq: look up its validator report by file basename.
if magma_analysis_dict[k]['R1'] is not None:
    fastq_1_name = magma_analysis_dict[k]['R1'].split("/")[-1]
    # Optimistically mark the sample approved; a missing report for either
    # read below flips this to False.
    magma_analysis_dict[k]["fastqs_approved"] = True
    if fastq_1_name in fastq_report_keys_list:
        magma_analysis_dict[k]["fastq_report"][fastq_1_name] = {"file": fastq_report_dict[fastq_1_name]}
    else:
        # No validator report produced for this fastq -> record the
        # fastq_utils check as failed and reject the sample.
        magma_analysis_dict[k]["fastq_report"][fastq_1_name] = {"fastq_utils_check": "failed"}
        magma_analysis_dict[k]["fastqs_approved"] = False

# R2 fastq: same lookup; a missing report rejects the whole sample.
# NOTE(review): unlike the R1 branch, this branch never sets the flag to True,
# so an R2-only sample can leave "fastqs_approved" unset — verify intent.
if magma_analysis_dict[k]['R2'] is not None:
    fastq_2_name = magma_analysis_dict[k]['R2'].split("/")[-1]
    if fastq_2_name in fastq_report_keys_list:
        magma_analysis_dict[k]["fastq_report"][fastq_2_name] = {"file": fastq_report_dict[fastq_2_name]}
    else:
        magma_analysis_dict[k]["fastq_report"][fastq_2_name] = {"fastq_utils_check": "failed"}
        magma_analysis_dict[k]["fastqs_approved"] = False

# Persist the enriched analysis dict for downstream pipeline steps.
with open('magma_analysis.json', 'w') as f:
    json.dump(magma_analysis_dict, f, indent=4)

# ============================================
# Parse the validation reports for exact sample names which passed/failed
# ============================================
#
# validation_and_stats_dict = {}
# validate_passed_samples = []
#
# for row in passed_data:
# row_split = row.split("\t")
# derived_magma_name = row_split[0]
# sample_name = derived_magma_name.split(".")[1]
# validate_passed_samples.append(sample_name)
# validation_and_stats_dict[sample_name] = {"magma_name": derived_magma_name,
# "fastq_validation_status": "passed"}
#
# validate_failed_samples = []
# for row in failed_data:
# row_split = row.split("\t")
# derived_magma_name = row_split[0]
# sample_name = derived_magma_name.split(".")[1]
# validate_passed_samples.append(sample_name)
# validation_and_stats_dict[sample_name] = {"magma_name": derived_magma_name,
# "fastq_validation_status": "failed"}
#

# print(validation_and_stats_dict)

# Split samples by fastq validation outcome and write one sample name per line.
# Use .get() so a sample whose "fastqs_approved" key was never set (e.g. both
# reads missing) does not raise KeyError; such samples land in neither file.
approved_samples = {k for k, v in magma_analysis_dict.items()
                    if v.get("fastqs_approved") is True}
# Write approved sample names to a txt file, one per line. Sorted so the
# output is deterministic — set iteration order is arbitrary.
with open("approved_samples.txt", "w") as f:
    for sample in sorted(approved_samples):
        f.write(sample + "\n")

rejected_samples = {k for k, v in magma_analysis_dict.items()
                    if v.get("fastqs_approved") is False}
# Write rejected sample names to a txt file, one per line.
with open("rejected_samples.txt", "w") as f:
    for sample in sorted(rejected_samples):
        f.write(sample + "\n")
44 changes: 2 additions & 42 deletions workflows/validate_fastqs_wf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -51,53 +51,13 @@ workflow VALIDATE_FASTQS_WF {

UTILS_FASTQ_COHORT_VALIDATION( FASTQ_VALIDATOR.out.fastq_report.collect(), samplesheet )

/*
reads_ch = Channel.fromPath(samplesheet)
.splitCsv(header: false, skip: 1)
.map { row -> {
study = row[0]
sample = row[1]
library = row[2]
attempt = row[3]
read1 = row[4]
read2 = row[5]
flowcell = row[6]
lane = row[7]
index_sequence = row[8]
//NOTE: Platform is hard-coded to illumina
bam_rg_string ="@RG\\tID:${flowcell}.${lane}\\tSM:${study}.${sample}\\tPL:illumina\\tLB:lib${library}\\tPU:${flowcell}.${lane}.${index_sequence}"
unique_sample_id = "${study}.${sample}.L${library}.A${attempt}.${flowcell}.${lane}.${index_sequence}"
//Accomodate single/multi reads
if (read1 && read2) {
return [unique_sample_id, bam_rg_string, [file(read1, checkIfExists: true), file(read2, checkIfExists: true)]]
} else {
return [unique_sample_id, bam_rg_string, [file(read1, checkIfExists: true)]]
}
}
}


emit:

passed_fastqs_ch = UTILS_FASTQ_COHORT_VALIDATION.out.passed_fastqs
.splitCsv(header: false, sep: '\t')
.map { row -> { row[0] } }
.join(reads_ch)
.splitText().view()

*/
//.join(reads_ch)

}

0 comments on commit 4234d08

Please sign in to comment.