Skip to content

Commit

Permalink
improve partial workflows [ci skip]
Browse files Browse the repository at this point in the history
  • Loading branch information
abhi18av committed Jun 23, 2024
1 parent 6644d13 commit b935da2
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 12 deletions.
20 changes: 16 additions & 4 deletions bin/fastq_cohort_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,24 @@
with open('samplesheet.json', 'r') as f:
magma_analysis_dict = json.load(f)

fastq_report_keys_list = list(fastq_report_dict.keys())

for k in magma_analysis_dict.keys():
fastq_1_name = magma_analysis_dict[k]['R1'].split("/")[-1]
magma_analysis_dict[k]["fastq_report"] = {fastq_1_name: {"file": fastq_report_dict[fastq_1_name]}}
if magma_analysis_dict[k]['R2'] != None:
magma_analysis_dict[k]["fastq_report"] = {}

if magma_analysis_dict[k]['R1'] is not None:
fastq_1_name = magma_analysis_dict[k]['R1'].split("/")[-1]
if fastq_1_name in fastq_report_keys_list:
magma_analysis_dict[k]["fastq_report"][fastq_1_name] = {"file": fastq_report_dict[fastq_1_name]}
else:
magma_analysis_dict[k]["fastq_report"][fastq_1_name] = {"fastq_utils_check": "failed"}

if magma_analysis_dict[k]['R2'] is not None:
fastq_2_name = magma_analysis_dict[k]['R2'].split("/")[-1]
magma_analysis_dict[k]["fastq_report"][fastq_2_name] = {"file": fastq_report_dict[fastq_2_name]}
if fastq_2_name in fastq_report_keys_list:
magma_analysis_dict[k]["fastq_report"][fastq_2_name] = {"file": fastq_report_dict[fastq_2_name]}
else:
magma_analysis_dict[k]["fastq_report"][fastq_2_name] = {"fastq_utils_check": "failed"}

with open('magma_analysis.json', 'w') as f:
json.dump(magma_analysis_dict, f, indent=4)
Expand Down
17 changes: 10 additions & 7 deletions bin/samplesheet_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@
ss = pd.read_csv(args['input_file'])

# Create another column by concatenating the Study, Sample, Library, Attempt, Flowcell, Lane and Index Sequence columns
ss['MagmaSampleName'] = ss['Study'].astype(str) + \
"." + ss['Sample'].astype(str) + \
".L" + ss['Library'].astype(str) + \
".A" + ss['Attempt'].astype(str) + \
"." + ss['Flowcell'].astype(str) + \
"." + ss['Lane'].astype(str) + \
"." + ss['Index Sequence'].astype(str) # Corrected column name
ss['magma_sample_name'] = ss['Study'].astype(str) + \
"." + ss['Sample'].astype(str) + \
".L" + ss['Library'].astype(str) + \
".A" + ss['Attempt'].astype(str) + \
"." + ss['Flowcell'].astype(str) + \
"." + ss['Lane'].astype(str) + \
"." + ss['Index Sequence'].astype(str) # Corrected column name

# FIXME Add the info for BWA MEM mapping using a derived column into the dataframe
# bam_rg_string ="@RG\\tID:${flowcell}.${lane}\\tSM:${study}.${sample}\\tPL:illumina\\tLB:lib${library}\\tPU:${flowcell}.${lane}.${index_sequence}"

fail = False
for idx, row in ss.iterrows():
Expand Down
1 change: 1 addition & 0 deletions modules/utils/.#fastq_cohort_validation.nf
2 changes: 1 addition & 1 deletion modules/utils/fastq_cohort_validation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ process UTILS_FASTQ_COHORT_VALIDATION {
"""
csvtk concat fastq_reports/* | csvtk csv2json > merged_fastq_reports.json
csvtk csv2json ${magma_validated_samplesheet} -k MagmaSampleName > samplesheet.json
csvtk csv2json ${magma_validated_samplesheet} -k magma_sample_name > samplesheet.json
fastq_cohort_validation.py ${params.vcf_name}
"""
Expand Down

0 comments on commit b935da2

Please sign in to comment.