Skip to content

Commit

Permalink
Add new CI test for -s (--fullnames) and --fn_as_s_name; fix failures.
Browse files Browse the repository at this point in the history
Check that MultiQC modules still work if configuration options are used to modify how sample names are generated.
Fix lots of KeyErrors and similar errors.
  • Loading branch information
ewels committed Jul 4, 2021
1 parent a4c277b commit 2afa0b6
Show file tree
Hide file tree
Showing 8 changed files with 19 additions and 14 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/multiqc_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ jobs:
- name: All modules / Custom report filename
run: multiqc --lint test_data/data/modules/ --filename full_report.html

- name: All modules / Log filename as s_name, no cleaning
run: multiqc --lint test_data/data/modules/ --fullnames --fn_as_s_name

- name: Filter out all filenames (confirm no report)
run: |
multiqc test_data/data/modules/ --filename all_ignored.html --ignore-samples '*'
Expand Down
3 changes: 3 additions & 0 deletions docs/modules/hicpro.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ Description: >

The HiC-Pro module parses results generated by
[HiC-Pro](https://github.com/nservant/HiC-Pro), a tool for efficient processing and quality control of Hi-C data.

Please note: because this module shares sample identifiers across multiple files,
the `--fn_as_s_name` / `config.use_filename_as_sample_name` option is disabled for this module and has no effect.
2 changes: 1 addition & 1 deletion multiqc/modules/hicexplorer/hicexplorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self):

self.hicexplorer_data = dict()
for f in self.find_log_files("hicexplorer"):
if f["s_name"] != "QC_table":
if f["fn"] != "QC_table.txt":
# Parse the log file
parsed_data = self.parse_logs(f["f"])
# Build the sample ID
Expand Down
3 changes: 1 addition & 2 deletions multiqc/modules/hicpro/hicpro.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,7 @@ def __init__(self):

def parse_hicpro_stats(self, f, rsection):
"""Parse a HiC-Pro stat file"""
s_name = self.clean_s_name(os.path.basename(f["root"]), f, root=os.path.dirname(f["root"]))

s_name = self.clean_s_name(os.path.basename(f["root"]), root=os.path.dirname(f["root"]))
if s_name not in self.hicpro_data.keys():
self.hicpro_data[s_name] = {}

Expand Down
8 changes: 4 additions & 4 deletions multiqc/modules/mirtop/mirtop.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,14 @@ def parse_mirtop_report(self, f):

content = json.loads(f["f"])
for s_name in content.get("metrics", {}).keys():
s_name = self.clean_s_name(s_name, f)
cleaned_s_name = self.clean_s_name(s_name, f)
## Check for sample name duplicates
if s_name in self.mirtop_data:
log.debug("Duplicate sample name found! Overwriting: {}".format(s_name))
if cleaned_s_name in self.mirtop_data:
log.debug("Duplicate sample name found! Overwriting: {}".format(cleaned_s_name))
parsed_data = content["metrics"][s_name]
parsed_data["read_count"] = parsed_data["isomiR_sum"] + parsed_data["ref_miRNA_sum"]
parsed_data["isomiR_perc"] = (parsed_data["isomiR_sum"] / parsed_data["read_count"]) * 100
self.mirtop_data[s_name] = parsed_data
self.mirtop_data[cleaned_s_name] = parsed_data

def aggregate_snps_in_samples(self):
"""Aggregate info for iso_snp isomiRs (for clarity). "Mean" section will be recomputed"""
Expand Down
6 changes: 3 additions & 3 deletions multiqc/modules/peddy/peddy.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ def __init__(self):
parsed_data = self.parse_peddy_summary(f)
if parsed_data is not None:
for s_name in parsed_data:
s_name = self.clean_s_name(s_name, f)
cleaned_s_name = self.clean_s_name(s_name, f)
try:
self.peddy_data[s_name].update(parsed_data[s_name])
self.peddy_data[cleaned_s_name].update(parsed_data[s_name])
except KeyError:
self.peddy_data[s_name] = parsed_data[s_name]
self.peddy_data[cleaned_s_name] = parsed_data[s_name]

# parse peddy CSV files
for pattern in ["het_check", "ped_check", "sex_check"]:
Expand Down
5 changes: 3 additions & 2 deletions multiqc/modules/qorts/qorts.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@ def parse_qorts(self, f):
for l in f["f"]:
s = l.split("\t")
if s_names is None:
s_names = [self.clean_s_name(s_name, f) for s_name in s[1:]]
if len(s_names) <= 2 and s_names[0].endswith("COUNT"):
raw_s_names = s[1:]
s_names = [self.clean_s_name(s_name, f) for s_name in raw_s_names]
if len(s_names) <= 2 and raw_s_names[0].endswith("COUNT"):
if f["fn"] == "QC.summary.txt":
s_names = [self.clean_s_name(os.path.basename(os.path.normpath(f["root"])), f)]
else:
Expand Down
3 changes: 1 addition & 2 deletions multiqc/modules/rna_seqc/rna_seqc.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,8 @@ def parse_coverage(self, f):
for l in f["f"].splitlines():
s = l.strip().split("\t")
if s_names is None:
s_names = s
s_names = [self.clean_s_name(s_name, f) for s_name in s]
for s_name in s_names:
s_name = self.clean_s_name(s_name, f)
data[s_name] = dict()
else:
for i, v in enumerate(s):
Expand Down

0 comments on commit 2afa0b6

Please sign in to comment.