Add new CI for -s and --fn_as_s_name, fix failures.

Check that MultiQC modules still work when configuration options are used to modify how sample names are generated. Fix many KeyErrors and similar failures.
tsjzz · Jul 4, 2021 · 2afa0b6 · 2afa0b6
1 parent a4c277b
commit 2afa0b6
Show file tree

Hide file tree

Showing 8 changed files with 19 additions and 14 deletions.
diff --git a/.github/workflows/multiqc_linux.yml b/.github/workflows/multiqc_linux.yml
@@ -41,6 +41,9 @@ jobs:
       - name: All modules / Custom report filename
         run: multiqc --lint test_data/data/modules/ --filename full_report.html
 
+      - name: All modules / Log filename as s_name, no cleaning
+        run: multiqc --lint test_data/data/modules/ --fullnames --fn_as_s_name
+
       - name: Filter out all filenames (confirm no report)
         run: |
           multiqc test_data/data/modules/ --filename all_ignored.html --ignore-samples '*'

diff --git a/docs/modules/hicpro.md b/docs/modules/hicpro.md
@@ -7,3 +7,6 @@ Description: >
 
 The HiC-Pro module parses results generated by
 [HiC-Pro](https://github.com/nservant/HiC-Pro), a tool for efficient processing and quality control of Hi-C data.
+
+Please note - because this module shares sample identifiers across multiple files,
+the `--fn_as_s_name` / `config.use_filename_as_sample_name` functionality has been disabled and has no effect.
diff --git a/multiqc/modules/hicexplorer/hicexplorer.py b/multiqc/modules/hicexplorer/hicexplorer.py
@@ -23,7 +23,7 @@ def __init__(self):
 
         self.hicexplorer_data = dict()
         for f in self.find_log_files("hicexplorer"):
-            if f["s_name"] != "QC_table":
+            if f["fn"] != "QC_table.txt":
                 # Parse the log file
                 parsed_data = self.parse_logs(f["f"])
                 # Build the sample ID

diff --git a/multiqc/modules/hicpro/hicpro.py b/multiqc/modules/hicpro/hicpro.py
@@ -143,8 +143,7 @@ def __init__(self):
 
     def parse_hicpro_stats(self, f, rsection):
         """Parse a HiC-Pro stat file"""
-        s_name = self.clean_s_name(os.path.basename(f["root"]), f, root=os.path.dirname(f["root"]))
-
+        s_name = self.clean_s_name(os.path.basename(f["root"]), root=os.path.dirname(f["root"]))
         if s_name not in self.hicpro_data.keys():
             self.hicpro_data[s_name] = {}
 

diff --git a/multiqc/modules/mirtop/mirtop.py b/multiqc/modules/mirtop/mirtop.py
@@ -90,14 +90,14 @@ def parse_mirtop_report(self, f):
 
         content = json.loads(f["f"])
         for s_name in content.get("metrics", {}).keys():
-            s_name = self.clean_s_name(s_name, f)
+            cleaned_s_name = self.clean_s_name(s_name, f)
             ## Check for sample name duplicates
-            if s_name in self.mirtop_data:
-                log.debug("Duplicate sample name found! Overwriting: {}".format(s_name))
+            if cleaned_s_name in self.mirtop_data:
+                log.debug("Duplicate sample name found! Overwriting: {}".format(cleaned_s_name))
             parsed_data = content["metrics"][s_name]
             parsed_data["read_count"] = parsed_data["isomiR_sum"] + parsed_data["ref_miRNA_sum"]
             parsed_data["isomiR_perc"] = (parsed_data["isomiR_sum"] / parsed_data["read_count"]) * 100
-            self.mirtop_data[s_name] = parsed_data
+            self.mirtop_data[cleaned_s_name] = parsed_data
 
     def aggregate_snps_in_samples(self):
         """Aggregate info for iso_snp isomiRs (for clarity). "Mean" section will be recomputed"""

diff --git a/multiqc/modules/peddy/peddy.py b/multiqc/modules/peddy/peddy.py
@@ -40,11 +40,11 @@ def __init__(self):
             parsed_data = self.parse_peddy_summary(f)
             if parsed_data is not None:
                 for s_name in parsed_data:
-                    s_name = self.clean_s_name(s_name, f)
+                    cleaned_s_name = self.clean_s_name(s_name, f)
                     try:
-                        self.peddy_data[s_name].update(parsed_data[s_name])
+                        self.peddy_data[cleaned_s_name].update(parsed_data[s_name])
                     except KeyError:
-                        self.peddy_data[s_name] = parsed_data[s_name]
+                        self.peddy_data[cleaned_s_name] = parsed_data[s_name]
 
         # parse peddy CSV files
         for pattern in ["het_check", "ped_check", "sex_check"]:

diff --git a/multiqc/modules/qorts/qorts.py b/multiqc/modules/qorts/qorts.py
@@ -55,8 +55,9 @@ def parse_qorts(self, f):
         for l in f["f"]:
             s = l.split("\t")
             if s_names is None:
-                s_names = [self.clean_s_name(s_name, f) for s_name in s[1:]]
-                if len(s_names) <= 2 and s_names[0].endswith("COUNT"):
+                raw_s_names = s[1:]
+                s_names = [self.clean_s_name(s_name, f) for s_name in raw_s_names]
+                if len(s_names) <= 2 and raw_s_names[0].endswith("COUNT"):
                     if f["fn"] == "QC.summary.txt":
                         s_names = [self.clean_s_name(os.path.basename(os.path.normpath(f["root"])), f)]
                     else:

diff --git a/multiqc/modules/rna_seqc/rna_seqc.py b/multiqc/modules/rna_seqc/rna_seqc.py
@@ -132,9 +132,8 @@ def parse_coverage(self, f):
         for l in f["f"].splitlines():
             s = l.strip().split("\t")
             if s_names is None:
-                s_names = s
+                s_names = [self.clean_s_name(s_name, f) for s_name in s]
                 for s_name in s_names:
-                    s_name = self.clean_s_name(s_name, f)
                     data[s_name] = dict()
             else:
                 for i, v in enumerate(s):