Update Workspace.__init__ and _parse_wsp_samples to find FCS files automatically by URI.
whitews · whitews · Sep 12, 2023 · Aug 31, 2023 · Sep 1, 2023 · Sep 4, 2023
commit ee631fc18930f404c066248375b8800f1c5ddf18
diff --git a/flowkit/_models/workspace.py b/flowkit/_models/workspace.py
@@ -3,9 +3,12 @@
 """
 import gc
 import copy
+import os
 import numpy as np
 import pandas as pd
 from bokeh.models import Title
+from urllib.parse import urlparse, unquote
+from urllib.request import url2pathname
 from .._conf import debug
 from .._models import gates, dimension
 from .._utils import plot_utils, wsp_utils, sample_utils, gating_utils
@@ -27,8 +30,10 @@ class Workspace(object):
         missing FCS files (i.e. not in fcs_samples arg) will still be loaded. If False, warnings
         are issued for FCS files found in the WSP file that were not loaded in the Workspace and
         gate data for these missing files will not be retained. Default is False.
+    :param find_fcs_files_from_wsp: If True, FCS files are located & loaded using the `URI` attributes stored in the
+        FlowJo workspace file, and any `fcs_samples` given are ignored (a warning is issued). Default is False.
     """
-    def __init__(self, wsp_file_path, fcs_samples=None, ignore_missing_files=False):
+    def __init__(self, wsp_file_path, fcs_samples=None, ignore_missing_files=False, find_fcs_files_from_wsp=False):
         # The sample LUT holds sample IDs (keys) only for loaded samples.
         # The values are the Sample instances
         self._sample_lut = {}
@@ -57,13 +62,36 @@ def __init__(self, wsp_file_path, fcs_samples=None, ignore_missing_files=False):
         # makes it easier to determine which samples have
         # been analyzed.
         self._results_lut = {}
-
+        
         # load samples we were given, we'll cross-reference against wsp below
         tmp_sample_lut = {s.id: s for s in sample_utils.load_samples(fcs_samples)}
         self._sample_lut = {}
 
+
         wsp_data = wsp_utils.parse_wsp(wsp_file_path)
 
+        # Find the samples referenced by the wsp file. Each item in wsp_data['samples'] is a dict with a `sample_uri` key.
+        if find_fcs_files_from_wsp:
+            if fcs_samples is not None:
+                warnings.warn("When `find_fcs_files_from_wsp` is True, `fcs_samples` will be ignored.")
+
+            tmp_sample_lut = {}
+
+            for sample_name in wsp_data['samples']:
+
+                sample_data = wsp_data['samples'][sample_name]
+                sample_uri = sample_data['sample_uri']
+
+                # Convert the URI to a path
+                parsed = urlparse(sample_uri)
+                host = "{0}{0}{mnt}{0}".format(os.path.sep, mnt=parsed.netloc)
+                path = os.path.normpath(os.path.join(host, url2pathname(unquote(parsed.path))))
+
+                # Read in the sample files
+                sample_filedata = sample_utils.load_samples(path)[0]
+
+                tmp_sample_lut[sample_name] = sample_filedata
+
         # save group sample membership, we'll filter by loaded samples next
         group_lut = wsp_data['groups']
 

diff --git a/flowkit/_utils/wsp_utils.py b/flowkit/_utils/wsp_utils.py
@@ -442,6 +442,12 @@ def _parse_wsp_samples(sample_els, ns_map, gating_ns, transform_ns, data_type_ns
         sample_name = sample_node_el.attrib['name']
         sample_id = sample_node_el.attrib['sampleID']
 
+        # Get the sample's DataSet element and, from there, the URI of the FCS file (if present)
+        dataset_el = sample_el.find('DataSet', ns_map)
+        sample_uri = None
+        if 'uri' in dataset_el.attrib.keys():
+            sample_uri = dataset_el.attrib['uri']
+
         # It appears there is only a single set of xforms per sample, one for each channel.
         # And, the xforms have no IDs. We'll extract it and give it IDs based on ???
         sample_xform_lut = _parse_wsp_transforms(transforms_el, transform_ns, data_type_ns)
@@ -471,6 +477,7 @@ def _parse_wsp_samples(sample_els, ns_map, gating_ns, transform_ns, data_type_ns
         # including any custom gates (ones with empty string owning groups).
         wsp_samples[sample_id] = {
             'sample_name': sample_name,
+            'sample_uri': sample_uri,
             'sample_gates': sample_gates,
             'custom_gate_ids': set(),
             'transforms': sample_xform_lut,
@@ -604,6 +611,8 @@ def parse_wsp(workspace_file_or_path, ignore_transforms=False):
             continue
 
         sample_name = sample_dict['sample_name']
+        sample_uri = sample_dict['sample_uri']
+
         sample_gating_strategy = GatingStrategy()
 
         # Add sample's comp matrix & transforms to GatingStrategy
@@ -634,7 +643,8 @@ def parse_wsp(workspace_file_or_path, ignore_transforms=False):
             'compensation': sample_dict['comp'],
             'transforms': sample_dict['transforms'],
             'custom_gate_ids': sample_dict['custom_gate_ids'],
-            'gating_strategy': sample_gating_strategy
+            'gating_strategy': sample_gating_strategy,
+            'sample_uri': sample_uri
         }
 
         processed_samples[sample_name] = processed_sample_data