Skip to content

Commit

Permalink
Merge pull request #168 from hbhargava7/hkb-add-autoread-fcs-from-flowjo
Browse files Browse the repository at this point in the history
Add automatic FCS file reading from FlowJo WSP files
  • Loading branch information
whitews authored Sep 12, 2023
2 parents 1904bf6 + b314cd6 commit 484fa25
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 3 deletions.
55 changes: 53 additions & 2 deletions flowkit/_models/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@
"""
import gc
import copy
import os
import numpy as np
import pandas as pd
from bokeh.models import Title
from urllib.parse import urlparse, unquote
from urllib.request import url2pathname
from pathlib import Path
from .._conf import debug
from .._utils import plot_utils, wsp_utils, sample_utils, gating_utils
from ..exceptions import FlowKitException, GateReferenceError
Expand All @@ -26,8 +30,10 @@ class Workspace(object):
missing FCS files (i.e. not in fcs_samples arg) will still be loaded. If False, warnings
are issued for FCS files found in the WSP file that were not loaded in the Workspace and
gate data for these missing files will not be retained. Default is False.
:param find_fcs_files_from_wsp: If True, FCS files are located and loaded using the `uri` attribute
stored for each sample in the FlowJo workspace file, and `fcs_samples` is ignored. Default is False.
"""
def __init__(self, wsp_file_path, fcs_samples=None, ignore_missing_files=False):
def __init__(self, wsp_file_path, fcs_samples=None, ignore_missing_files=False, find_fcs_files_from_wsp=False):
# The sample LUT holds sample IDs (keys) only for loaded samples.
# The values are the Sample instances
self._sample_lut = {}
Expand Down Expand Up @@ -56,13 +62,58 @@ def __init__(self, wsp_file_path, fcs_samples=None, ignore_missing_files=False):
# makes it easier to determine which samples have
# been analyzed.
self._results_lut = {}

# load samples we were given, we'll cross-reference against wsp below
tmp_sample_lut = {s.id: s for s in sample_utils.load_samples(fcs_samples)}
self._sample_lut = {}


wsp_data = wsp_utils.parse_wsp(wsp_file_path)

# find samples in wsp file. in wsp_data['samples'], each item is a dict which has a key `sample_uri`
if find_fcs_files_from_wsp:
def uri_to_path(uri):
    """
    Convert a sample URI from the workspace file to a local file path.

    :param uri: URI string, either a `file` URI or a bare (scheme-less) path.
        Percent-encoded characters are decoded.
    :return: an absolute path if the URI path is absolute, otherwise the URI
        path joined onto the directory containing the workspace file
    :raises ValueError: if the URI has a scheme other than `file` or none
    """
    parsed = urlparse(uri)

    if parsed.scheme not in ('file', ''):
        raise ValueError("Unsupported URI scheme: {}".format(parsed.scheme))

    # url2pathname decodes percent-escapes and applies the platform's
    # URL-to-path rules. A plain unquote would leave Windows drive URIs
    # (e.g. 'file:/C:/data/x.fcs') as '/C:/data/x.fcs', which is not a
    # usable local path.
    path = url2pathname(parsed.path)

    if os.path.isabs(path):
        return path

    # A relative path is interpreted relative to the directory that
    # contains the workspace file.
    base_path = os.path.dirname(os.path.abspath(Path(wsp_file_path)))
    return os.path.join(base_path, path)

if fcs_samples is not None:
warnings.warn("When `find_fcs_files_from_wsp` is True, `fcs_samples` will be ignored.")

tmp_sample_lut = {}

for sample_name in wsp_data['samples']:

sample_data = wsp_data['samples'][sample_name]
sample_uri = sample_data['sample_uri']

# Convert the URI to a path
path = uri_to_path(sample_uri)

# Read in the sample files
try:
sample_filedata = sample_utils.load_samples(path)[0]

# Update the ID of the loaded data (otherwise analysis breaks)
sample_filedata.id = sample_name

tmp_sample_lut[sample_name] = sample_filedata

except Exception as e:
warnings.warn("Sample file not found at path: {}".format(path))

# save group sample membership, we'll filter by loaded samples next
group_lut = wsp_data['groups']

Expand Down
12 changes: 11 additions & 1 deletion flowkit/_utils/wsp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,12 @@ def _parse_wsp_samples(sample_els, ns_map, gating_ns, transform_ns, data_type_ns
sample_name = sample_node_el.attrib['name']
sample_id = sample_node_el.attrib['sampleID']

# Get the sample's DataSet element and, from there, the URI of the FCS file
dataset_el = sample_el.find('DataSet', ns_map)
sample_uri = None
if 'uri' in dataset_el.attrib.keys():
sample_uri = dataset_el.attrib['uri']

# It appears there is only a single set of xforms per sample, one for each channel.
# And, the xforms have no IDs. We'll extract it and give it IDs based on ???
sample_xform_lut = _parse_wsp_transforms(transforms_el, transform_ns, data_type_ns)
Expand Down Expand Up @@ -470,6 +476,7 @@ def _parse_wsp_samples(sample_els, ns_map, gating_ns, transform_ns, data_type_ns
# including any custom gates (ones with empty string owning groups).
wsp_samples[sample_id] = {
'sample_name': sample_name,
'sample_uri': sample_uri,
'sample_gates': sample_gates,
'custom_gate_ids': set(),
'transforms': sample_xform_lut,
Expand Down Expand Up @@ -597,6 +604,8 @@ def parse_wsp(workspace_file_or_path):

for sample_id, sample_dict in wsp_samples.items():
sample_name = sample_dict['sample_name']
sample_uri = sample_dict['sample_uri']

sample_gating_strategy = GatingStrategy()

# Add sample's comp matrix & transforms to GatingStrategy
Expand Down Expand Up @@ -626,7 +635,8 @@ def parse_wsp(workspace_file_or_path):
'compensation': sample_dict['comp'],
'transforms': sample_dict['transforms'],
'custom_gate_ids': sample_dict['custom_gate_ids'],
'gating_strategy': sample_gating_strategy
'gating_strategy': sample_gating_strategy,
'sample_uri': sample_uri
}

processed_samples[sample_name] = processed_sample_data
Expand Down

0 comments on commit 484fa25

Please sign in to comment.