Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add automatic FCS file reading from FlowJo WSP files #168

Merged
merged 6 commits into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 53 additions & 2 deletions flowkit/_models/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@
"""
import gc
import copy
import os
import numpy as np
import pandas as pd
from bokeh.models import Title
from urllib.parse import urlparse, unquote
from urllib.request import url2pathname
from pathlib import Path
from .._conf import debug
from .._models import gates, dimension
from .._utils import plot_utils, wsp_utils, sample_utils, gating_utils
Expand All @@ -27,8 +31,10 @@ class Workspace(object):
missing FCS files (i.e. not in fcs_samples arg) will still be loaded. If False, warnings
are issued for FCS files found in the WSP file that were not loaded in the Workspace and
gate data for these missing files will not be retained. Default is False.
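:param find_fcs_files_from_wsp: Controls whether to locate and load FCS files using the `URI` attribute
stored for each sample in the FlowJo workspace file. If True, the `fcs_samples` argument is ignored
(a warning is issued if it was provided). Default is False.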
"""
def __init__(self, wsp_file_path, fcs_samples=None, ignore_missing_files=False):
def __init__(self, wsp_file_path, fcs_samples=None, ignore_missing_files=False, find_fcs_files_from_wsp=False):
# The sample LUT holds sample IDs (keys) only for loaded samples.
# The values are the Sample instances
self._sample_lut = {}
Expand Down Expand Up @@ -57,13 +63,58 @@ def __init__(self, wsp_file_path, fcs_samples=None, ignore_missing_files=False):
# makes it easier to determine which samples have
# been analyzed.
self._results_lut = {}

# load samples we were given, we'll cross-reference against wsp below
tmp_sample_lut = {s.id: s for s in sample_utils.load_samples(fcs_samples)}
self._sample_lut = {}


wsp_data = wsp_utils.parse_wsp(wsp_file_path)

# Find the samples referenced in the WSP file. Each value in wsp_data['samples'] is a dict with a `sample_uri` key.
if find_fcs_files_from_wsp:
def uri_to_path(uri):
    """
    Convert a sample URI from the workspace file to a local file system path.

    Only `file` URIs and scheme-less values (plain paths) are accepted.
    Percent-escapes are decoded, and a relative result is resolved against
    the directory containing the workspace file (`wsp_file_path` from the
    enclosing scope).

    :param uri: URI string for a sample, as stored in the workspace file
    :return: file system path string for the sample file
    :raises ValueError: if the URI uses a scheme other than `file`
    """
    parsed = urlparse(uri)

    if parsed.scheme not in ('file', ''):
        raise ValueError("Unsupported URI scheme: {}".format(parsed.scheme))

    # url2pathname decodes percent-escapes (as unquote does) and also converts
    # the URL path to the platform's native path syntax, so a Windows URI path
    # like '/C:/data/a.fcs' becomes a usable drive path instead of keeping the
    # leading slash. On POSIX systems the result is the same as unquote.
    path = url2pathname(parsed.path)

    if os.path.isabs(path):
        return path

    # A relative path is relative to the workspace file's directory.
    base_path = os.path.dirname(os.path.abspath(Path(wsp_file_path)))
    return os.path.join(base_path, path)

if fcs_samples is not None:
warnings.warn("When `find_fcs_files_from_wsp` is True, `fcs_samples` will be ignored.")

tmp_sample_lut = {}

for sample_name in wsp_data['samples']:

sample_data = wsp_data['samples'][sample_name]
sample_uri = sample_data['sample_uri']

# Convert the URI to a path
path = uri_to_path(sample_uri)

# Read in the sample files
try:
sample_filedata = sample_utils.load_samples(path)[0]

# Update the ID of the loaded data (otherwise analysis breaks)
sample_filedata.id = sample_name

tmp_sample_lut[sample_name] = sample_filedata

except Exception as e:
warnings.warn("Sample file not found at path: {}".format(path))

# save group sample membership, we'll filter by loaded samples next
group_lut = wsp_data['groups']

Expand Down
12 changes: 11 additions & 1 deletion flowkit/_utils/wsp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,12 @@ def _parse_wsp_samples(sample_els, ns_map, gating_ns, transform_ns, data_type_ns
sample_name = sample_node_el.attrib['name']
sample_id = sample_node_el.attrib['sampleID']

# Get the sample DataSet element, and from there get the URI of the FCS file
dataset_el = sample_el.find('DataSet', ns_map)
sample_uri = None
if 'uri' in dataset_el.attrib.keys():
sample_uri = dataset_el.attrib['uri']

# It appears there is only a single set of xforms per sample, one for each channel.
# And, the xforms have no IDs. We'll extract it and give it IDs based on ???
sample_xform_lut = _parse_wsp_transforms(transforms_el, transform_ns, data_type_ns)
Expand Down Expand Up @@ -471,6 +477,7 @@ def _parse_wsp_samples(sample_els, ns_map, gating_ns, transform_ns, data_type_ns
# including any custom gates (ones with empty string owning groups).
wsp_samples[sample_id] = {
'sample_name': sample_name,
'sample_uri': sample_uri,
'sample_gates': sample_gates,
'custom_gate_ids': set(),
'transforms': sample_xform_lut,
Expand Down Expand Up @@ -604,6 +611,8 @@ def parse_wsp(workspace_file_or_path, ignore_transforms=False):
continue

sample_name = sample_dict['sample_name']
sample_uri = sample_dict['sample_uri']

sample_gating_strategy = GatingStrategy()

# Add sample's comp matrix & transforms to GatingStrategy
Expand Down Expand Up @@ -634,7 +643,8 @@ def parse_wsp(workspace_file_or_path, ignore_transforms=False):
'compensation': sample_dict['comp'],
'transforms': sample_dict['transforms'],
'custom_gate_ids': sample_dict['custom_gate_ids'],
'gating_strategy': sample_gating_strategy
'gating_strategy': sample_gating_strategy,
'sample_uri': sample_uri
}

processed_samples[sample_name] = processed_sample_data
Expand Down