Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add automatic FCS file reading from FlowJo WSP files #168

Merged
merged 6 commits into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Update Workspace.__init__ and _parse_wsp_samples to find FCS files automatically by URI.
  • Loading branch information
hbhargava7 committed Aug 31, 2023
commit ee631fc18930f404c066248375b8800f1c5ddf18
32 changes: 30 additions & 2 deletions flowkit/_models/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
"""
import gc
import copy
import os
import numpy as np
import pandas as pd
from bokeh.models import Title
from urllib.parse import urlparse, unquote
from urllib.request import url2pathname
from .._conf import debug
from .._models import gates, dimension
from .._utils import plot_utils, wsp_utils, sample_utils, gating_utils
Expand All @@ -27,8 +30,10 @@ class Workspace(object):
missing FCS files (i.e. not in fcs_samples arg) will still be loaded. If False, warnings
are issued for FCS files found in the WSP file that were not loaded in the Workspace and
gate data for these missing files will not be retained. Default is False.
:param find_fcs_files_from_wsp: If True, FCS files are located and loaded using the `sample_uri` value parsed for
each sample in the FlowJo workspace file, and the `fcs_samples` argument is ignored. Default is False.
"""
def __init__(self, wsp_file_path, fcs_samples=None, ignore_missing_files=False):
def __init__(self, wsp_file_path, fcs_samples=None, ignore_missing_files=False, find_fcs_files_from_wsp=False):
# The sample LUT holds sample IDs (keys) only for loaded samples.
# The values are the Sample instances
self._sample_lut = {}
Expand Down Expand Up @@ -57,13 +62,36 @@ def __init__(self, wsp_file_path, fcs_samples=None, ignore_missing_files=False):
# makes it easier to determine which samples have
# been analyzed.
self._results_lut = {}

# load samples we were given, we'll cross-reference against wsp below
tmp_sample_lut = {s.id: s for s in sample_utils.load_samples(fcs_samples)}
self._sample_lut = {}


wsp_data = wsp_utils.parse_wsp(wsp_file_path)

# find samples in wsp file. in wsp_data['samples'], each item is a dict which has a key `sample_uri`
if find_fcs_files_from_wsp:
if fcs_samples is not None:
warnings.warn("When `find_fcs_files_from_wsp` is True, `fcs_samples` will be ignored.")

tmp_sample_lut = {}

for sample_name in wsp_data['samples']:

sample_data = wsp_data['samples'][sample_name]
sample_uri = sample_data['sample_uri']

# Convert the URI to a path
parsed = urlparse(sample_uri)
host = "{0}{0}{mnt}{0}".format(os.path.sep, mnt=parsed.netloc)
path = os.path.normpath(os.path.join(host, url2pathname(unquote(parsed.path))))

# Read in the sample files
sample_filedata = sample_utils.load_samples(path)[0]

tmp_sample_lut[sample_name] = sample_filedata

# save group sample membership, we'll filter by loaded samples next
group_lut = wsp_data['groups']

Expand Down
12 changes: 11 additions & 1 deletion flowkit/_utils/wsp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,12 @@ def _parse_wsp_samples(sample_els, ns_map, gating_ns, transform_ns, data_type_ns
sample_name = sample_node_el.attrib['name']
sample_id = sample_node_el.attrib['sampleID']

# Get the sample's DataSet element and, from there, the URI of the FCS file
dataset_el = sample_el.find('DataSet', ns_map)
sample_uri = None
if 'uri' in dataset_el.attrib.keys():
sample_uri = dataset_el.attrib['uri']

# It appears there is only a single set of xforms per sample, one for each channel.
# And, the xforms have no IDs. We'll extract it and give it IDs based on ???
sample_xform_lut = _parse_wsp_transforms(transforms_el, transform_ns, data_type_ns)
Expand Down Expand Up @@ -471,6 +477,7 @@ def _parse_wsp_samples(sample_els, ns_map, gating_ns, transform_ns, data_type_ns
# including any custom gates (ones with empty string owning groups).
wsp_samples[sample_id] = {
'sample_name': sample_name,
'sample_uri': sample_uri,
'sample_gates': sample_gates,
'custom_gate_ids': set(),
'transforms': sample_xform_lut,
Expand Down Expand Up @@ -604,6 +611,8 @@ def parse_wsp(workspace_file_or_path, ignore_transforms=False):
continue

sample_name = sample_dict['sample_name']
sample_uri = sample_dict['sample_uri']

sample_gating_strategy = GatingStrategy()

# Add sample's comp matrix & transforms to GatingStrategy
Expand Down Expand Up @@ -634,7 +643,8 @@ def parse_wsp(workspace_file_or_path, ignore_transforms=False):
'compensation': sample_dict['comp'],
'transforms': sample_dict['transforms'],
'custom_gate_ids': sample_dict['custom_gate_ids'],
'gating_strategy': sample_gating_strategy
'gating_strategy': sample_gating_strategy,
'sample_uri': sample_uri
}

processed_samples[sample_name] = processed_sample_data
Expand Down