Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: ♻️ Refactor npTDMS #290

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
refactor: ♻️ Refactor npTDMS
Refactoring npTDMS via:

1. https://github.com/sourcery-ai/sourcery
2. `autopep8 --in-place --aggressive --aggressive ./nptdms/**`
3. Extract duplicates
  • Loading branch information
Anselmoo committed Nov 22, 2022
commit 2da7c4a60e131593d7a8913ed1d85905e06f9e0f
3 changes: 1 addition & 2 deletions nptdms/base_segment.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from io import UnsupportedOperation
import numpy as np

from nptdms.log import log_manager
Expand Down Expand Up @@ -32,7 +31,7 @@ def scaler_data_types(self):
return None

def __repr__(self):
return "%s(%s)" % (self.__class__.__name__, self.path)
return f"{self.__class__.__name__}({self.path})"


class BaseDataReader(object):
Expand Down
10 changes: 4 additions & 6 deletions nptdms/channel_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,7 @@ class ListDataReceiver(object):
def __init__(self, channel):
"""Initialise new data receiver for a TDMS object
"""
if channel.data_type == types.String:
self._dtype = np.dtype('O')
else:
self._dtype = None
self._dtype = np.dtype('O') if channel.data_type == types.String else None
self._data = []
self.scaler_data = {}

Expand Down Expand Up @@ -183,8 +180,9 @@ def slice_raw_data(raw_data, offset, length=None):
return raw_data
end = None if length is None else offset + length
data = None if raw_data.data is None else raw_data.data[offset:end]
scaler_data = dict(
(scale_id, scaler_data[offset:end]) for (scale_id, scaler_data) in raw_data.scaler_data.items())
scaler_data = {scale_id: scaler_data[offset:end] for (scale_id, scaler_data)
in raw_data.scaler_data.items()}

return RawDataSlice(data, scaler_data)


Expand Down
3 changes: 2 additions & 1 deletion nptdms/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@ class ObjectPath(object):
:ivar group: Group name or None for the root object
:ivar channel: Channel name or None for the root object or a group object
"""

def __init__(self, *path_components):
self.group = None
self.channel = None
if len(path_components) > 0:
if path_components:
self.group = path_components[0]
if len(path_components) > 1:
self.channel = path_components[1]
Expand Down
88 changes: 40 additions & 48 deletions nptdms/daqmx.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
class DaqmxDataReader(BaseDataReader):
""" A TDMS segment with DAQmx data
"""

def _read_data_chunk(self, file, data_objects, chunk_index):
"""Read data from DAQmx data segment"""

Expand All @@ -40,7 +41,7 @@ def _read_data_chunk(self, file, data_objects, chunk_index):

# Now get arrays for each scaler of each channel where the scaler
# data comes from this raw buffer
for (i, obj) in enumerate(data_objects):
for obj in data_objects:
scalers_for_raw_buffer_index = [
scaler for scaler in obj.daqmx_metadata.scalers
if scaler.raw_buffer_index == raw_buffer_index]
Expand All @@ -60,9 +61,9 @@ def _read_data_chunk(self, file, data_objects, chunk_index):
else:
data[obj.path] = processed_data

combined_data = {}
for path, data in data.items():
combined_data[path] = RawChannelDataChunk.channel_data(data)
combined_data = {path: RawChannelDataChunk.channel_data(data)
for path, data in data.items()}

for path, data in scaler_data.items():
combined_data[path] = RawChannelDataChunk.scaler_data(data)
return RawDataChunk(combined_data)
Expand Down Expand Up @@ -92,10 +93,10 @@ def get_daqmx_final_chunk_lengths(ordered_objects, chunk_size_bytes):
for obj in ordered_objects:
if not obj.has_data:
continue
buffer_indices = list(set(s.raw_buffer_index for s in obj.daqmx_metadata.scalers))
buffer_indices = list({s.raw_buffer_index for s in obj.daqmx_metadata.scalers})
if len(buffer_indices) == 1:
object_lengths[obj.path] = updated_buffer_lengths[buffer_indices[0]]
# Else scalers are in different buffers, not sure this is even valid
# Else scalers are in different buffers, not sure this is even valid
return object_lengths


Expand All @@ -109,14 +110,12 @@ def get_buffer_dimensions(ordered_objects):
continue
daqmx_metadata = o.daqmx_metadata
if dimensions is None:
raw_data_widths = daqmx_metadata.raw_data_widths
# Set width for each buffer
dimensions = [(0, w) for w in raw_data_widths]
else:
if not _lists_are_equal(daqmx_metadata.raw_data_widths, raw_data_widths):
raise ValueError(
"Raw data widths for object %r (%s) do not match previous widths (%s)" %
(o, daqmx_metadata.raw_data_widths, raw_data_widths))
dimensions = [(0, w) for w in daqmx_metadata.raw_data_widths]
elif not _lists_are_equal(daqmx_metadata.raw_data_widths, raw_data_widths):
raise ValueError(
"Raw data widths for object %r (%s) do not match previous widths (%s)" %
(o, daqmx_metadata.raw_data_widths, raw_data_widths))
# Now set the buffer number of values based on the object chunk size
for scaler in daqmx_metadata.scalers:
buffer_index = scaler.raw_buffer_index
Expand Down Expand Up @@ -152,8 +151,8 @@ def read_raw_data_index(self, f, raw_data_index_header, endianness):
data_type_val = types.Uint32.read(f, endianness)
try:
self.data_type = types.tds_data_types[data_type_val]
except KeyError:
raise KeyError("Unrecognised data type: %s" % data_type_val)
except KeyError as e:
raise KeyError(f"Unrecognised data type: {data_type_val}") from e

daqmx_metadata = DaqMxMetadata(f, endianness, raw_data_index_header, self.data_type)
log.debug("DAQmx metadata: %r", daqmx_metadata)
Expand All @@ -166,9 +165,7 @@ def read_raw_data_index(self, f, raw_data_index_header, endianness):
def scaler_data_types(self):
if self.daqmx_metadata is None:
return None
return dict(
(s.scale_id, s.data_type)
for s in self.daqmx_metadata.scalers)
return {s.scale_id: s.data_type for s in self.daqmx_metadata.scalers}


class DaqMxMetadata(object):
Expand All @@ -179,17 +176,17 @@ class DaqMxMetadata(object):
'chunk_size',
'raw_data_widths',
'scalers',
]
]

def __init__(self, f, endianness, scaler_type, channel_data_type):
"""
Read the metadata for a DAQmx raw segment. This is the raw
DAQmx-specific portion of the raw data index.
"""
metadata_bytes = f.read(16)
(dimension,
self.chunk_size,
scaler_vector_length) = _struct_unpack(endianness + 'LQL', metadata_bytes)
(dimension, self.chunk_size, scaler_vector_length) = _struct_unpack(
f'{endianness}LQL', metadata_bytes
)

# In TDMS format version 2.0, 1 is the only valid value for dimension
if dimension != 1:
Expand All @@ -202,11 +199,17 @@ def __init__(self, f, endianness, scaler_type, channel_data_type):

if channel_data_type != types.DaqMxRawData:
if scaler_vector_length != 1:
raise ValueError("Expected only one scaler for channel with type %s" % channel_data_type.__name__)
raise ValueError(
"Expected only one scaler for channel"
f" with type {channel_data_type.__name__}"
)

if self.scalers[0].data_type != channel_data_type:
raise ValueError(
"Expected scaler data type to be %s but got %s" %
(channel_data_type.__name__, self.scalers[0].data_type.__name__))
"Expected scaler data type to be"
f" {channel_data_type.__name__} but got"
f" {self.scalers[0].data_type.__name__}"
)

# Read raw data widths.
# This is an array of widths in bytes, which should be the same
Expand All @@ -221,12 +224,10 @@ def __init__(self, f, endianness, scaler_type, channel_data_type):
def __repr__(self):
""" Return string representation of DAQmx metadata
"""
properties = (
"%s=%s" % (name, _get_attr_repr(self, name))
for name in self.__slots__)
properties = (f"{name}={_get_attr_repr(self, name)}" for name in self.__slots__)

properties_list = ", ".join(properties)
return "%s(%s)" % (self.__class__.__name__, properties_list)
return f"{self.__class__.__name__}({properties_list})"


class DaqMxScaler(object):
Expand All @@ -239,7 +240,7 @@ class DaqMxScaler(object):
'raw_buffer_index',
'raw_byte_offset',
'sample_format_bitmap',
]
]

def __init__(self, open_file, endianness):
scaler_bytes = open_file.read(20)
Expand All @@ -248,7 +249,7 @@ def __init__(self, open_file, endianness):
self.raw_buffer_index,
self.raw_byte_offset,
self.sample_format_bitmap,
self.scale_id) = _struct_unpack(endianness + 'LLLLL', scaler_bytes)
self.scale_id) = _struct_unpack(f'{endianness}LLLLL', scaler_bytes)

self.data_type = DAQMX_TYPES[data_type_code]

Expand All @@ -259,12 +260,10 @@ def postprocess_data(self, data):
return data

def __repr__(self):
properties = (
"%s=%s" % (name, _get_attr_repr(self, name))
for name in self.__slots__)
properties = (f"{name}={_get_attr_repr(self, name)}" for name in self.__slots__)

properties_list = ", ".join(properties)
return "%s(%s)" % (self.__class__.__name__, properties_list)
return f"{self.__class__.__name__}({properties_list})"


class DigitalLineScaler(object):
Expand All @@ -277,16 +276,13 @@ class DigitalLineScaler(object):
'raw_buffer_index',
'raw_bit_offset',
'sample_format_bitmap',
]
]

def __init__(self, open_file, endianness):
scaler_bytes = open_file.read(17)

(data_type_code,
self.raw_buffer_index,
self.raw_bit_offset,
self.sample_format_bitmap,
self.scale_id) = _struct_unpack(endianness + 'LLLBL', scaler_bytes)
(data_type_code, self.raw_buffer_index, self.raw_bit_offset, self.sample_format_bitmap,
self.scale_id) = _struct_unpack(f'{endianness}LLLBL', scaler_bytes)

self.data_type = DAQMX_TYPES[data_type_code]

Expand All @@ -299,19 +295,15 @@ def postprocess_data(self, data):
return np.right_shift(np.bitwise_and(data, bitmask), bit_offset)

def __repr__(self):
properties = (
"%s=%s" % (name, _get_attr_repr(self, name))
for name in self.__slots__)
properties = (f"{name}={_get_attr_repr(self, name)}" for name in self.__slots__)

properties_list = ", ".join(properties)
return "%s(%s)" % (self.__class__.__name__, properties_list)
return f"{self.__class__.__name__}({properties_list})"


def _get_attr_repr(obj, attr_name):
val = getattr(obj, attr_name)
if isinstance(val, type):
return val.__name__
return repr(val)
return val.__name__ if isinstance(val, type) else repr(val)


def _lists_are_equal(a, b):
Expand Down
2 changes: 1 addition & 1 deletion nptdms/export/hdf_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def from_tdms_file(tdms_file, filepath, mode='w', group='/'):

# Write properties and data for each channel
for channel in group.channels():
channel_key = group.name + '/' + channel.name
channel_key = f'{group.name}/{channel.name}'

if channel.data_type is types.String:
# Encode as variable length UTF-8 strings
Expand Down
16 changes: 7 additions & 9 deletions nptdms/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def __init__(self, tdms_file):
self._file_path = source_path
self._file = open(self._file_path, "rb")

filepath = self._file_path + '_index'
filepath = f'{self._file_path}_index'
if os.path.isfile(filepath):
self._index_file_path = filepath
self._index_file = open(self._index_file_path, "rb")
Expand Down Expand Up @@ -142,8 +142,7 @@ def read_raw_data(self):
"Cannot read data unless metadata has first been read")
for segment in self._segments:
self._verify_segment_start(segment)
for chunk in segment.read_raw_data(self._file):
yield chunk
yield from segment.read_raw_data(self._file)

def read_raw_data_for_channel(self, channel_path, offset=0, length=None):
""" Read raw data for a single channel, chunk by chunk
Expand Down Expand Up @@ -299,7 +298,7 @@ def _read_lead_in(self, file, segment_position, is_index_file=False):
endianness = '>' if (toc_mask & toc_properties['kTocBigEndian']) else '<'

# Next four bytes are version number, then 8 bytes each for the offset values
(version, next_segment_offset, raw_data_offset) = _struct_unpack(endianness + 'lQQ', lead_in_bytes[8:28])
(version, next_segment_offset, raw_data_offset) = _struct_unpack(f'{endianness}lQQ', lead_in_bytes[8:28])

if self.tdms_version is None:
if version not in (4712, 4713):
Expand Down Expand Up @@ -328,7 +327,7 @@ def _read_lead_in(self, file, segment_position, is_index_file=False):
log.debug("Next segment offset = %d, raw data offset = %d, data size = %d b",
next_segment_offset, raw_data_offset, next_segment_offset - raw_data_offset)
next_segment_pos = (
segment_position + next_segment_offset + lead_size)
segment_position + next_segment_offset + lead_size)

return segment_position, toc_mask, data_position, next_segment_pos, segment_incomplete

Expand Down Expand Up @@ -464,6 +463,7 @@ def _update_object_scaler_data_types(path, obj, segment_object):
class ObjectMetadata(object):
""" Stores information about an object in a TDMS file
"""

def __init__(self):
self.properties = OrderedDict()
self.data_type = None
Expand All @@ -474,10 +474,8 @@ def __init__(self):
def _trim_channel_chunk(chunk, skip=0, trim=0):
if skip == 0 and trim == 0:
return chunk
data = None
scaler_data = None
if chunk.data is not None:
data = chunk.data[skip:len(chunk.data) - trim]
data = None if chunk.data is None else chunk.data[skip:len(chunk.data) - trim]
if chunk.scaler_data is not None:
scaler_data = {
scale_id: d[skip:len(d) - trim]
Expand Down Expand Up @@ -506,6 +504,6 @@ def _array_equal(a, b, chunk_size=100):
num_chunks = (len(a) + chunk_size - 1) // chunk_size
for i in range(num_chunks):
offset = i * chunk_size
if not (a[offset:offset+chunk_size] == b[offset:offset+chunk_size]).all():
if not (a[offset:offset + chunk_size] == b[offset:offset + chunk_size]).all():
return False
return True
Loading