Skip to content

Commit

Permalink
Read data section code fixed up (kinverarity1#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
kinverarity1 authored and dcslagel committed Jul 8, 2020
1 parent 849ae40 commit 4615788
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 40 deletions.
84 changes: 45 additions & 39 deletions lasio/las.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def read(
)

provisional_version = 2.0
provisional_wrapped = True
provisional_wrapped = "YES"
provisional_null = None

section_positions = reader.find_sections_in_file(file_obj)
Expand Down Expand Up @@ -213,6 +213,11 @@ def read(
]:
logger.debug("Reading data section {}".format(section_title))

file_obj.seek(k)
n_columns = reader.inspect_data_section(
file_obj, (first_line, last_line)
)

file_obj.seek(k)
arr = reader.read_data_section_iterative(
file_obj, (first_line, last_line), regexp_subs, value_null_subs
Expand All @@ -222,10 +227,45 @@ def read(
# TODO: check whether this treatment of NULLs is correct
arr[arr == provisional_null] = np.nan

# TODO: work out how to do array reshaping.
n_curves = len(self.curves)
n_arr_cols = len(self.curves)
# Provisionally, assume that the number of columns represented
# by the data section's array is equal to the number of columns
# defined in the Curves/Definition section.

n_columns_in_arr = len(self.curves)

# If we are told the file is unwrapped, then we assume that each
# column detected is a column, and we ignore the Curves/Definition
# section's number of columns instead.

if provisional_wrapped == "NO":
if len(self.curves) > n_columns:
n_columns_in_arr = n_columns

logger.debug(
"Data array (size {}) assumed to have {} columns "
"({} curves defined)".format(
arr.shape, n_columns_in_arr, len(self.curves)
)
)

# We attempt to reshape the 1D array read in from
# the data section so that it can be assigned to curves.

try:
data = np.reshape(arr, (-1, n_columns_in_arr))
except ValueError as exception:
error_message = "Cannot reshape ~A data size {0} into {1} columns".format(
arr.shape, n_columns_in_arr
)
if sys.version_info.major < 3:
exception.message = error_message
raise exception
else:
raise ValueError(error_message).with_traceback(
exception.__traceback__
)

self.set_data(data, truncate=False)
finally:
if hasattr(file_obj, "close"):
file_obj.close()
Expand All @@ -235,41 +275,7 @@ def read(
###### logger.warning("No data section (regexp='~A') found")
###### logger.warning("No numerical data found inside ~A section")


if s_valid:
arr = s["array"]
logger.debug("~A data.shape {}".format(arr.shape))
if version_NULL:
arr[arr == null] = np.nan
logger.debug(
"~A after NULL replacement data.shape {}".format(arr.shape)
)

n_curves = len(self.curves)
n_arr_cols = len(self.curves) # provisional pending below check
logger.debug("n_curves=%d ncols=%d" % (n_curves, s["ncols"]))
if wrap == "NO":
if s["ncols"] > n_curves:
n_arr_cols = s["ncols"]
try:
data = np.reshape(arr, (-1, n_arr_cols))
except ValueError as e:
err_msg = (
"cannot reshape ~A array of "
"size {arr_shape} into "
"{n_arr_cols} columns".format(
arr_shape=arr.shape, n_arr_cols=n_arr_cols
)
)
if sys.version_info.major < 3:
e.message = err_msg
raise e
else:
raise ValueError(err_msg).with_traceback(e.__traceback__)
self.set_data(data, truncate=False)
drop.append(s["title"])
for key in drop:
self.raw_sections.pop(key)
# Understand the depth/index unit.

if "m" in str(index_unit):
index_unit = "m"
Expand Down
34 changes: 33 additions & 1 deletion lasio/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,38 @@ def read_file_contents(file_obj, regexp_subs, value_null_subs, ignore_data=False
return sections


def inspect_data_section(file_obj, line_nos):
    """Determine how many columns there are in the data section.

    The section's title line is consumed first. The following lines are
    then sampled until either the last line number of the section is
    reached or 21 lines have been examined, whichever comes first. Each
    sampled line is split on whitespace and its items are counted.

    Arguments:
        file_obj: file-like object open for reading at the beginning of
            the section (i.e. positioned on the section title line)
        line_nos (tuple): the first and last line no of the section to read

    Returns:
        integer number of columns, or -1 where the sampled lines do not
        all contain the same number of items (or no lines were sampled).
    """
    line_no = line_nos[0]

    # Skip over the section title line; its content is not needed here.
    file_obj.readline()

    item_counts = []

    for i, line in enumerate(file_obj):
        line_no = line_no + 1
        line = line.strip("\n").strip()
        n_items = len(line.split())
        logger.debug("Line {}: {} items counted in '{}'".format(line_no + 1, n_items, line))
        item_counts.append(n_items)
        # Stop at the end of the section, or once enough lines were sampled.
        if (line_no == line_nos[1]) or (i >= 20):
            break

    # An explicit comparison is used rather than ``assert`` so that the
    # result does not change when Python runs with assertions disabled (-O).
    if len(set(item_counts)) != 1:
        return -1
    return item_counts[0]


def read_data_section_iterative(file_obj, line_nos, regexp_subs, value_null_subs):
"""Read data section into memory.
Expand Down Expand Up @@ -505,7 +537,7 @@ def items(f, start_line_no, end_line_no):
break

array = np.array(
[i for i in items(file_obj, start_line_no=line_nos[0] + 1, end_line_no=line_nos[1])]
[i for i in items(file_obj, start_line_no=line_nos[0], end_line_no=line_nos[1])]
)
for value in value_null_subs:
array[array == value] = np.nan
Expand Down

0 comments on commit 4615788

Please sign in to comment.