Skip to content

Commit

Permalink
implement index file handling
Browse files Browse the repository at this point in the history
  • Loading branch information
ap-- committed May 30, 2022
1 parent 28f9d81 commit 9bc9a86
Showing 1 changed file with 48 additions and 27 deletions.
75 changes: 48 additions & 27 deletions pyfaidx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from itertools import islice
from math import ceil
from os.path import getmtime
from tempfile import TemporaryFile
from threading import Lock
from pkg_resources import get_distribution

Expand Down Expand Up @@ -351,19 +352,21 @@ def __init__(self,
"""
if fsspec and isinstance(filename, fsspec.core.OpenFile):
self.filename = filename.path
assert filename.mode == 'rb'
assert filename.compression is None # restriction could potentially be lifted
assert getattr(filename, 'mode', 'rb') == 'rb'
assert getattr(filename, 'compression', None) is None # restriction could potentially be lifted
try:
self.file = filename.open()
except IOError:
raise FastaNotFoundError("Cannot read FASTA from OpenFile %s" % filename)
self._fs = filename.fs

elif isinstance(filename, str) or hasattr(filename, '__fspath__'):
self.filename = str(filename)
try:
self.file = open(filename, 'r+b' if mutable else 'rb')
except IOError:
raise FastaNotFoundError("Cannot read FASTA from file %s" % filename)
self._fs = None

else:
raise TypeError("filename expected str, os.PathLike or fsspec.OpenFile, got: %r" % filename)
Expand Down Expand Up @@ -397,7 +400,7 @@ def __init__(self,
else:
self._bgzf = False

self.indexname = filename + '.fai'
self.indexname = self.filename + '.fai'
self.read_long_names = read_long_names
self.key_function = key_function
try:
Expand Down Expand Up @@ -438,32 +441,41 @@ def __init__(self,

self.mutable = mutable
with self.lock: # lock around index generation so only one thread calls method
try:
if os.path.exists(self.indexname) and getmtime(
self.indexname) >= getmtime(self.filename):
self.read_fai()
elif os.path.exists(self.indexname) and getmtime(
self.indexname) < getmtime(
self.filename) and not rebuild:
self.read_fai()
warnings.warn(
"Index file {0} is older than FASTA file {1}.".format(
self.indexname, self.filename), RuntimeWarning)
elif build_index:

if self._fs:
index_exists = self._fs.exists(self.indexname)
index_is_stale = index_exists and (
self._fs.stat(self.filename)["mtime"] > self._fs.stat(self.indexname)["mtime"]
)
else:
index_exists = os.path.exists(self.indexname)
index_is_stale = index_exists and (
getmtime(self.filename) > getmtime(self.indexname)
)

if (
build_index
and (not index_exists or (index_is_stale and rebuild))
):
try:
self.build_index()
self.read_fai()
else:
self.read_fai()
except FastaIndexingError:
self.file.close()
raise

except FastaIndexingError:
self.file.close()
os.remove(self.indexname + '.tmp')
raise
try:
self.read_fai()
except Exception:
# Handle potential exceptions other than 'FastaIndexingError'
self.file.close()
raise

if index_is_stale and not rebuild:
warnings.warn(
"Index file {0} is older than FASTA file {1}.".format(
self.indexname, self.filename
), RuntimeWarning
)

def __contains__(self, region):
if not self.buffer['name']:
return False
Expand All @@ -483,7 +495,7 @@ def _index_as_string(self):

def read_fai(self):
try:
with open(self.indexname) as index:
with self._open_fai(mode='r') as index:
prev_bend = 0
drop_keys = []
for line in index:
Expand Down Expand Up @@ -535,7 +547,7 @@ def build_index(self):
assert self.file.tell() == 0
try:
with rewind(self.file) as fastafile:
with open(self.indexname + '.tmp', 'w') as indexfile:
with TemporaryFile(mode='w+') as indexfile:
rname = None # reference sequence name
offset = 0 # binary offset of end of current line
rlen = 0 # reference character length
Expand Down Expand Up @@ -616,7 +628,10 @@ def build_index(self):
"Inconsistent line found in >{0} at "
"line {1:n}.".format(rname,
bad_lines[0][0] + 1))
shutil.move(self.indexname + '.tmp', self.indexname)

indexfile.seek(0)
with self._open_fai(mode='w') as target:
shutil.copyfileobj(indexfile, target)
except (IOError, FastaIndexingError) as e:
if isinstance(e, IOError):
raise IOError(
Expand All @@ -627,10 +642,16 @@ def build_index(self):

def write_fai(self):
    """Write the in-memory index out to the index file.

    The lines yielded by ``self._index_as_string()`` are written, in order,
    to the handle returned by ``self._open_fai(mode='w')``. The whole write
    happens while ``self.lock`` is held.
    """
    with self.lock:
        # Go through _open_fai rather than the builtin open() so the index
        # is written via the same backend that _open_fai selects.
        with self._open_fai(mode='w') as outfile:
            for line in self._index_as_string():
                outfile.write(line)

def _open_fai(self, mode):
    """Open the index file ``self.indexname`` and return the file handle.

    :param mode: file mode passed through unchanged to the opener
        (the callers visible here use ``'r'`` and ``'w'``).
    :return: the object returned by ``self._fs.open(...)`` when ``self._fs``
        is truthy, otherwise the object returned by the builtin ``open``.
    """
    if self._fs:
        # A filesystem object is attached: open the index through it.
        return self._fs.open(self.indexname, mode=mode)
    # No filesystem object: treat indexname as a local path.
    return open(self.indexname, mode=mode)

def from_buffer(self, start, end):
i_start = start - self.buffer['start'] # want [0, 1) coordinates from [1, 1] coordinates
i_end = end - self.buffer['start'] + 1
Expand Down

0 comments on commit 9bc9a86

Please sign in to comment.