Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for corrupted Nortek files #372

Merged
merged 2 commits into from
Jan 14, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions mhkit/dolfyn/io/base.py
Original file line number Diff line number Diff line change
@@ -83,6 +83,9 @@ def _handle_nan(data):
Finds trailing NaNs that cause issues when running the rotation
algorithms and deletes them.
"""
if "time" not in data["coords"]:
raise Exception("No data recorded in file.")

nan = np.zeros(data["coords"]["time"].shape, dtype=bool)
l = data["coords"]["time"].size

9 changes: 9 additions & 0 deletions mhkit/dolfyn/io/nortek.py
Original file line number Diff line number Diff line change
@@ -262,6 +262,7 @@ def __init__(
self.config["coord_sys_axes"]
]
da["has_imu"] = 0 # Initiate attribute
self._eof = self.pos
if self.debug:
logging.info("Init completed")

@@ -384,13 +385,17 @@ def findnext(self, do_cs=True):
if self.endian == "<":
func = np.uint8
func2 = lib._bitshift8
searching = False
while True:
val = unpack(self.endian + "H", self.read(2))[0]
if np.array(val).astype(func) == 165 and (not do_cs or cs == sum):
self.f.seek(-2, 1)
return hex(func2(val))
sum += cs
cs = val
if self.debug and not searching:
logging.debug("Scanning every 2 bytes for next datablock...")
searching = True

def read_id(self):
"""Read the next 'ID' from the file."""
@@ -456,13 +461,17 @@ def findnextid(self, id):
id = int(id, 0)
nowid = None
while nowid != id:
pos = self.pos
nowid = self.read_id()
if nowid == 16:
shift = 22
else:
sz = 2 * unpack(self.endian + "H", self.read(2))[0]
shift = sz - 4
self.f.seek(shift, 1)
# If the file position did not advance, skip 2 bytes to avoid an infinite loop
if self.pos == pos:
self.f.seek(2, 1)
return self.pos

def code_spacing(self, searchcode, iternum=50):
17 changes: 12 additions & 5 deletions mhkit/dolfyn/io/nortek2.py
Original file line number Diff line number Diff line change
@@ -14,6 +14,9 @@
from ..time import epoch2dt64, _fill_time_gaps


int32_max = np.iinfo(np.int32).max


def read_signature(
filename,
userdata=True,
@@ -163,7 +166,7 @@ def __init__(
debug=debug,
dp=dual_profile,
)
self._reopen(bufsize)
self._open(bufsize)
self.filehead_config = self._read_filehead_config_string()
self._ens_pos = self._index["pos"][
lib._boolarray_firstensemble_ping(self._index)
@@ -183,7 +186,7 @@ def _calc_lastblock_iswhole(
return (self._eof - self._ens_pos[-1]) == standard_blocksize

def _check_nortek(self, endian):
self._reopen(10)
self._open(10)
byts = self.f.read(2)
if endian is None:
if unpack("<" + "BB", byts) == (165, 10):
@@ -205,8 +208,12 @@ def find_all(s, c):
yield idx
idx = s.find(c, idx + 1)

# Open the entire file
self._reopen(self._eof)
# Open the entire file to find start header
if self._eof >= int32_max:
init_buffer = int32_max
else:
init_buffer = self._eof
self._open(init_buffer)
pk = self.f.peek(1)
# Search for multiple saved headers
found = [i for i in find_all(pk, b"GETCLOCKSTR")]
@@ -216,7 +223,7 @@ def find_all(s, c):
start_idx = found[-1] - 11
return start_idx

def _reopen(self, bufsize=None):
def _open(self, bufsize=None):
if bufsize is None:
bufsize = 1000000
try: