Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix path handling for non-conforming datasets #81

Merged
11 commits merged on Dec 19, 2024
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ SigMF.egg-info/*

# test related
.coverage
pytest.xml
coverage.xml
.hypothesis/
.tox/
coverage.xml
pytest.xml
htmlcov/*
50 changes: 29 additions & 21 deletions sigmf/sigmffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,12 @@ def set_data_file(self, data_file=None, data_buffer=None, skip_checksum=False, o
self._memmap = raveled.reshape(mapped_reshape)
self.shape = self._memmap.shape if (self._return_type is None) else self._memmap.shape[:-1]

if self.data_file is not None:
file_name = path.split(self.data_file)[1]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't love path handling like this, but I opened #90 and we can address it in the future.

ext = path.splitext(file_name)[1]
if ext.lower() != SIGMF_DATASET_EXT:
self.set_global_field(SigMFFile.DATASET_KEY, file_name)

if skip_checksum:
return None
return self.calculate_hash()
Expand Down Expand Up @@ -932,34 +938,36 @@ def get_dataset_filename_from_metadata(meta_fn, metadata=None):
Parse provided metadata and return the expected data filename. In the case of
a metadata only distribution, or if the file does not exist, this will return
'None'. The priority for conflicting:
1. The file named <METAFILE_BASENAME>.sigmf-meta if it exists
2. The file in the `core:dataset` field (Non-Compliant Dataset) if it exists
3. None (may be a metadata only distribution)
1. The file named <stem>.SIGMF_DATASET_EXT if it exists
2. The file in the DATASET_KEY field (Non-Compliant Dataset) if it exists
3. None (may be a metadata only distribution)
"""
compliant_data_fn = get_sigmf_filenames(meta_fn)["data_fn"]
noncompliant_data_fn = metadata["global"].get("core:dataset", None)
compliant_filename = get_sigmf_filenames(meta_fn)["data_fn"]
noncompliant_filename = metadata["global"].get(SigMFFile.DATASET_KEY, None)

if path.isfile(compliant_data_fn):
if noncompliant_data_fn:
if path.isfile(compliant_filename):
if noncompliant_filename:
warnings.warn(
f"Compliant Dataset `{compliant_data_fn}` exists but "
f'"core:dataset" is also defined; using `{compliant_data_fn}`'
f"Compliant Dataset `{compliant_filename}` exists but "
f"{SigMFFile.DATASET_KEY} is also defined; using `{compliant_filename}`"
)
return compliant_data_fn

elif noncompliant_data_fn:
if path.isfile(noncompliant_data_fn):
if metadata["global"].get("core:metadata_only", False):
warnings.warn(
'Schema defines "core:dataset" but "core:meatadata_only" '
f"also exists; using `{noncompliant_data_fn}`"
return compliant_filename

elif noncompliant_filename:
dir_path = path.split(meta_fn)[0]
noncompliant_data_file_path = path.join(dir_path, noncompliant_filename)
if path.isfile(noncompliant_data_file_path):
if metadata["global"].get(SigMFFile.METADATA_ONLY_KEY, False):
raise SigMFFileError(
f"Schema defines {SigMFFile.DATASET_KEY} "
f"but {SigMFFile.METADATA_ONLY_KEY} also exists; using `{noncompliant_filename}`"
)
return noncompliant_data_fn
return noncompliant_data_file_path
else:
warnings.warn(
f"Non-Compliant Dataset `{noncompliant_data_fn}` is specified " 'in "core:dataset" but does not exist!'
raise SigMFFileError(
f"Non-Compliant Dataset `{noncompliant_filename}` is specified in {SigMFFile.DATASET_KEY} "
"but does not exist!"
)

return None


Expand Down
5 changes: 3 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,16 @@
import pytest

from sigmf import __specification__
from sigmf.archive import SIGMF_DATASET_EXT
from sigmf.sigmffile import SigMFFile

from .testdata import TEST_FLOAT32_DATA, TEST_METADATA


@pytest.fixture
def test_data_file():
"""when called, yields temporary file"""
with tempfile.NamedTemporaryFile() as temp:
"""when called, yields temporary dataset"""
with tempfile.NamedTemporaryFile(suffix=f".{SIGMF_DATASET_EXT}") as temp:
TEST_FLOAT32_DATA.tofile(temp.name)
yield temp

Expand Down
64 changes: 64 additions & 0 deletions tests/test_ncd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Copyright: Multiple Authors
#
# This file is part of sigmf-python. https://github.com/sigmf/sigmf-python
#
# SPDX-License-Identifier: LGPL-3.0-or-later

"""Tests for Non-Conforming Datasets"""

import copy
import os
import shutil
import tempfile
import unittest
from pathlib import Path

import numpy as np
from hypothesis import given
from hypothesis import strategies as st

from sigmf.error import SigMFFileError
from sigmf.sigmffile import SigMFFile, fromfile

from .testdata import TEST_FLOAT32_DATA, TEST_METADATA


class TestNonConformingDataset(unittest.TestCase):
    """Unit tests for Non-Conforming Datasets (NCDs).

    An NCD here is a recording whose dataset file does not use the compliant
    extension and is instead referenced from the metadata (loaded via the
    sibling `.sigmf-meta` file), so loading must resolve the dataset path
    relative to the metadata file's directory.
    """

    def setUp(self):
        """Create a fresh temporary directory for the test."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def tearDown(self):
        """Remove the temporary directory and all files created in it."""
        shutil.rmtree(self.temp_dir)

    @given(st.sampled_from([".", "subdir/", "sub0/sub1/sub2/"]))
    def test_load_ncd(self, subdir: str) -> None:
        """Round-trip a non-conforming dataset at several directory depths.

        Writes a `.bin` dataset plus its `.sigmf-meta`, reloads the pair, and
        checks the samples survive the loopback; then deletes the dataset and
        verifies loading raises SigMFFileError.
        """
        data_path = self.temp_dir / subdir / "dat.bin"
        meta_path = self.temp_dir / subdir / "dat.sigmf-meta"
        # pathlib equivalent of os.makedirs(..., exist_ok=True); needed because
        # hypothesis re-runs this body multiple times within one setUp/tearDown
        data_path.parent.mkdir(parents=True, exist_ok=True)

        # create data file
        TEST_FLOAT32_DATA.tofile(data_path)

        # create metadata file referencing the non-compliant dataset
        ncd_metadata = copy.deepcopy(TEST_METADATA)
        meta = SigMFFile(metadata=ncd_metadata, data_file=data_path)
        meta.tofile(meta_path)

        # load dataset & validate we can read all the data
        meta_loopback = fromfile(meta_path)
        self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta_loopback.read_samples()))
        self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta_loopback[:]))

        # delete the non-conforming dataset and ensure error is raised due to missing dataset;
        # in Windows the SigMFFile instances need to be garbage collected first,
        # otherwise the np.memmap instances (stored in self._memmap) block the deletion
        meta = None
        meta_loopback = None
        data_path.unlink()
        with self.assertRaises(SigMFFileError):
            _ = fromfile(meta_path)
Loading