Skip to content

Commit

Permalink
Fix validation of datetime-formatted strings
Browse files Browse the repository at this point in the history
  • Loading branch information
rly committed Jan 12, 2024
1 parent b7cece2 commit 4465438
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 23 deletions.
67 changes: 44 additions & 23 deletions src/hdmf/validate/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
__allowable['numeric'] = set(chain.from_iterable(__allowable[k] for k in __allowable if 'int' in k or 'float' in k))


def check_type(expected, received):
def check_type(expected, received, string_format=None):
'''
*expected* should come from the spec
*received* should come from the data
Expand All @@ -52,6 +52,9 @@ def check_type(expected, received):
raise ValueError('compound type shorter than expected')
for i, exp in enumerate(DtypeHelper.simplify_cpd_type(expected)):
rec = received[i]
if exp == "isodatetime": # short circuit for isodatetime
sub_string_format = string_format[i]
return rec in ("utf", "ascii") and sub_string_format == "isodatetime"

Check warning on line 57 in src/hdmf/validate/validator.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/validate/validator.py#L56-L57

Added lines #L56 - L57 were not covered by tests
if rec not in __allowable[exp]:
return False
return True
Expand All @@ -71,6 +74,11 @@ def check_type(expected, received):
received = received.name
elif isinstance(received, type):
received = received.__name__
if expected == "isodatetime": # short circuit for isodatetime
return (
received in __allowable[expected] or
(received in ("utf", "ascii") and string_format == "isodatetime")
)
if isinstance(expected, RefSpec):
expected = expected.reftype
elif isinstance(expected, type):
Expand All @@ -89,48 +97,58 @@ def get_iso8601_regex():
_iso_re = get_iso8601_regex()


def _check_isodatetime(s, default=None):
def get_string_format(data):
"""Return the string format of the given data. Possible outputs are "isodatetime" and None.
"""
assert isinstance(data, (str, bytes))
try:
if _iso_re.match(pystr(s)) is not None:
if _iso_re.match(pystr(data)) is not None:
return 'isodatetime'
except Exception:
pass
return default
return None


class EmptyArrayError(Exception):
pass


def get_type(data):
def get_type(data, builder_dtype=None):
"""Return a tuple of (the string representation of the type, the format of the string data) for the given data."""
if isinstance(data, str):
return _check_isodatetime(data, 'utf')
return 'utf', get_string_format(data)
elif isinstance(data, bytes):
return _check_isodatetime(data, 'ascii')
return 'ascii', get_string_format(data)
elif isinstance(data, RegionBuilder):
return 'region'
return 'region', None

Check warning on line 123 in src/hdmf/validate/validator.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/validate/validator.py#L123

Added line #L123 was not covered by tests
elif isinstance(data, ReferenceBuilder):
return 'object'
return 'object', None

Check warning on line 125 in src/hdmf/validate/validator.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/validate/validator.py#L125

Added line #L125 was not covered by tests
elif isinstance(data, ReferenceResolver):
return data.dtype
return data.dtype, None
elif isinstance(data, np.ndarray):
if data.size == 0:
raise EmptyArrayError()
return get_type(data[0])
return get_type(data[0], builder_dtype)
elif isinstance(data, np.bool_):
return 'bool'
return 'bool', None
if not hasattr(data, '__len__'):
return type(data).__name__
return type(data).__name__, None
else:
if builder_dtype and isinstance(builder_dtype, list): # compound dtype
dtypes = []
string_formats = []
for i in range(len(builder_dtype)):
dtype, string_format = get_type(data[0][i])
dtypes.append(dtype)
string_formats.append(string_format)
return dtypes, string_formats
if hasattr(data, 'dtype'):
if isinstance(data.dtype, list):
return [get_type(data[0][i]) for i in range(len(data.dtype))]
if data.dtype.metadata is not None and data.dtype.metadata.get('vlen') is not None:
return get_type(data[0])
return data.dtype
return data.dtype, None
if len(data) == 0:
raise EmptyArrayError()
return get_type(data[0])
return get_type(data[0], builder_dtype)


def check_shape(expected, received):
Expand Down Expand Up @@ -310,7 +328,7 @@ def validate(self, **kwargs):
if not isinstance(value, BaseBuilder):
expected = '%s reference' % spec.dtype.reftype
try:
value_type = get_type(value)
value_type, _ = get_type(value)

Check warning on line 331 in src/hdmf/validate/validator.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/validate/validator.py#L331

Added line #L331 was not covered by tests
ret.append(DtypeError(self.get_spec_loc(spec), expected, value_type))
except EmptyArrayError:
# do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
Expand All @@ -323,8 +341,8 @@ def validate(self, **kwargs):
ret.append(IncorrectDataType(self.get_spec_loc(spec), spec.dtype.target_type, data_type))
else:
try:
dtype = get_type(value)
if not check_type(spec.dtype, dtype):
dtype, string_format = get_type(value)
if not check_type(spec.dtype, dtype, string_format):
ret.append(DtypeError(self.get_spec_loc(spec), spec.dtype, dtype))
except EmptyArrayError:
# do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
Expand Down Expand Up @@ -385,14 +403,17 @@ def validate(self, **kwargs):
data = builder.data
if self.spec.dtype is not None:
try:
dtype = get_type(data)
if not check_type(self.spec.dtype, dtype):
dtype, string_format = get_type(data, builder.dtype)
if not check_type(self.spec.dtype, dtype, string_format):
ret.append(DtypeError(self.get_spec_loc(self.spec), self.spec.dtype, dtype,
location=self.get_builder_loc(builder)))
except EmptyArrayError:
# do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
pass
shape = get_data_shape(data)
if isinstance(builder.dtype, list):
shape = (len(builder.data), ) # only 1D datasets with compound types are supported
else:
shape = get_data_shape(data)
if not check_shape(self.spec.shape, shape):
if shape is None:
ret.append(ExpectedArrayError(self.get_spec_loc(self.spec), self.spec.shape, str(data),
Expand Down
52 changes: 52 additions & 0 deletions tests/unit/validator_tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,58 @@ def test_empty_nparray(self):
# TODO test shape validation more completely


class TestStringDatetime(TestCase):

def test_str_coincidental_isodatetime(self):
"""Test validation of a text spec allows a string that coincidentally matches the isodatetime format."""
spec_catalog = SpecCatalog()
spec = GroupSpec(
doc='A test group specification with a data type',
data_type_def='Bar',
datasets=[
DatasetSpec(doc='an example scalar dataset', dtype="text", name='data1'),
DatasetSpec(doc='an example 1D dataset', dtype="text", name='data2', shape=(None, )),
DatasetSpec(
doc='an example 1D compound dtype dataset',
dtype=[
DtypeSpec('x', doc='x', dtype='int'),
DtypeSpec('y', doc='y', dtype='text'),
],
name='data3',
shape=(None, ),
),
],
attributes=[
AttributeSpec(name='attr1', doc='an example scalar attribute', dtype="text"),
AttributeSpec(name='attr2', doc='an example 1D attribute', dtype="text", shape=(None, )),
]
)
spec_catalog.register_spec(spec, 'test.yaml')
namespace = SpecNamespace(
'a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog
)
vmap = ValidatorMap(namespace)

bar_builder = GroupBuilder(
name='my_bar',
attributes={'data_type': 'Bar', 'attr1': "2023-01-01", 'attr2': ["2023-01-01"]},
datasets=[
DatasetBuilder(name='data1', data="2023-01-01"),
DatasetBuilder(name='data2', data=["2023-01-01"]),
DatasetBuilder(
name='data3',
data=[(1, "2023-01-01")],
dtype=[
DtypeSpec('x', doc='x', dtype='int'),
DtypeSpec('y', doc='y', dtype='text'),
],
),
],
)
results = vmap.validate(bar_builder)
self.assertEqual(len(results), 0)


class TestLinkable(TestCase):

def set_up_spec(self):
Expand Down

0 comments on commit 4465438

Please sign in to comment.