Skip to content

Commit

Permalink
Check DynamicTableRegion data is in bounds
Browse files Browse the repository at this point in the history
  • Loading branch information
rly committed Aug 10, 2024
1 parent 3cc7db3 commit 3ba2646
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 12 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

### Enhancements
- Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157)

- Added a check when setting or adding data to a `DynamicTableRegion` or setting the `table` attribute of a `DynamicTableRegion`
that the data values are in bounds of the linked table. This can be turned off for
`DynamicTableRegion.__init__` using the keyword argument `validate_data=False`. @rly [#1168](https://github.com/hdmf-dev/hdmf/pull/1168)
## HDMF 3.14.3 (July 29, 2024)

### Enhancements
Expand Down
48 changes: 38 additions & 10 deletions src/hdmf/common/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1360,42 +1360,64 @@ class DynamicTableRegion(VectorData):
'table',
)

MAX_ROWS_TO_VALIDATE_INIT = int(1e3)

@docval({'name': 'name', 'type': str, 'doc': 'the name of this VectorData'},
{'name': 'data', 'type': ('array_data', 'data'),
'doc': 'a dataset where the first dimension is a concatenation of multiple vectors'},
{'name': 'description', 'type': str, 'doc': 'a description of what this region represents'},
{'name': 'table', 'type': DynamicTable,
'doc': 'the DynamicTable this region applies to', 'default': None},
{'name': 'validate_data', 'type': bool,
'doc': 'whether to validate the data is in bounds of the linked table', 'default': True},
allow_positional=AllowPositional.WARNING)
def __init__(self, **kwargs):
t = popargs('table', kwargs)
t, validate_data = popargs('table', 'validate_data', kwargs)
data = getargs('data', kwargs)
if validate_data:
self._validate_index_in_range(data, t)

super().__init__(**kwargs)
self.table = t
if t is not None: # set the table attribute using fields to avoid another validation in the setter
self.fields['table'] = t

def _validate_index_in_range(self, data, table):
"""If the length of data is small, and if data contains an index that is out of bounds, then raise an error.
If the object is being constructed from a file, raise a warning instead to ensure invalid data can still be
read.
"""
if table and len(data) <= self.MAX_ROWS_TO_VALIDATE_INIT:
for val in data:
if val >= len(table) or val < 0:
error_msg = f"DynamicTableRegion index {val} is out of bounds for {type(table)} '{table.name}'."
self._error_on_new_warn_on_construct(error_msg, error_cls=IndexError)

@property
def table(self):
"""The DynamicTable this DynamicTableRegion is pointing to"""
return self.fields.get('table')

@table.setter
def table(self, val):
def table(self, table):
"""
Set the table this DynamicTableRegion should be pointing to
Set the table this DynamicTableRegion should be pointing to.
:param val: The DynamicTable this DynamicTableRegion should be pointing to
This will validate all data elements in this DynamicTableRegion to ensure they are within bounds.
:param table: The DynamicTable this DynamicTableRegion should be pointing to
:raises: AttributeError if table is already in fields
:raises: IndexError if the current indices are out of bounds for the new table given by val
"""
if val is None:
if table is None:
return
if 'table' in self.fields:
msg = "can't set attribute 'table' -- already set"
raise AttributeError(msg)
dat = self.data
if isinstance(dat, DataIO):
dat = dat.data
self.fields['table'] = val

self.fields['table'] = table
for val in self.data:
self._validate_new_data_element(val)

def __getitem__(self, arg):
return self.get(arg)
Expand Down Expand Up @@ -1540,6 +1562,12 @@ def _validate_on_set_parent(self):
warn(msg, stacklevel=2)
return super()._validate_on_set_parent()

def _validate_new_data_element(self, arg):
"""Validate that the new index is within bounds of the table. Raises an IndexError if not."""
if self.table and (arg >= len(self.table) or arg < 0):
raise IndexError(f"DynamicTableRegion index {arg} is out of bounds for "
f"{type(self.table)} '{self.table.name}'.")


def _uint_precision(elements):
""" Calculate the uint precision needed to encode a set of elements """
Expand Down
13 changes: 13 additions & 0 deletions src/hdmf/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,19 @@ def _validate_on_set_parent(self):
"""
pass

def _error_on_new_warn_on_construct(self, error_msg: str, error_cls: type = ValueError):
"""Raise a ValueError when a check is violated on instance creation.
To ensure backwards compatibility, this method throws a warning
instead of raising an error when reading from a file, ensuring that
files with invalid data can be read. If error_msg is set to None
the function will simply return without further action.
"""
if error_msg is None:
return

Check warning on line 578 in src/hdmf/container.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/container.py#L578

Added line #L578 was not covered by tests
if not self._in_construct_mode:
raise error_cls(error_msg)
warn(error_msg)

Check warning on line 581 in src/hdmf/container.py

View check run for this annotation

Codecov / codecov/patch

src/hdmf/container.py#L581

Added line #L581 was not covered by tests


class Container(AbstractContainer):
"""A container that can contain other containers and has special functionality for printing."""
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/common/test_linkedtables.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ def test_to_hierarchical_dataframe_indexed_dtr_on_last_level(self):
p1 = DynamicTable(name='parent_table', description='parent_table',
columns=[VectorData(name='p1', description='p1', data=np.arange(3)), dtr_p1, vi_dtr_p1])
# Super-parent table
dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(4), table=p1)
dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=[0, 1, 2, 0], table=p1)
vi_dtr_sp = VectorIndex(name='sl1_index', data=[1, 2, 3], target=dtr_sp)
spt = DynamicTable(name='super_parent_table', description='super_parent_table',
columns=[VectorData(name='sp1', description='sp1', data=np.arange(3)), dtr_sp, vi_dtr_sp])
Expand Down
28 changes: 28 additions & 0 deletions tests/unit/common/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1287,6 +1287,34 @@ def test_no_df_nested(self):
with self.assertRaisesWith(ValueError, msg):
dynamic_table_region.get(0, df=False, index=False)

def test_init_out_of_bounds(self):
table = self.with_columns_and_data()
with self.assertRaises(IndexError):
DynamicTableRegion(name='dtr', data=[0, 1, 2, 2, 5], description='desc', table=table)

def test_init_out_of_bounds_long(self):
table = self.with_columns_and_data()
data = np.ones(DynamicTableRegion.MAX_ROWS_TO_VALIDATE_INIT+1, dtype=int)*5
dtr = DynamicTableRegion(name='dtr', data=data, description='desc', table=table)
assert dtr.data is data # no exception raised

def test_init_out_of_bounds_no_validate(self):
table = self.with_columns_and_data()
dtr = DynamicTableRegion(name='dtr', data=[0, 1, 5], description='desc', table=table, validate_data=False)
self.assertEqual(dtr.data, [0, 1, 5]) # no exception raised

def test_add_row_out_of_bounds(self):
table = self.with_columns_and_data()
dtr = DynamicTableRegion(name='dtr', data=[0, 1, 2, 2], description='desc', table=table)
with self.assertRaises(IndexError):
dtr.add_row(5)

def test_set_table_out_of_bounds(self):
table = self.with_columns_and_data()
dtr = DynamicTableRegion(name='dtr', data=[0, 1, 5], description='desc')
with self.assertRaises(IndexError):
dtr.table = table


class DynamicTableRegionRoundTrip(H5RoundTripMixin, TestCase):

Expand Down

0 comments on commit 3ba2646

Please sign in to comment.