Skip to content

Commit

Permalink
Fix incorrect dtype precision upgrade for VectorIndex (#631)
Browse files Browse the repository at this point in the history
  • Loading branch information
rly authored Jun 16, 2021
1 parent 36db0e8 commit 929ec93
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 8 deletions.
9 changes: 7 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
# HDMF Changelog

## HDMF 2.5.8 (Upcoming)
## HDMF 2.5.8 (June 16, 2021)

### Minor improvements
- Improve Sphinx documentation. @rly (#627)

### Bug fix
- Fix error with representing an indexed table column when the `VectorIndex` dtype precision is upgraded more
than one step, e.g., uint8 to uint32. This can happen when, for example, a single `add_row` call is used to
add more than 65535 elements to an empty indexed column. @rly (#631)

## HDMF 2.5.7 (June 4, 2021)

### Bug fix
- Fix generation of extension classes that extend MultiContainerInterface and use a custom _fieldsname. @rly (#626)
- Fix generation of extension classes that extend `MultiContainerInterface` and use a custom _fieldsname. @rly (#626)

## HDMF 2.5.6 (May 19, 2021)

Expand Down
17 changes: 11 additions & 6 deletions src/hdmf/common/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,28 +107,33 @@ def add_vector(self, arg, **kwargs):

def __check_precision(self, idx):
"""
Check precision of current dataset and, if
necessary, adjust precision to accommodate new value.
Check precision of current dataset and, if necessary, adjust precision to accommodate new value.
Returns:
unsigned integer encoding of idx
"""
if idx > self.__maxval:
nbits = (np.log2(self.__maxval + 1) * 2)
while idx > self.__maxval:
nbits = (np.log2(self.__maxval + 1) * 2) # 8->16, 16->32, 32->64
if nbits == 128: # pragma: no cover
msg = ('Cannot store more than 18446744073709551615 elements in a VectorData. Largest dtype '
'allowed for VectorIndex is uint64.')
raise ValueError(msg)
self.__maxval = 2 ** nbits - 1
self.__uint = np.dtype('uint%d' % nbits).type
self.__maxval = 2 ** nbits - 1
self.__adjust_precision(self.__uint)
return self.__uint(idx)

def __adjust_precision(self, uint):
"""
Adjust precision of data to specificied unsigned integer precision
Adjust precision of data to specificied unsigned integer precision.
"""
if isinstance(self.data, list):
for i in range(len(self.data)):
self.data[i] = uint(self.data[i])
elif isinstance(self.data, np.ndarray):
self._VectorIndex__data = self.data.astype(uint)
# use self._Data__data to work around restriction on resetting self.data
self._Data__data = self.data.astype(uint)
else:
raise ValueError("cannot adjust precision of type %s to %s", (type(self.data), uint))

Expand Down
63 changes: 63 additions & 0 deletions tests/unit/common/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1838,3 +1838,66 @@ def test_dtr_references(self):
'y': [read_group1, read_group2]},
index=pd.Index(data=[102, 103], name='id'))
pd.testing.assert_frame_equal(ret, expected)


class TestVectorIndexDtype(TestCase):

def set_up_array_index(self):
data = VectorData(name='data', description='desc')
index = VectorIndex(name='index', data=np.array([]), target=data)
return index

def set_up_list_index(self):
data = VectorData(name='data', description='desc')
index = VectorIndex(name='index', data=[], target=data)
return index

def test_array_inc_precision(self):
index = self.set_up_array_index()
index.add_vector(np.empty((255, )))
self.assertEqual(index.data[0], 255)
self.assertEqual(index.data.dtype, np.uint8)

def test_array_inc_precision_1step(self):
index = self.set_up_array_index()
index.add_vector(np.empty((65535, )))
self.assertEqual(index.data[0], 65535)
self.assertEqual(index.data.dtype, np.uint16)

def test_array_inc_precision_2steps(self):
index = self.set_up_array_index()
index.add_vector(np.empty((65536, )))
self.assertEqual(index.data[0], 65536)
self.assertEqual(index.data.dtype, np.uint32)

def test_array_prev_data_inc_precision_2steps(self):
index = self.set_up_array_index()
index.add_vector(np.empty((255, ))) # dtype is still uint8
index.add_vector(np.empty((65536, )))
self.assertEqual(index.data[0], 255) # make sure the 255 is upgraded
self.assertEqual(index.data.dtype, np.uint32)

def test_list_inc_precision(self):
index = self.set_up_list_index()
index.add_vector(list(range(255)))
self.assertEqual(index.data[0], 255)
self.assertEqual(type(index.data[0]), np.uint8)

def test_list_inc_precision_1step(self):
index = self.set_up_list_index()
index.add_vector(list(range(65535)))
self.assertEqual(index.data[0], 65535)
self.assertEqual(type(index.data[0]), np.uint16)

def test_list_inc_precision_2steps(self):
index = self.set_up_list_index()
index.add_vector(list(range(65536)))
self.assertEqual(index.data[0], 65536)
self.assertEqual(type(index.data[0]), np.uint32)

def test_list_prev_data_inc_precision_2steps(self):
index = self.set_up_list_index()
index.add_vector(list(range(255)))
index.add_vector(list(range(65536 - 255)))
self.assertEqual(index.data[0], 255) # make sure the 255 is upgraded
self.assertEqual(type(index.data[0]), np.uint32)

0 comments on commit 929ec93

Please sign in to comment.