From 5858ee07b5af72ed8597e3bb06cbb372a41b8d6c Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 20 Feb 2025 07:18:51 -0800 Subject: [PATCH] Slight refactor to Vector Index get (#1248) * Slight refactor to Vector Index get * clean up * notes * Update CHANGELOG.md * spelling is hard * better try/except --- CHANGELOG.md | 5 ++++- src/hdmf/common/table.py | 34 ++++++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4462ff604..d24ac481f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ # HDMF Changelog -## [Unreleased] +## HDMF 4.0.1 + +### Enhancements +- Optimized `get` within `VectorIndex` to be more efficient when retrieving a dataset of references. @mavaylon1 [#1248](https://github.com/hdmf-dev/hdmf/pull/1248) ### Changed - `hdmf.monitor` is unused and undocumented. It has been deprecated and will be removed in HDMF 5.0. @rly [#1245](https://github.com/hdmf-dev/hdmf/pull/1245) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 2f6401672..e9de4d345 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -160,6 +160,11 @@ def add_row(self, arg, **kwargs): """ self.add_vector(arg, **kwargs) + def __get_slice(self, arg): + start = 0 if arg == 0 else self.data[arg - 1] + end = self.data[arg] + return slice(start, end) + def __getitem_helper(self, arg, **kwargs): """ Internal helper function used by __getitem__ to retrieve a data value from self.target @@ -168,9 +173,8 @@ def __getitem_helper(self, arg, **kwargs): :param kwargs: any additional arguments to *get* method of the self.target VectorData :return: Scalar or list of values retrieved """ - start = 0 if arg == 0 else self.data[arg - 1] - end = self.data[arg] - return self.target.get(slice(start, end), **kwargs) + slices = self.__get_slice(arg) + return self.target.get(slices, **kwargs) def __getitem__(self, arg): """ @@ -199,8 +203,27 @@ def get(self, arg, **kwargs): arg = np.where(arg)[0] indices = arg ret = list() - for i in indices: - ret.append(self.__getitem_helper(i, **kwargs)) + if len(indices) > 0: + # Note: len(indices) == 0 for test_to_hierarchical_dataframe_empty_tables. + # This is an edge case test for to_hierarchical_dataframe() on empty tables. + # When len(indices) == 0, ret is expected to be an empty list, defined above. + try: + data = self.target.get(slice(None), **kwargs) + except IndexError: + """ + Note: TODO: test_to_hierarchical_dataframe_indexed_dtr_on_last_level. + This is the old way to get the data and not an untested feature. + """ + for i in indices: + ret.append(self.__getitem_helper(i, **kwargs)) + + return ret + + slices = [self.__get_slice(i) for i in indices] + if isinstance(data, pd.DataFrame): + ret = [data.iloc[s] for s in slices] + else: + ret = [data[s] for s in slices] return ret @@ -1453,7 +1476,6 @@ def get(self, arg, index=False, df=True, **kwargs): return ret elif isinstance(arg, (list, slice, np.ndarray)): idx = arg - # get the data at the specified indices if isinstance(self.data, (tuple, list)) and isinstance(idx, (list, np.ndarray)): ret = [self.data[i] for i in idx]