From 55338fc475a0ee2df884f5ebcdc6fac93cc045a0 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 16 Feb 2025 10:24:34 -0800 Subject: [PATCH 1/6] Slight refactor to Vector Index get --- src/hdmf/common/table.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 84ac4da3b..6028ba57c 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -160,6 +160,11 @@ def add_row(self, arg, **kwargs): """ self.add_vector(arg, **kwargs) + def __get_slice(self, arg, **kwargs): + start = 0 if arg == 0 else self.data[arg - 1] + end = self.data[arg] + return slice(start, end) + def __getitem_helper(self, arg, **kwargs): """ Internal helper function used by __getitem__ to retrieve a data value from self.target @@ -199,8 +204,20 @@ def get(self, arg, **kwargs): arg = np.where(arg)[0] indices = arg ret = list() - for i in indices: - ret.append(self.__getitem_helper(i, **kwargs)) + if len(indices) > 0: # This is for test_to_hierarchical_dataframe_empty_tables + try: + data = self.target.get(slice(None), **kwargs) + slices = [self.__get_slice(i) for i in indices] + if isinstance(data, pd.DataFrame): + ret = [data.iloc[s] for s in slices] + else: + ret = [data[s] for s in slices] + except IndexError: + """ + Note: TODO: test_to_hierarchical_dataframe_indexed_dtr_on_last_level + """ + for i in indices: + ret.append(self.__getitem_helper(i, **kwargs)) return ret @@ -1440,7 +1457,6 @@ def get(self, arg, index=False, df=True, **kwargs): return ret elif isinstance(arg, (list, slice, np.ndarray)): idx = arg - # get the data at the specified indices if isinstance(self.data, (tuple, list)) and isinstance(idx, (list, np.ndarray)): ret = [self.data[i] for i in idx] From cd13159d84d5427eaba2892565dca6c04099bc64 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Sun, 16 Feb 2025 10:29:19 -0800 Subject: [PATCH 2/6] clean up --- src/hdmf/common/table.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 6028ba57c..90109849d 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -160,7 +160,7 @@ def add_row(self, arg, **kwargs): """ self.add_vector(arg, **kwargs) - def __get_slice(self, arg, **kwargs): + def __get_slice(self, arg): start = 0 if arg == 0 else self.data[arg - 1] end = self.data[arg] return slice(start, end) @@ -173,9 +173,8 @@ def __getitem_helper(self, arg, **kwargs): :param kwargs: any additional arguments to *get* method of the self.target VectorData :return: Scalar or list of values retrieved """ - start = 0 if arg == 0 else self.data[arg - 1] - end = self.data[arg] - return self.target.get(slice(start, end), **kwargs) + slices = self.__get_slice(arg) + return self.target.get(slices, **kwargs) def __getitem__(self, arg): """ From 7bbeba4cef8c9956b4f286738ca48a40297ce0ea Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Wed, 19 Feb 2025 10:44:58 -0800 Subject: [PATCH 3/6] notes --- src/hdmf/common/table.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 8e045edb2..42af209a4 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -203,7 +203,10 @@ def get(self, arg, **kwargs): arg = np.where(arg)[0] indices = arg ret = list() - if len(indices) > 0: # This is for test_to_hierarchical_dataframe_empty_tables + if len(indices) > 0: + # Note: len(indices) == 0 for test_to_hierarchical_dataframe_empty_tables. + # This is an edge case test for to_hierarchical_dataframe() on empty tables. + # When len(indices) == 0, ret is expected to be an empty list, defiend above. try: data = self.target.get(slice(None), **kwargs) slices = [self.__get_slice(i) for i in indices] @@ -213,7 +216,8 @@ def get(self, arg, **kwargs): ret = [data[s] for s in slices] except IndexError: """ - Note: TODO: test_to_hierarchical_dataframe_indexed_dtr_on_last_level + Note: TODO: test_to_hierarchical_dataframe_indexed_dtr_on_last_level. + This is the old way to get the data and not an untested feature. """ for i in indices: ret.append(self.__getitem_helper(i, **kwargs)) From 887ee9453eb9266887e23c4c52a846f8e8686305 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 19 Feb 2025 10:50:52 -0800 Subject: [PATCH 4/6] Update CHANGELOG.md --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4462ff604..d24ac481f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ # HDMF Changelog -## [Unreleased] +## HDMF 4.0.1 + +### Enhancements +- Optimized `get` within `VectorIndex` to be more efficient when retrieving a dataset of references. @mavaylon1 [#1248](https://github.com/hdmf-dev/hdmf/pull/1248) ### Changed - `hdmf.monitor` is unused and undocumented. It has been deprecated and will be removed in HDMF 5.0. @rly [#1245](https://github.com/hdmf-dev/hdmf/pull/1245) From 551b56ef656f4fa010f4380a7914f3cd8f8d647c Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Wed, 19 Feb 2025 10:51:16 -0800 Subject: [PATCH 5/6] spelling is hard --- src/hdmf/common/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 42af209a4..309c66181 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -206,7 +206,7 @@ def get(self, arg, **kwargs): if len(indices) > 0: # Note: len(indices) == 0 for test_to_hierarchical_dataframe_empty_tables. # This is an edge case test for to_hierarchical_dataframe() on empty tables. - # When len(indices) == 0, ret is expected to be an empty list, defiend above. + # When len(indices) == 0, ret is expected to be an empty list, defined above. try: data = self.target.get(slice(None), **kwargs) slices = [self.__get_slice(i) for i in indices] From f68969cbb73d0d11a283082c579759ddf26d906e Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Wed, 19 Feb 2025 10:55:22 -0800 Subject: [PATCH 6/6] better try/except --- src/hdmf/common/table.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 309c66181..e9de4d345 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -209,11 +209,6 @@ def get(self, arg, **kwargs): # When len(indices) == 0, ret is expected to be an empty list, defined above. try: data = self.target.get(slice(None), **kwargs) - slices = [self.__get_slice(i) for i in indices] - if isinstance(data, pd.DataFrame): - ret = [data.iloc[s] for s in slices] - else: - ret = [data[s] for s in slices] except IndexError: """ Note: TODO: test_to_hierarchical_dataframe_indexed_dtr_on_last_level. @@ -221,6 +216,14 @@ def get(self, arg, **kwargs): """ for i in indices: ret.append(self.__getitem_helper(i, **kwargs)) + + return ret + + slices = [self.__get_slice(i) for i in indices] + if isinstance(data, pd.DataFrame): + ret = [data.iloc[s] for s in slices] + else: + ret = [data[s] for s in slices] return ret