From 566d7d5adae0af1dc0dafe401256f50688260e49 Mon Sep 17 00:00:00 2001
From: Andrew Tritt
Date: Thu, 1 Aug 2019 13:46:00 -0700
Subject: [PATCH] Add ability to resolve link to links (#124)

* add field for tracking location

* add tracking of external link locations

* write links in write_builder

* add tests

* clean up flake8, and use function for getting temp filepath
---
 src/hdmf/backends/hdf5/h5tools.py  |   6 ++
 src/hdmf/build/builders.py         |  12 ++++
 tests/unit/test_io_hdf5_h5tools.py | 109 ++++++++++++++++++-----------
 3 files changed, 86 insertions(+), 41 deletions(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index ffd738848..8d8b4e9be 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -310,6 +310,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
                     # get path of link (the key used for tracking what's been built)
                     target_path = link_type.path
                     builder_name = os.path.basename(target_path)
+                    parent_loc = os.path.dirname(target_path)
                     # get builder if already read, else build it
                     builder = self.__get_built(sub_h5obj.file.filename, target_path)
                     if builder is None:
@@ -319,6 +320,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
                         else:
                             builder = self.__read_group(sub_h5obj, builder_name, ignore=ignore)
                         self.__set_built(sub_h5obj.file.filename, target_path, builder)
+                    builder.location = parent_loc
                     link_builder = LinkBuilder(builder, k, source=h5obj.file.filename)
                     link_builder.written = True
                     kwargs['links'][builder_name] = link_builder
@@ -446,6 +448,8 @@ def write_builder(self, **kwargs):
             self.write_group(self.__file, gbldr)
         for name, dbldr in f_builder.datasets.items():
             self.write_dataset(self.__file, dbldr, link_data)
+        for name, lbldr in f_builder.links.items():
+            self.write_link(self.__file, lbldr)
         self.set_attributes(self.__file, f_builder.attributes)
         self.__add_refs()
 
@@ -632,6 +636,8 @@ def write_link(self, **kwargs):
             target_filename = os.path.abspath(target_builder.source)
             parent_filename = os.path.abspath(parent.file.filename)
             relative_path = os.path.relpath(target_filename, os.path.dirname(parent_filename))
+            if target_builder.location is not None:
+                path = target_builder.location + path
             link_obj = ExternalLink(relative_path, path)
         else:
             msg = 'cannot create external link to %s' % path
diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py
index 1543abd61..0f6b3fd44 100644
--- a/src/hdmf/build/builders.py
+++ b/src/hdmf/build/builders.py
@@ -104,6 +104,18 @@ def __init__(self, **kwargs):
         super(BaseBuilder, self).__setitem__(BaseBuilder.__attribute, dict())
         for name, val in attributes.items():
             self.set_attribute(name, val)
+        self.__location = None
+
+    @property
+    def location(self):
+        """
+        The location of this Builder in its source
+        """
+        return self.__location
+
+    @location.setter
+    def location(self, val):
+        self.__location = val
 
     @property
     def attributes(self):
diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py
index 5a8e96d2e..514278e68 100644
--- a/tests/unit/test_io_hdf5_h5tools.py
+++ b/tests/unit/test_io_hdf5_h5tools.py
@@ -10,7 +10,7 @@
 from hdmf.backends.hdf5.h5tools import HDF5IO, ROOT_NAME
 from hdmf.backends.hdf5 import H5DataIO
 from hdmf.backends.io import UnsupportedOperation
-from hdmf.build import DatasetBuilder, BuildManager, TypeMap, ObjectMapper
+from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, ObjectMapper
 from hdmf.spec.namespace import NamespaceCatalog
 from hdmf.spec.spec import AttributeSpec, DatasetSpec, GroupSpec, ZERO_OR_MANY, ONE_OR_MANY
 from hdmf.spec.namespace import SpecNamespace
@@ -472,12 +472,7 @@ class TestRoundTrip(unittest.TestCase):
 
     def setUp(self):
         self.manager = _get_manager()
-        self.test_temp_file = tempfile.NamedTemporaryFile()
-        self.test_temp_file.close()
-        # On Windows h5py cannot truncate an open file in write mode.
-        # The temp file will be closed before h5py truncates it
-        # and will be removed during the tearDown step.
-        self.path = self.test_temp_file.name
+        self.path = get_temp_filepath()
 
     def tearDown(self):
         if os.path.exists(self.path):
@@ -525,12 +520,7 @@ class TestCacheSpec(unittest.TestCase):
 
     def setUp(self):
         self.manager = _get_manager()
-        self.test_temp_file = tempfile.NamedTemporaryFile()
-        self.test_temp_file.close()
-        # On Windows h5py cannot truncate an open file in write mode.
-        # The temp file will be closed before h5py truncates it
-        # and will be removed during the tearDown step.
-        self.path = self.test_temp_file.name
+        self.path = get_temp_filepath()
 
     def tearDown(self):
         if os.path.exists(self.path):
@@ -578,12 +568,7 @@ class TestNoCacheSpec(unittest.TestCase):
 
     def setUp(self):
         self.manager = _get_manager()
-        self.test_temp_file = tempfile.NamedTemporaryFile()
-        self.test_temp_file.close()
-        # On Windows h5py cannot truncate an open file in write mode.
-        # The temp file will be closed before h5py truncates it
-        # and will be removed during the tearDown step.
-        self.path = self.test_temp_file.name
+        self.path = get_temp_filepath()
 
     def tearDown(self):
         if os.path.exists(self.path):
@@ -599,7 +584,7 @@ def test_no_cache_spec(self):
         with HDF5IO(self.path, manager=self.manager, mode='w') as io:
             io.write(foofile, cache_spec=False)
 
-        with File(self.test_temp_file.name) as f:
+        with File(self.path) as f:
             self.assertNotIn('specifications', f)
 
 
@@ -745,12 +730,7 @@ class HDF5IOReadNoDataTest(unittest.TestCase):
     """ Test if file exists and there is no data, read with mode (r, r+, a) throws error """
 
     def setUp(self):
-        # On Windows h5py cannot truncate an open file in write mode.
-        # The temp file will be closed before h5py truncates it
-        # and will be removed during the tearDown step.
-        temp_file = tempfile.NamedTemporaryFile()
-        temp_file.close()
-        self.path = temp_file.name
+        self.path = get_temp_filepath()
         temp_io = HDF5IO(self.path, mode='w')
         temp_io.close()
         self.io = None
@@ -791,12 +771,7 @@ class HDF5IOReadData(unittest.TestCase):
     """
 
     def setUp(self):
-        temp_file = tempfile.NamedTemporaryFile()
-        temp_file.close()
-        self.path = temp_file.name
-        # On Windows h5py cannot truncate an open file in write mode.
-        # The temp file will be closed before h5py truncates it
-        # and will be removed during the tearDown step.
+        self.path = get_temp_filepath()
         foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14)
         bucket1 = FooBucket('test_bucket1', [foo1])
         self.foofile1 = FooFile('test_foofile1', buckets=[bucket1])
@@ -867,9 +842,7 @@ class HDF5IOWriteFileExists(unittest.TestCase):
     """ Test if file exists, write in mode (r+, w, a) is ok and write in mode r throws error """
 
     def setUp(self):
-        temp_file = tempfile.NamedTemporaryFile()
-        temp_file.close()
-        self.path = temp_file.name
+        self.path = get_temp_filepath()
 
         foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14)
         bucket1 = FooBucket('test_bucket1', [foo1])
@@ -928,9 +901,7 @@ def test_write_r(self):
 class H5DataIOValid(unittest.TestCase):
 
     def setUp(self):
-        temp_file = tempfile.NamedTemporaryFile()
-        temp_file.close()
-        self.paths = [temp_file.name, ]
+        self.paths = [get_temp_filepath(), ]
 
         self.foo1 = Foo('foo1', H5DataIO([1, 2, 3, 4, 5]), "I am foo1", 17, 3.14)
         bucket1 = FooBucket('test_bucket1', [self.foo1])
@@ -964,9 +935,7 @@ def test_link(self):
         bucket2 = FooBucket('test_bucket2', [self.foo2])
         foofile2 = FooFile(buckets=[bucket2])
 
-        temp_file = tempfile.NamedTemporaryFile()
-        temp_file.close()
-        self.paths.append(temp_file.name)
+        self.paths.append(get_temp_filepath())
 
         with HDF5IO(self.paths[1], manager=_get_manager(), mode='w') as io:
             io.write(foofile2)
@@ -1024,5 +993,63 @@ def test_link(self):
         self.assertEqual(next(my_iter), 1)
 
+
+def get_temp_filepath():
+    # On Windows h5py cannot truncate an open file in write mode.
+    # The temp file will be closed before h5py truncates it
+    # and will be removed during the tearDown step.
+    temp_file = tempfile.NamedTemporaryFile()
+    temp_file.close()
+    return temp_file.name
+
+
+class TestReadLink(unittest.TestCase):
+    def setUp(self):
+        self.target_path = get_temp_filepath()
+        self.link_path = get_temp_filepath()
+        self.root1 = GroupBuilder(name='root')
+        self.subgroup = self.root1.add_group('test_group')
+        self.dataset = self.subgroup.add_dataset('test_dataset', data=[1, 2, 3, 4])
+
+        self.root2 = GroupBuilder(name='root')
+        self.group_link = self.root2.add_link(self.subgroup, 'link_to_test_group')
+        self.dataset_link = self.root2.add_link(self.dataset, 'link_to_test_dataset')
+
+        with HDF5IO(self.target_path, manager=_get_manager(), mode='w') as io:
+            io.write_builder(self.root1)
+        self.root1.source = self.target_path
+
+        with HDF5IO(self.link_path, manager=_get_manager(), mode='w') as io:
+            io.write_builder(self.root2)
+        self.root2.source = self.link_path
+
+    def test_set_link_loc(self):
+        """
+        Test that Builder location is set when it is read as a link
+        """
+        read_io = HDF5IO(self.link_path, manager=_get_manager(), mode='r')
+        bldr = read_io.read_builder()
+        self.assertEqual(bldr['link_to_test_group'].builder.location, '/')
+        self.assertEqual(bldr['link_to_test_dataset'].builder.location, '/test_group')
+        read_io.close()
+
+    def test_link_to_link(self):
+        """
+        Test that link to link gets written and read properly
+        """
+        link_to_link_path = get_temp_filepath()
+        read_io1 = HDF5IO(self.link_path, manager=_get_manager(), mode='r')
+        bldr1 = read_io1.read_builder()
+        root3 = GroupBuilder(name='root')
+        root3.add_link(bldr1['link_to_test_group'].builder, 'link_to_link')
+        with HDF5IO(link_to_link_path, manager=_get_manager(), mode='w') as io:
+            io.write_builder(root3)
+        read_io1.close()
+
+        read_io2 = HDF5IO(link_to_link_path, manager=_get_manager(), mode='r')
+        bldr2 = read_io2.read_builder()
+        self.assertEqual(bldr2['link_to_link'].builder.source, self.target_path)
+        read_io2.close()
+
 
 if __name__ == '__main__':
     unittest.main()
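
Taken together, these changes let a link whose target is itself an external link resolve back to the original file: `__read_group` records the target's parent location on the builder it constructs, and `write_link` prepends that `location` to the path when it builds the `ExternalLink`. The sketch below condenses the new `TestReadLink` cases into a linear script using only the raw builder API exercised above. It is a minimal sketch, not part of the patch: `get_temp_filepath` mirrors the helper added by the patch, and `HDF5IO` is constructed without an explicit `BuildManager` (as `HDF5IOReadNoDataTest` does) instead of with the test suite's `_get_manager()`, which is an assumption that the default manager is enough for a raw builder round trip.

```python
import tempfile

from hdmf.backends.hdf5.h5tools import HDF5IO
from hdmf.build import GroupBuilder


def get_temp_filepath():
    # Same trick as the test suite: close the temp file so h5py can truncate it.
    temp_file = tempfile.NamedTemporaryFile()
    temp_file.close()
    return temp_file.name


target_path = get_temp_filepath()        # file that owns the real group
link_path = get_temp_filepath()          # file with an external link to it
link_to_link_path = get_temp_filepath()  # file with a link to that link

# File 1: a group containing a dataset.
root1 = GroupBuilder(name='root')
subgroup = root1.add_group('test_group')
subgroup.add_dataset('test_dataset', data=[1, 2, 3, 4])

# File 2: an external link to the group in file 1.
root2 = GroupBuilder(name='root')
root2.add_link(subgroup, 'link_to_test_group')

# NOTE: no BuildManager is passed here (an assumption; the new tests pass
# manager=_get_manager() from the test module). Only raw builders are
# written and read, so no type mapping is needed.
with HDF5IO(target_path, mode='w') as io:
    io.write_builder(root1)
root1.source = target_path  # as in TestReadLink.setUp

with HDF5IO(link_path, mode='w') as io:
    io.write_builder(root2)

# Reading file 2 sets the linked builder's new `location` to the parent of
# the link target inside file 1 ('/' for /test_group).
read_io = HDF5IO(link_path, mode='r')
bldr = read_io.read_builder()
linked = bldr['link_to_test_group'].builder
assert linked.location == '/'

# File 3: a link to the link. write_link() now prepends `location` to the
# target path, so the new ExternalLink points back at file 1, not file 2.
root3 = GroupBuilder(name='root')
root3.add_link(linked, 'link_to_link')
with HDF5IO(link_to_link_path, mode='w') as io:
    io.write_builder(root3)
read_io.close()

with HDF5IO(link_to_link_path, mode='r') as io:
    assert io.read_builder()['link_to_link'].builder.source == target_path
```

The two assertions correspond to `test_set_link_loc` and `test_link_to_link` above.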