diff --git a/CHANGELOG.md b/CHANGELOG.md index 4587282..5a72637 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ The `pycldf` package adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## Unreleased + +- Make sure all local media files are copied with `Dataset.copy` as well. + + ## [1.41.0] - 2025-02-15 - Added a utility function to query SQLite DBs using user-defined functions, aggregates or collations. diff --git a/src/pycldf/dataset.py b/src/pycldf/dataset.py index f0aa3f1..034e74d 100644 --- a/src/pycldf/dataset.py +++ b/src/pycldf/dataset.py @@ -957,6 +957,8 @@ def copy(self, dest: typing.Union[str, pathlib.Path], mdname: str = None) -> pat ... if 'with_examples' in ds.directory.name: ... ds.copy('some_directory', mdname='md.json') """ + from pycldf.media import MediaTable + dest = pathlib.Path(dest) if not dest.exists(): dest.mkdir(parents=True) @@ -983,6 +985,12 @@ def copy(self, dest: typing.Union[str, pathlib.Path], mdname: str = None) -> pat mdpath = dest.joinpath( mdname or # noqa: W504 (self.tablegroup.base.split('/')[-1] if from_url else self.tablegroup._fname.name)) + if 'MediaTable' in self: + for f in MediaTable(self): + if f.scheme == 'file' and f.local_path().exists(): + target = dest / f.relpath + target.parent.mkdir(parents=True, exist_ok=True) + shutil.copy(f.local_path(), target) if from_url: del ds.tablegroup.at_props['base'] # pragma: no cover ds.write_metadata(fname=mdpath) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index c66e386..fb6b8c9 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -14,6 +14,7 @@ Generic, Wordlist, StructureDataset, Dictionary, ParallelText, Dataset, TextCorpus, GitRepository, make_column, get_modules, iter_datasets, SchemaError) from pycldf.sources import Sources +from pycldf.media import MediaTable @pytest.fixture @@ -925,6 +926,15 @@ def test_Dataset_copy(tmp_path): assert Dataset.from_metadata(tmp_path / 'moved' / 'md.json').validate() +def test_Dataset_copy_with_media(tmp_path, dataset_with_media): + dataset_with_media.copy(tmp_path, mdname='md.json') + filecontent = {f.id: f.read() for f in MediaTable(dataset_with_media)} + ds = Dataset.from_metadata(tmp_path / 'md.json') + for i, f in enumerate(MediaTable(ds)): + assert f.read() == filecontent[f.id] + assert i > 1 + + def test_Dataset_rename_column(ds): lt = ds.add_component('LanguageTable') lt.aboutUrl = URITemplate('{#ID}.md')