Skip to content

Commit

Permalink
closes #191
Browse files Browse the repository at this point in the history
  • Loading branch information
xrotwang committed Feb 19, 2025
1 parent 8230ea3 commit 80a3ff8
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 0 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
The `pycldf` package adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).


## Unreleased

- Make sure all local media files are copied with `Dataset.copy` as well.


## [1.41.0] - 2025-02-15

- Added a utility function to query SQLite DBs using user-defined functions, aggregates or collations.
Expand Down
8 changes: 8 additions & 0 deletions src/pycldf/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,8 @@ def copy(self, dest: typing.Union[str, pathlib.Path], mdname: str = None) -> pat
... if 'with_examples' in ds.directory.name:
... ds.copy('some_directory', mdname='md.json')
"""
from pycldf.media import MediaTable

dest = pathlib.Path(dest)
if not dest.exists():
dest.mkdir(parents=True)
Expand All @@ -983,6 +985,12 @@ def copy(self, dest: typing.Union[str, pathlib.Path], mdname: str = None) -> pat
mdpath = dest.joinpath(
mdname or # noqa: W504
(self.tablegroup.base.split('/')[-1] if from_url else self.tablegroup._fname.name))
if 'MediaTable' in self:
for f in MediaTable(self):
if f.scheme == 'file' and f.local_path().exists():
target = dest / f.relpath
target.parent.mkdir(parents=True, exist_ok=True)
shutil.copy(f.local_path(), target)
if from_url:
del ds.tablegroup.at_props['base'] # pragma: no cover
ds.write_metadata(fname=mdpath)
Expand Down
10 changes: 10 additions & 0 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
Generic, Wordlist, StructureDataset, Dictionary, ParallelText, Dataset, TextCorpus,
GitRepository, make_column, get_modules, iter_datasets, SchemaError)
from pycldf.sources import Sources
from pycldf.media import MediaTable


@pytest.fixture
Expand Down Expand Up @@ -925,6 +926,15 @@ def test_Dataset_copy(tmp_path):
assert Dataset.from_metadata(tmp_path / 'moved' / 'md.json').validate()


def test_Dataset_copy_with_media(tmp_path, dataset_with_media):
    """Check that media files are readable, with equal content, from a copied dataset.

    The dataset is copied into ``tmp_path``; every media item listed in the copy
    must then read back the same content as the item with the same id in the
    original dataset.
    """
    dataset_with_media.copy(tmp_path, mdname='md.json')
    # Content of the original files, keyed by media id, as the reference.
    filecontent = {f.id: f.read() for f in MediaTable(dataset_with_media)}
    ds = Dataset.from_metadata(tmp_path / 'md.json')
    copied = list(MediaTable(ds))
    # Guard against a vacuous pass: require at least three media items. Checking
    # the length (instead of a leftover loop index) gives a clear assertion
    # failure rather than an UnboundLocalError when the copy has no media rows.
    assert len(copied) > 2
    for f in copied:
        assert f.read() == filecontent[f.id]


def test_Dataset_rename_column(ds):
lt = ds.add_component('LanguageTable')
lt.aboutUrl = URITemplate('{#ID}.md')
Expand Down

0 comments on commit 80a3ff8

Please sign in to comment.