From 629d9244b9b5eaefe0e78a43c53335f07dcdce6d Mon Sep 17 00:00:00 2001
From: Robert Forkel
Date: Fri, 26 Apr 2024 17:05:28 +0200
Subject: [PATCH] closes #173

---
 CHANGELOG.md             |  1 +
 src/pycldf/validators.py | 11 +++++++++++
 tests/test_dataset.py    | 11 +++++++++++
 3 files changed, 23 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb198ea..2febd70 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ The `pycldf` package adheres to [Semantic Versioning](http://semver.org/spec/v2.
   by `json.dumps`.
 - Fixed bug whereby SQLite conversion would fail when the name of a renamed column clashed with an
   existing column name in the table.
+- Emit warning when encountering invalid main part for mediaType property.
 
 ## [1.37.1] - 2024-03-18
 
diff --git a/src/pycldf/validators.py b/src/pycldf/validators.py
index 80cffd8..d252d54 100644
--- a/src/pycldf/validators.py
+++ b/src/pycldf/validators.py
@@ -1,4 +1,5 @@
 import re
+import warnings
 import functools
 
 
@@ -37,6 +38,12 @@ def valid_grammaticalityJudgement(dataset, table, column, row):
             raise ValueError('Glottolog language linked from ungrammatical example')
 
 
+def valid_mediaType(dataset, table, column, row):
+    main, _, sub = row[column.name].partition('/')
+    if not re.fullmatch('[a-z]+', main):
+        warnings.warn('Invalid main part in media type: {}'.format(main))
+
+
 VALIDATORS = [
     (
         None,
@@ -58,4 +65,8 @@ def valid_grammaticalityJudgement(dataset, table, column, row):
         None,
         'http://cldf.clld.org/v1.0/terms.rdf#grammaticalityJudgement',
         valid_grammaticalityJudgement),
+    (
+        None,
+        'http://cldf.clld.org/v1.0/terms.rdf#mediaType',
+        valid_mediaType),
 ]
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 086c775..4f6d9db 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -2,6 +2,7 @@
 import logging
 import zipfile
 import warnings
+import mimetypes
 import contextlib
 
 import pytest
@@ -177,6 +178,16 @@ def test_example_validators(ds):
     ds.validate()
 
 
+def test_invalid_mimetype(ds, recwarn):
+    ds.add_component('MediaTable')
+    ds.write(MediaTable=[{
+        'ID': '1',
+        'Media_Type': mimetypes.guess_type('f.png'),
+        'Download_URL': 'http://example.org'}])
+    ds.validate()
+    assert 'Invalid main part' in str(recwarn.pop(UserWarning).message)
+
+
 def test_regex_validator_for_listvalued_column(ds):
     ds.add_table(
         'test',