diff --git a/.flake8 b/.flake8 index c56dba3ab..fc708eaf8 100644 --- a/.flake8 +++ b/.flake8 @@ -2,4 +2,4 @@ exclude = .git,.tox,__pycache__,dist,.venv*,docs,build max-line-length = 90 # black related: W503/W504 conflict, black causes E203 -ignore = W503,W504,E203,B019 +extend-ignore = W503,W504,E203,B019 diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 42abd1d61..6f0e0e09c 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -24,7 +24,7 @@ - id: check-azure-pipelines name: Validate Azure Pipelines description: 'Validate Azure Pipelines config against the schema provided by Microsoft' - entry: check-jsonschema --builtin-schema vendor.azure-pipelines --data-transform azure-pipelines + entry: check-jsonschema --builtin-schema vendor.azure-pipelines --data-transform azure-pipelines --regex-variant nonunicode language: python files: ^(\.)?azure-pipelines\.(yml|yaml)$ types: [yaml] diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ddd3a5414..dc23d3d6f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,9 +9,21 @@ Unreleased ---------- .. vendor-insert-here - - Update vendored schemas (2024-12-22) - Drop support for Python 3.8 +- Rename ``--format-regex`` to ``--regex-variant`` and convert + ``--format-regex`` to a deprecated alias. + It will be removed in a future release. +- Regular expression interpretation in ``"pattern"``, ``"patternProperties"``, and + ``"format": "regex"`` usages now uses unicode-mode JS regular expressions by + default. (:issue:`353`) + + - Use ``--regex-variant nonunicode`` to get non-unicode JS regular + expressions, the default behavior from previous versions. + - Custom validators may be impacted by the new regular expression + features. Validators are now always modified with the ``jsonschema`` + library's ``extend()`` API to control the ``pattern`` and + ``patternProperties`` keywords. 
0.30.0 ------ diff --git a/docs/usage.rst b/docs/usage.rst index 56a5bb12a..7de004e75 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -183,11 +183,12 @@ Example usage: # disables all three of time, date-time, and iri --disable-formats time,date-time --disable-formats iri -``--format-regex`` -~~~~~~~~~~~~~~~~~~ +``--regex-variant`` +~~~~~~~~~~~~~~~~~~~ -Set a mode for handling of the ``"regex"`` value for ``"format"``. The modes are as -follows: +Set a mode for handling of the ``"regex"`` value for ``"format"`` and the mode +for ``"pattern"`` and ``"patternProperties"`` interpretation. +The modes are as follows: .. list-table:: Regex Options :widths: 15 30 @@ -196,9 +197,11 @@ follows: * - mode - description * - default - - Require the regex to be valid in ECMAScript regex syntax. + - Use ECMAScript regex syntax. + * - nonunicode + - Use ECMAScript regex syntax, but without unicode escapes enabled. * - python - - Require the regex to be valid in Python regex syntax. + - Use Python regex syntax. Other Options -------------- diff --git a/pyproject.toml b/pyproject.toml index 530f6eee1..e26dfec2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ 'tomli>=2.0;python_version<"3.11"', "ruamel.yaml==0.18.7", "jsonschema>=4.18.0,<5.0", - "regress>=0.4.0", + "regress>=2024.11.1", "requests<3.0", "click>=8,<9", ] diff --git a/src/check_jsonschema/catalog.py b/src/check_jsonschema/catalog.py index 0ac915c23..cebbd78a5 100644 --- a/src/check_jsonschema/catalog.py +++ b/src/check_jsonschema/catalog.py @@ -31,7 +31,12 @@ def _githubusercontent_url(owner: str, repo: str, ref: str, path: str) -> str: "Validate Azure Pipelines config against the schema provided " "by Microsoft" ), - "add_args": ["--data-transform", "azure-pipelines"], + "add_args": [ + "--data-transform", + "azure-pipelines", + "--regex-variant", + "nonunicode", + ], "files": r"^(\.)?azure-pipelines\.(yml|yaml)$", "types": "yaml", }, diff --git a/src/check_jsonschema/checker.py
b/src/check_jsonschema/checker.py index 63d42d4e6..c6cd852eb 100644 --- a/src/check_jsonschema/checker.py +++ b/src/check_jsonschema/checker.py @@ -11,6 +11,7 @@ from .formats import FormatOptions from .instance_loader import InstanceLoader from .parsers import ParseError +from .regex_variants import RegexImplementation from .reporter import Reporter from .result import CheckResult from .schema_loader import SchemaLoaderBase, SchemaParseError, UnsupportedUrlScheme @@ -28,7 +29,8 @@ def __init__( instance_loader: InstanceLoader, reporter: Reporter, *, - format_opts: FormatOptions | None = None, + format_opts: FormatOptions, + regex_impl: RegexImplementation, traceback_mode: str = "short", fill_defaults: bool = False, ) -> None: @@ -36,7 +38,8 @@ def __init__( self._instance_loader = instance_loader self._reporter = reporter - self._format_opts = format_opts if format_opts is not None else FormatOptions() + self._format_opts = format_opts + self._regex_impl = regex_impl self._traceback_mode = traceback_mode self._fill_defaults = fill_defaults @@ -51,12 +54,12 @@ def get_validator( ) -> jsonschema.protocols.Validator: try: return self._schema_loader.get_validator( - path, doc, self._format_opts, self._fill_defaults + path, doc, self._format_opts, self._regex_impl, self._fill_defaults ) except SchemaParseError as e: self._fail("Error: schemafile could not be parsed as JSON", e) except jsonschema.SchemaError as e: - self._fail(f"Error: schemafile was not valid: {e}\n", e) + self._fail("Error: schemafile was not valid\n", e) except UnsupportedUrlScheme as e: self._fail(f"Error: {e}\n", e) except Exception as e: diff --git a/src/check_jsonschema/cli/main_command.py b/src/check_jsonschema/cli/main_command.py index 8e15e0620..9e93ff1ff 100644 --- a/src/check_jsonschema/cli/main_command.py +++ b/src/check_jsonschema/cli/main_command.py @@ -9,9 +9,10 @@ from ..catalog import CUSTOM_SCHEMA_NAMES, SCHEMA_CATALOG from ..checker import SchemaChecker -from ..formats import 
KNOWN_FORMATS, RegexVariantName +from ..formats import KNOWN_FORMATS from ..instance_loader import InstanceLoader from ..parsers import SUPPORTED_FILE_FORMATS +from ..regex_variants import RegexImplementation, RegexVariantName from ..reporter import REPORTER_BY_NAME, Reporter from ..schema_loader import ( BuiltinSchemaLoader, @@ -68,10 +69,11 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: date, date-time, email, ipv4, ipv6, regex, uuid \b -For the "regex" format, there are multiple modes which can be specified with -'--format-regex': - default | check that the string is a valid ECMAScript regex - python | check that the string is a valid python regex +For handling of regexes, there are multiple modes which can be specified with +'--regex-variant': + default | use ECMAScript regex syntax (via regress) + nonunicode | use ECMAScript regex syntax, but in non-unicode mode (via regress) + python | use python regex syntax \b The '--builtin-schema' flag supports the following schema names: @@ -138,11 +140,18 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str: ) @click.option( "--format-regex", + hidden=True, + help="Legacy name for `--regex-variant`.", + default=None, + type=click.Choice([x.value for x in RegexVariantName], case_sensitive=False), +) +@click.option( + "--regex-variant", help=( - "Set the mode of format validation for regexes. " - "If `--disable-formats regex` is used, this option has no effect." + "Name of which regex dialect should be used for format checking " + "and 'pattern' matching." 
), - default=RegexVariantName.default.value, + default=None, type=click.Choice([x.value for x in RegexVariantName], case_sensitive=False), ) @click.option( @@ -230,7 +239,8 @@ def main( no_cache: bool, cache_filename: str | None, disable_formats: tuple[list[str], ...], - format_regex: t.Literal["python", "default"], + format_regex: t.Literal["python", "nonunicode", "default"] | None, + regex_variant: t.Literal["python", "nonunicode", "default"] | None, default_filetype: t.Literal["json", "yaml", "toml", "json5"], traceback_mode: t.Literal["full", "short"], data_transform: t.Literal["azure-pipelines", "gitlab-ci"] | None, @@ -243,6 +253,8 @@ def main( ) -> None: args = ParseResult() + args.set_regex_variant(regex_variant, legacy_opt=format_regex) + args.set_schema(schemafile, builtin_schema, check_metaschema) args.set_validator(validator_class) @@ -257,7 +269,6 @@ def main( else: args.disable_formats = normalized_disable_formats - args.format_regex = RegexVariantName(format_regex) args.disable_cache = no_cache args.default_filetype = default_filetype args.fill_defaults = fill_defaults @@ -318,6 +329,7 @@ def build_checker(args: ParseResult) -> SchemaChecker: instance_loader, reporter, format_opts=args.format_opts, + regex_impl=RegexImplementation(args.regex_variant), traceback_mode=args.traceback_mode, fill_defaults=args.fill_defaults, ) diff --git a/src/check_jsonschema/cli/parse_result.py b/src/check_jsonschema/cli/parse_result.py index a317378f9..bfd9065b1 100644 --- a/src/check_jsonschema/cli/parse_result.py +++ b/src/check_jsonschema/cli/parse_result.py @@ -6,7 +6,8 @@ import click import jsonschema -from ..formats import FormatOptions, RegexVariantName +from ..formats import FormatOptions +from ..regex_variants import RegexImplementation, RegexVariantName from ..transforms import Transform @@ -36,12 +37,24 @@ def __init__(self) -> None: # regex format options self.disable_all_formats: bool = False self.disable_formats: tuple[str, ...] 
= () - self.format_regex: RegexVariantName = RegexVariantName.default + self.regex_variant: RegexVariantName = RegexVariantName.default # error and output controls self.verbosity: int = 1 self.traceback_mode: str = "short" self.output_format: str = "text" + def set_regex_variant( + self, + variant_opt: t.Literal["python", "nonunicode", "default"] | None, + *, + legacy_opt: t.Literal["python", "nonunicode", "default"] | None = None, + ) -> None: + variant_name: t.Literal["python", "nonunicode", "default"] | None = ( + variant_opt or legacy_opt + ) + if variant_name: + self.regex_variant = RegexVariantName(variant_name) + def set_schema( self, schemafile: str | None, builtin_schema: str | None, check_metaschema: bool ) -> None: @@ -82,7 +95,7 @@ def set_validator( @property def format_opts(self) -> FormatOptions: return FormatOptions( + regex_impl=RegexImplementation(self.regex_variant), enabled=not self.disable_all_formats, - regex_variant=self.format_regex, disabled_formats=self.disable_formats, ) diff --git a/src/check_jsonschema/formats/__init__.py b/src/check_jsonschema/formats/__init__.py index 8202d9a00..2308c4313 100644 --- a/src/check_jsonschema/formats/__init__.py +++ b/src/check_jsonschema/formats/__init__.py @@ -1,14 +1,11 @@ from __future__ import annotations import copy -import enum -import re -import typing as t import jsonschema import jsonschema.validators -import regress +from ..regex_variants import RegexImplementation from .implementations import validate_rfc3339, validate_time # all known format strings except for a selection from draft3 which have either @@ -39,42 +36,16 @@ ) -class RegexVariantName(enum.Enum): - default = "default" - python = "python" - - -class RegexImplementation: - def __init__(self, variant: RegexVariantName) -> None: - self.variant = variant - - def check_format(self, instance: t.Any) -> bool: - if not isinstance(instance, str): - return True - - try: - if self.variant == RegexVariantName.default: - regress.Regex(instance) 
- else: - re.compile(instance) - # something is wrong with RegressError getting into the published types - # needs investigation... for now, ignore the error - except (regress.RegressError, re.error): # type: ignore[attr-defined] - return False - - return True - - class FormatOptions: def __init__( self, *, + regex_impl: RegexImplementation, enabled: bool = True, - regex_variant: RegexVariantName = RegexVariantName.default, disabled_formats: tuple[str, ...] = (), ) -> None: self.enabled = enabled - self.regex_variant = regex_variant + self.regex_impl = regex_impl self.disabled_formats = disabled_formats @@ -95,14 +66,10 @@ def make_format_checker( if not opts.enabled: return None - # copy the base checker - base_checker = get_base_format_checker(schema_dialect) - checker = copy.deepcopy(base_checker) + # customize around regex checking first (dialect-aware base checker) + checker = format_checker_for_regex_impl(opts.regex_impl, schema_dialect) - # replace the regex check - del checker.checkers["regex"] - regex_impl = RegexImplementation(opts.regex_variant) - checker.checks("regex")(regex_impl.check_format) + # add other custom format checks checker.checks("date-time")(validate_rfc3339) checker.checks("time")(validate_time) @@ -113,3 +80,18 @@ def make_format_checker( del checker.checkers[checkname] return checker + + +def format_checker_for_regex_impl( + regex_impl: RegexImplementation, schema_dialect: str | None = None +) -> jsonschema.FormatChecker: + # convert to a schema-derived format checker, and copy it + # for safe modification + base_checker = get_base_format_checker(schema_dialect) + checker = copy.deepcopy(base_checker) + + # replace the regex check + del checker.checkers["regex"] + checker.checks("regex")(regex_impl.check_format) + + return checker diff --git a/src/check_jsonschema/regex_variants.py b/src/check_jsonschema/regex_variants.py new file mode 100644 index 000000000..b76527867 --- /dev/null +++ b/src/check_jsonschema/regex_variants.py @@ -0,0 +1,142 @@ +import enum +import re +import
typing as t + +import jsonschema +import regress + + +class RegexVariantName(enum.Enum): + default = "default" + nonunicode = "nonunicode" + python = "python" + + +class RegexImplementation: + """ + A high-level interface for getting at the different possible + implementations of regex behaviors. + """ + + _concrete: "_ConcreteImplementation" + + def __init__(self, variant: RegexVariantName) -> None: + self.variant = variant + + if self.variant == RegexVariantName.default: + self._concrete = _RegressImplementation() + elif self.variant == RegexVariantName.nonunicode: + self._concrete = _NonunicodeRegressImplementation() + else: + self._concrete = _PythonImplementation() + + self.check_format = self._concrete.check_format + self.pattern_keyword = self._concrete.pattern_keyword + self.patternProperties_keyword = self._concrete.patternProperties_keyword + + +class _ConcreteImplementation(t.Protocol): + def check_format(self, instance: t.Any) -> bool: ... + + def pattern_keyword( + self, validator: t.Any, pattern: str, instance: str, schema: t.Any + ) -> t.Iterator[jsonschema.ValidationError]: ... + + def patternProperties_keyword( + self, + validator: t.Any, + patternProperties: dict[str, t.Any], + instance: dict[str, t.Any], + schema: t.Any, + ) -> t.Iterator[jsonschema.ValidationError]: ... 
+ + +class _RegressImplementation: + def _compile_pattern(self, pattern: str) -> regress.Regex: + return regress.Regex(pattern, flags="u") + + def check_format(self, instance: t.Any) -> bool: + if not isinstance(instance, str): + return True + try: + self._compile_pattern(instance) + except regress.RegressError: + return False + return True + + def pattern_keyword( + self, validator: t.Any, pattern: str, instance: str, schema: t.Any + ) -> t.Iterator[jsonschema.ValidationError]: + if not validator.is_type(instance, "string"): + return + + regress_pattern = self._compile_pattern(pattern) + if not regress_pattern.find(instance): + yield jsonschema.ValidationError(f"{instance!r} does not match {pattern!r}") + + def patternProperties_keyword( + self, + validator: t.Any, + patternProperties: dict[str, t.Any], + instance: dict[str, t.Any], + schema: t.Any, + ) -> t.Iterator[jsonschema.ValidationError]: + if not validator.is_type(instance, "object"): + return + + for pattern, subschema in patternProperties.items(): + regress_pattern = self._compile_pattern(pattern) + for k, v in instance.items(): + if regress_pattern.find(k): + yield from validator.descend( + v, + subschema, + path=k, + schema_path=pattern, + ) + + +class _NonunicodeRegressImplementation(_RegressImplementation): + def _compile_pattern(self, pattern: str) -> regress.Regex: + return regress.Regex(pattern) + + +class _PythonImplementation: + def check_format(self, instance: t.Any) -> bool: + if not isinstance(instance, str): + return True + try: + re.compile(instance) + except re.error: + return False + return True + + def pattern_keyword( + self, validator: t.Any, pattern: str, instance: str, schema: t.Any + ) -> t.Iterator[jsonschema.ValidationError]: + if not validator.is_type(instance, "string"): + return + + re_pattern = re.compile(pattern) + if not re_pattern.search(instance): + yield jsonschema.ValidationError(f"{instance!r} does not match {pattern!r}") + + def patternProperties_keyword( + self, + 
validator: t.Any, + patternProperties: dict[str, t.Any], + instance: dict[str, t.Any], + schema: t.Any, + ) -> t.Iterator[jsonschema.ValidationError]: + if not validator.is_type(instance, "object"): + return + + for pattern, subschema in patternProperties.items(): + for k, v in instance.items(): + if re.search(pattern, k): + yield from validator.descend( + v, + subschema, + path=k, + schema_path=pattern, + ) diff --git a/src/check_jsonschema/schema_loader/main.py b/src/check_jsonschema/schema_loader/main.py index 099107455..e056389a9 100644 --- a/src/check_jsonschema/schema_loader/main.py +++ b/src/check_jsonschema/schema_loader/main.py @@ -9,8 +9,9 @@ import jsonschema from ..builtin_schemas import get_builtin_schema -from ..formats import FormatOptions, make_format_checker +from ..formats import FormatOptions, format_checker_for_regex_impl, make_format_checker from ..parsers import ParserSet +from ..regex_variants import RegexImplementation from ..utils import is_url_ish from .errors import UnsupportedUrlScheme from .readers import HttpSchemaReader, LocalSchemaReader, StdinSchemaReader @@ -45,12 +46,26 @@ def set_defaults_then_validate( ) +def _extend_with_pattern_implementation( + validator_class: type[jsonschema.protocols.Validator], + regex_impl: RegexImplementation, +) -> type[jsonschema.Validator]: + return jsonschema.validators.extend( + validator_class, + { + "pattern": regex_impl.pattern_keyword, + "patternProperties": regex_impl.patternProperties_keyword, + }, + ) + + class SchemaLoaderBase: def get_validator( self, path: pathlib.Path | str, instance_doc: dict[str, t.Any], format_opts: FormatOptions, + regex_impl: RegexImplementation, fill_defaults: bool, ) -> jsonschema.protocols.Validator: raise NotImplementedError @@ -124,22 +139,21 @@ def get_validator( path: pathlib.Path | str, instance_doc: dict[str, t.Any], format_opts: FormatOptions, + regex_impl: RegexImplementation, fill_defaults: bool, ) -> jsonschema.protocols.Validator: - return 
self._get_validator(format_opts, fill_defaults) + return self._get_validator(format_opts, regex_impl, fill_defaults) @functools.lru_cache def _get_validator( self, format_opts: FormatOptions, + regex_impl: RegexImplementation, fill_defaults: bool, ) -> jsonschema.protocols.Validator: retrieval_uri = self.get_schema_retrieval_uri() schema = self.get_schema() - - schema_dialect = schema.get("$schema") - if schema_dialect is not None and not isinstance(schema_dialect, str): - schema_dialect = None + schema_dialect = _dialect_of_schema(schema) # format checker (which may be None) format_checker = make_format_checker(format_opts, schema_dialect) @@ -153,7 +167,8 @@ def _get_validator( if self.validator_class is None: # get the correct validator class and check the schema under its metaschema validator_cls = jsonschema.validators.validator_for(schema) - validator_cls.check_schema(schema) + + _check_schema(validator_cls, schema, regex_impl=regex_impl) else: # for a user-provided validator class, don't check_schema # on the grounds that it might *not* be valid but the user wants to use @@ -168,6 +183,9 @@ def _get_validator( if fill_defaults: validator_cls = _extend_with_default(validator_cls) + # set the regex variant for 'pattern' keywords + validator_cls = _extend_with_pattern_implementation(validator_cls, regex_impl) + # now that we know it's safe to try to create the validator instance, do it validator = validator_cls( schema, @@ -177,6 +195,44 @@ def _get_validator( return t.cast(jsonschema.protocols.Validator, validator) +def _check_schema( + validator_cls: type[jsonschema.protocols.Validator], + schema: dict[str, t.Any], + *, + regex_impl: RegexImplementation, +) -> None: + """A variant definition of Validator.check_schema which uses the regex + implementation and format checker specified.""" + # construct the metaschema validator class (with customized regex impl) + schema_validator_cls = jsonschema.validators.validator_for( + validator_cls.META_SCHEMA, 
default=validator_cls + ) + schema_validator_cls = _extend_with_pattern_implementation( + schema_validator_cls, regex_impl + ) + + # construct a specialized format checker (again, customized regex impl) + metaschema_dialect = _dialect_of_schema(validator_cls.META_SCHEMA) + format_checker = format_checker_for_regex_impl(regex_impl, metaschema_dialect) + + # now, construct and apply the actual validator + schema_validator = schema_validator_cls( + validator_cls.META_SCHEMA, format_checker=format_checker + ) + for error in schema_validator.iter_errors(schema): + raise jsonschema.exceptions.SchemaError.create_from(error) + + +def _dialect_of_schema(schema: dict[str, t.Any] | bool) -> str | None: + if not isinstance(schema, dict): + return None + + schema_dialect = schema.get("$schema") + if schema_dialect is not None and not isinstance(schema_dialect, str): + schema_dialect = None + return schema_dialect + + class BuiltinSchemaLoader(SchemaLoader): def __init__(self, schema_name: str, *, base_uri: str | None = None) -> None: self.schema_name = schema_name @@ -206,6 +262,7 @@ def get_validator( path: pathlib.Path | str, instance_doc: dict[str, t.Any], format_opts: FormatOptions, + regex_impl: RegexImplementation, fill_defaults: bool, ) -> jsonschema.protocols.Validator: schema_validator = jsonschema.validators.validator_for(instance_doc) diff --git a/tests/acceptance/test_custom_validator_class.py b/tests/acceptance/test_custom_validator_class.py index 170f4524b..9504b963b 100644 --- a/tests/acceptance/test_custom_validator_class.py +++ b/tests/acceptance/test_custom_validator_class.py @@ -66,24 +66,32 @@ def _foo_module(mock_module): """\ import jsonschema -class MyValidator: - def __init__(self, schema, *args, **kwargs): - self.schema = schema - self.real_validator = jsonschema.validators.Draft7Validator( - schema, *args, **kwargs - ) - - def iter_errors(self, data, *args, **kwargs): - yield from self.real_validator.iter_errors(data, *args, **kwargs) - for event in 
data["events"]: - if "Occult" in event["title"]: + +def check_occult_properties(validator, properties, instance, schema): + if not validator.is_type(instance, "object"): + return + + for property, subschema in properties.items(): + if property in instance: + if property == "title" and "Occult" in instance["title"]: yield jsonschema.exceptions.ValidationError( "Error! Occult event detected! Run!", - validator=None, + validator=validator, validator_value=None, - instance=event, - schema=self.schema, + instance=instance, + schema=schema, ) + yield from validator.descend( + instance[property], + subschema, + path=property, + schema_path=property, + ) + +MyValidator = jsonschema.validators.extend( + jsonschema.validators.Draft7Validator, + {"properties": check_occult_properties}, +) """, ) @@ -115,7 +123,7 @@ def test_custom_validator_class_can_detect_custom_conditions(run_line, tmp_path) str(doc), ], ) - assert result.exit_code == 1 # fail + assert result.exit_code == 1, result.stdout # fail assert "Occult event detected" in result.stdout, result.stdout diff --git a/tests/acceptance/test_example_files.py b/tests/acceptance/test_example_files.py index 057f07a5a..10411741f 100644 --- a/tests/acceptance/test_example_files.py +++ b/tests/acceptance/test_example_files.py @@ -63,7 +63,7 @@ def test_hook_positive_examples(case_name, run_line): hook_id = POSITIVE_HOOK_CASES[case_name] ret = run_line(HOOK_CONFIG[hook_id] + [rcase.path] + rcase.add_args) - assert ret.exit_code == 0, _format_cli_result(rcase, ret) + assert ret.exit_code == 0, _format_cli_result(ret, rcase) @pytest.mark.parametrize("case_name", NEGATIVE_HOOK_CASES.keys()) @@ -72,7 +72,7 @@ def test_hook_negative_examples(case_name, run_line): hook_id = NEGATIVE_HOOK_CASES[case_name] ret = run_line(HOOK_CONFIG[hook_id] + [rcase.path] + rcase.add_args) - assert ret.exit_code == 1, _format_cli_result(rcase, ret) + assert ret.exit_code == 1, _format_cli_result(ret, rcase) @pytest.mark.parametrize("case_name", 
_get_explicit_cases("positive")) @@ -102,7 +102,37 @@ def test_explicit_positive_examples(case_name, run_line): str(instance), ] ) - assert ret.exit_code == 0 + assert ret.exit_code == 0, _format_cli_result(ret) + + +@pytest.mark.parametrize("case_name", _get_explicit_cases("negative")) +def test_explicit_negative_examples(case_name, run_line): + _check_file_format_skip(case_name) + casedir = EXAMPLE_EXPLICIT_FILES / "negative" / case_name + + instance = casedir / "instance.json" + if not instance.exists(): + instance = casedir / "instance.yaml" + if not instance.exists(): + instance = casedir / "instance.toml" + if not instance.exists(): + raise Exception("could not find an instance file for test case") + + schema = casedir / "schema.json" + if not schema.exists(): + schema = casedir / "schema.yaml" + if not schema.exists(): + raise Exception("could not find a schema file for test case") + + ret = run_line( + [ + "check-jsonschema", + "--schemafile", + str(schema), + str(instance), + ] + ) + assert ret.exit_code == 1, _format_cli_result(ret) def _check_file_format_skip(case_name): @@ -166,10 +196,12 @@ def _package_is_installed(pkg: str) -> bool: return True -def _format_cli_result(rcase: ResolvedCase, result) -> str: +def _format_cli_result(result, rcase: ResolvedCase | None = None) -> str: + prefix = "" + if rcase is not None: + prefix = f"config.add_args={rcase.add_args}\n" return ( - "\n" - f"config.add_args={rcase.add_args}\n" + f"\n{prefix}" f"{result.exit_code=}\n" f"result.stdout={result.output}\n" f"{result.stderr=}" diff --git a/tests/acceptance/test_format_regex_opts.py b/tests/acceptance/test_format_regex_opts.py index 1f0486170..deb4e0fe7 100644 --- a/tests/acceptance/test_format_regex_opts.py +++ b/tests/acceptance/test_format_regex_opts.py @@ -1,6 +1,6 @@ # test on a JavaScript regex which is not a valid python regex -# `--format-regex=default` should accept it -# `--format-regex=python` should reject it +# `--regex-variant=default` should accept it 
+# `--regex-variant=python` should reject it # # check these options against documents with invalid and valid python regexes to confirm # that they are behaving as expected @@ -43,6 +43,10 @@ ("--disable-formats", "regex"), ("--format-regex", "default"), ("--format-regex", "python"), + ("--regex-variant", "python"), + ("--regex-variant", "default"), + ("--regex-variant", "default", "--format-regex", "python"), + ("--regex-variant", "python", "--format-regex", "default"), ] ) def regexopts(request): @@ -108,7 +112,10 @@ def test_regex_format_js_specific(run_line, tmp_path, regexopts): doc = tmp_path / "doc.json" doc.write_text(json.dumps(JS_REGEX_DOCUMENT)) - expect_ok = regexopts != ("--format-regex", "python") + expect_ok = regexopts[:2] not in ( + ("--format-regex", "python"), + ("--regex-variant", "python"), + ) res = run_line( [ diff --git a/tests/acceptance/test_invalid_schema_files.py b/tests/acceptance/test_invalid_schema_files.py index c4cf62c72..71efda024 100644 --- a/tests/acceptance/test_invalid_schema_files.py +++ b/tests/acceptance/test_invalid_schema_files.py @@ -1,3 +1,6 @@ +import pytest + + def test_checker_non_json_schemafile(run_line, tmp_path): foo = tmp_path / "foo.json" bar = tmp_path / "bar.json" @@ -29,3 +32,24 @@ def test_checker_invalid_schemafile_scheme(run_line, tmp_path): res = run_line(["check-jsonschema", "--schemafile", f"ftp://{foo}", str(bar)]) assert res.exit_code == 1 assert "only supports http, https" in res.stderr + + +@pytest.mark.parametrize( + "add_args", + [ + pytest.param([], id="noargs"), + # ensure that this works even when regex checking is disabled + pytest.param(["--disable-formats", "*"], id="all-formats-disabled"), + pytest.param(["--disable-formats", "regex"], id="regex-format-disabled"), + ], +) +def test_checker_invalid_schemafile_due_to_bad_regex(run_line, tmp_path, add_args): + foo = tmp_path / "foo.json" + bar = tmp_path / "bar.json" + # too many backslash escapes -- not a valid Unicode-mode regex + 
foo.write_text(r'{"properties": {"foo": {"pattern": "\\\\p{N}"}}}') + bar.write_text("{}") + + res = run_line(["check-jsonschema", "--schemafile", str(foo), str(bar), *add_args]) + assert res.exit_code == 1 + assert "schemafile was not valid" in res.stderr diff --git a/tests/example-files/explicit-schema/negative/unicode_pattern/instance.json b/tests/example-files/explicit-schema/negative/unicode_pattern/instance.json new file mode 100644 index 000000000..0bce573bf --- /dev/null +++ b/tests/example-files/explicit-schema/negative/unicode_pattern/instance.json @@ -0,0 +1,4 @@ +{ + "key": "foo 1", + "value": "bar 2" +} diff --git a/tests/example-files/explicit-schema/negative/unicode_pattern/schema.json b/tests/example-files/explicit-schema/negative/unicode_pattern/schema.json new file mode 100644 index 000000000..3511f41b2 --- /dev/null +++ b/tests/example-files/explicit-schema/negative/unicode_pattern/schema.json @@ -0,0 +1,20 @@ +{ + "additionalProperties": false, + "properties": { + "key": { + "description": "some key", + "maxLength": 128, + "minLength": 1, + "pattern": "^\\p{L}\\p{Z}\\p{N}$", + "type": "string" + }, + "value": { + "description": "some value", + "maxLength": 256, + "minLength": 0, + "pattern": "^\\p{L}\\p{Z}\\p{N}$", + "type": "string" + } + }, + "type": "object" +} diff --git a/tests/example-files/explicit-schema/positive/unicode_pattern/instance.json b/tests/example-files/explicit-schema/positive/unicode_pattern/instance.json new file mode 100644 index 000000000..6766d3091 --- /dev/null +++ b/tests/example-files/explicit-schema/positive/unicode_pattern/instance.json @@ -0,0 +1,4 @@ +{ + "key": "a 1", + "value": "b 2" +} diff --git a/tests/example-files/explicit-schema/positive/unicode_pattern/schema.json b/tests/example-files/explicit-schema/positive/unicode_pattern/schema.json new file mode 100644 index 000000000..3511f41b2 --- /dev/null +++ b/tests/example-files/explicit-schema/positive/unicode_pattern/schema.json @@ -0,0 +1,20 @@ +{ + 
"additionalProperties": false, + "properties": { + "key": { + "description": "some key", + "maxLength": 128, + "minLength": 1, + "pattern": "^\\p{L}\\p{Z}\\p{N}$", + "type": "string" + }, + "value": { + "description": "some value", + "maxLength": 256, + "minLength": 0, + "pattern": "^\\p{L}\\p{Z}\\p{N}$", + "type": "string" + } + }, + "type": "object" +} diff --git a/tox.ini b/tox.ini index c3a64592a..6edec916f 100644 --- a/tox.ini +++ b/tox.ini @@ -30,12 +30,14 @@ deps = format: jsonschema[format] commands = coverage run -m pytest {posargs:--junitxml={envdir}/pytest.xml} +depends = cov_clean [testenv:cov_clean] description = "erase coverage data to prepare for a new run" deps = coverage skip_install = true commands = coverage erase +depends = [testenv:cov] description = "combine and report coverage data" @@ -43,6 +45,7 @@ deps = coverage skip_install = true commands_pre = - coverage combine commands = coverage report --skip-covered +depends = py{,38,39,310,311,312}{,-mindeps,-format,-json5,-pyjson5,-disable_orjson} [testenv:mypy] description = "check type annotations with mypy" @@ -51,6 +54,7 @@ deps = mypy types-requests click commands = mypy src/ {posargs} +depends = [testenv:pyright] description = "check type annotations with pyright"