Skip to content

Commit

Permalink
Check for forbidden HTML tags
Browse files Browse the repository at this point in the history
CMK-17499
  • Loading branch information
jherbel committed May 22, 2024
1 parent fcfe3c4 commit c67d904
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 2 deletions.
18 changes: 18 additions & 0 deletions checkmk_weblate_syncer/html_tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import re

# keep in sync with tests/pylint/checker_localization.py:HTMLTagsChecker
# A tag candidate is the shortest run from "<" up to the next ">" on the same
# line ("." does not match newlines, so candidates never span lines).
_TAG_PATTERN = re.compile(r"<.*?>")
# Allow-list of opening/closing tags. "br" and "nobr" may be written with a
# trailing " /"; anchors ("a") may carry arbitrary attributes as long as a
# " href=" attribute is present. Matching is case-sensitive.
_ALLOWED_TAGS_PATTERN = re.compile(
    r"</?(h1|h2|b|tt|i|u|hr|br(?: /)?|nobr(?: /)?|pre|sup|p|li|ul|ol|a|(a.*? href=.*?))>"
)


def forbidden_tags(text: str) -> set[str]:
    """Return all tag-like substrings of *text* that are not on the allow-list.

    Every substring matching ``<...>`` is treated as a tag candidate and is
    checked against ``_ALLOWED_TAGS_PATTERN``.

    Args:
        text: Arbitrary text, e.g. the content of a portable object file.

    Returns:
        The distinct forbidden tags exactly as they appear in *text*; an
        empty set if there are none.
    """
    return {
        tag
        for tag in _TAG_PATTERN.findall(text)
        # A candidate contains exactly one ">" (its last character) and the
        # allow-list pattern ends in ">", so requiring a full match is
        # equivalent to a prefix match here while making the intent explicit.
        if not _ALLOWED_TAGS_PATTERN.fullmatch(tag)
    }
13 changes: 13 additions & 0 deletions checkmk_weblate_syncer/portable_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@
from pathlib import Path


def remove_header(portable_object_content: str) -> str:
    """Drop everything that precedes the first source string location comment.

    A source string location comment is a line of the form
    ``#: <anything>:<digits>``. The returned text is an exact suffix of the
    input: line terminators and a trailing newline (or its absence) are
    preserved as they are.

    Args:
        portable_object_content: Content of a portable object (po/pot) file.

    Returns:
        The content starting at the first source string location comment. If
        no such line exists, the content is returned unchanged.
    """
    pattern = re.compile(r"^#: .*?:\d+$")
    # Split the same way twice: terminator-free lines are what the pattern is
    # matched against, the terminator-preserving lines are what gets
    # re-assembled, so line endings are never rewritten.
    bare_lines = portable_object_content.splitlines()
    raw_lines = portable_object_content.splitlines(keepends=True)
    first_location_index = next(
        (index for index, line in enumerate(bare_lines) if pattern.match(line)),
        0,  # no location comment found: keep everything
    )
    return "".join(raw_lines[first_location_index:])


def make_soure_string_locations_relative(
portable_object_content: str,
relative_to: Path,
Expand Down
9 changes: 8 additions & 1 deletion checkmk_weblate_syncer/update_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@

from .config import UpdateSourcesConfig
from .git import commit_and_push_files, repository_in_clean_state
from .html_tags import forbidden_tags
from .logging import LOGGER
from .portable_object import make_soure_string_locations_relative
from .portable_object import make_soure_string_locations_relative, remove_header


def run(config: UpdateSourcesConfig) -> int:
Expand All @@ -30,6 +31,12 @@ def run(config: UpdateSourcesConfig) -> int:
LOGGER.error("Generating pot file failed")
raise e

LOGGER.info("Checking HTML tags")
if forbidden_html_tags := forbidden_tags(remove_header(pot_file_content)):
raise ValueError(
f"Found forbidden HTML tags: {', '.join(sorted(forbidden_html_tags))}"
)

LOGGER.info("Making source string locations relative")
pot_file_content = make_soure_string_locations_relative(
portable_object_content=pot_file_content,
Expand Down
14 changes: 13 additions & 1 deletion checkmk_weblate_syncer/update_translations.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,13 @@

from .config import PoFilePair, RepositoryConfig, UpdateTranslationsConfig
from .git import commit_and_push_files, repository_in_clean_state
from .html_tags import forbidden_tags
from .logging import LOGGER
from .portable_object import remove_last_translator, remove_source_string_locations
from .portable_object import (
remove_header,
remove_last_translator,
remove_source_string_locations,
)


@dataclass(frozen=True)
Expand Down Expand Up @@ -105,6 +110,13 @@ def _process_po_file_pair(
path=locale_po_file,
)

LOGGER.info("Checking HTML tags")
if forbidden_html_tags := forbidden_tags(remove_header(po_file_content)):
return _Failure(
error_message=f"Found forbidden HTML tags: {', '.join(sorted(forbidden_html_tags))}",
path=locale_po_file,
)

LOGGER.info("Stripping source string locations and Last-Translator")
po_file_content = remove_source_string_locations(po_file_content)
po_file_content = remove_last_translator(po_file_content)
Expand Down
59 changes: 59 additions & 0 deletions tests/test_html_tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import pytest

from checkmk_weblate_syncer.html_tags import forbidden_tags


@pytest.mark.parametrize(
    ("text", "expected_result"),
    [
        # Plain text without any markup.
        ("abc123", frozenset()),
        # Allowed formatting tag.
        ("<tt>bold</tt>", frozenset()),
        # Anchors are allowed as long as they carry an href attribute.
        ('* ? <a href="%s">%s</a>', frozenset()),
        (
            '&copy; <a target="_blank" href="https://checkmk.com">Checkmk GmbH</a>',
            frozenset(),
        ),
        # Tags outside the allow-list are reported, opening and closing alike.
        (
            "123 <script>injection</script>",
            frozenset({"<script>", "</script>"}),
        ),
        # Portable object entries without any markup.
        # pylint: disable=line-too-long
        (
            """#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/wato/pages/host_rename.py:640
#, python-format
msgid " (%d times)"
msgstr ""
#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/visuals/_page_edit_visual.py:137
msgid " (Copy)"
msgstr ""
#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/nodevis/topology.py:1814
msgid " (Data incomplete, maximum number of nodes reached)"
msgstr ""
#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/backup/handler.py:969
#, python-format
msgid " (Duration: %s)"
msgstr ""
""",
            frozenset(),
        ),
    ],
)
def test_html_tags_checker(
    text: str,
    expected_result: frozenset[str],
) -> None:
    """forbidden_tags reports exactly the tags that are not allow-listed."""
    found = forbidden_tags(text)
    assert found == expected_result
62 changes: 62 additions & 0 deletions tests/test_portable_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,73 @@

from checkmk_weblate_syncer.portable_object import (
make_soure_string_locations_relative,
remove_header,
remove_last_translator,
remove_source_string_locations,
)


def test_remove_header() -> None:
    """remove_header strips everything before the first location comment."""
    # pylint: disable=line-too-long
    header = """# Copyright (C) 2019 Checkmk GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.
msgid ""
msgstr ""
"Project-Id-Version: Checkmk user interface translation 0.1\n"
"Report-Msgid-Bugs-To: [email protected]\n"
"POT-Creation-Date: 2011-05-13 09:42+0200\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <[email protected]>\n"
"Language: LANGUAGE \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"""
    # Everything from the first "#: <path>:<line>" comment onwards must survive.
    entries = """#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/wato/pages/host_rename.py:640
#, python-format
msgid " (%d times)"
msgstr ""
#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/visuals/_page_edit_visual.py:137
msgid " (Copy)"
msgstr ""
#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/nodevis/topology.py:1814
msgid " (Data incomplete, maximum number of nodes reached)"
msgstr ""
#: /home/weblate/checkmk_weblate_sync/git/checkmk/cmk/gui/backup/handler.py:969
#, python-format
msgid " (Duration: %s)"
msgstr ""
"""
    stripped = remove_header(header + entries)
    assert stripped == entries


def test_make_soure_string_locations_relative() -> None:
assert (
make_soure_string_locations_relative(
Expand Down

0 comments on commit c67d904

Please sign in to comment.