diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d67bdd1..a3e0cb4 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -23,6 +23,26 @@ Please see the fragment files in the `changelog.d directory`_. .. scriv-insert-here +.. _changelog-0.4.0: + +0.4.0 - 2023-11-30 +================== + +Fixed +----- + +* Fix file writing, which wasn't rendering newlines correctly. + +Changed +------- + +* Determine the configured file encoding only if the file has content. + +Documentation +------------- + +* Document how to create a new custom style. + .. _changelog-0.3.0: 0.3.0 - 2023-11-29 diff --git a/README.rst b/README.rst index 0d0ce99..2b38ed1 100644 --- a/README.rst +++ b/README.rst @@ -56,6 +56,6 @@ Here's a sample configuration for ensuring your files have correct headers: # .pre-commit-config.yaml repos: - repo: 'https://github.com/kurtmckee/chipshot' - rev: 'main' + rev: 'v0.4.0' hooks: - id: 'update-headers' diff --git a/docs/how-to/custom-headers.rst b/docs/how-to/custom-headers.rst new file mode 100644 index 0000000..658dba2 --- /dev/null +++ b/docs/how-to/custom-headers.rst @@ -0,0 +1,107 @@ +How To Create Custom Headers +############################ + +Chipshot supports a wide variety of header styles +for many different programming and markup languages, +but you might want to create a new style. + +This document walks you through the process of creating a new style. + +* :ref:`variables` +* :ref:`create` + +.. _variables: + +The Four Control Variables +========================== + +Chipshot anticipates that all header styles require +some kind of document-specific comment markup, +and that the comment markup will follow some of these rules: + +* There may be some kind of markup that introduces the comment block. + + Some languages support multiline comments + which are introduced with start and end markers. + For example, Typescript supports ``/*`` and ``*/`` markers. + + Some languages only use a dedicated start marker. 
+ For example, reStructuredText uses ``..`` as its start marker + and relies on blank lines to signal the end of the comment. + + Chipshot uses the terms ``block_prefix`` and ``block_suffix`` + to refer to the start and end markers. + +* Individual lines may need to be prefixed with some kind of markup. + + Some languages only support single-line comments. + For example, PostgreSQL comment lines must start with ``--``. + + Chipshot supports line prefixes and, if desired, line suffixes. + It uses the terms ``line_prefix`` and ``line_suffix`` + to refer to the beginning-of-line and end-of-line markers. + + +.. _create: + +Create a New Style +================== + +All styles in Chipshot are created under the ``styles`` configuration key, +and then file extensions are configured to use the new style. +(It is not supported to define the style in the file extension configuration.) + +Let's say you want to add support for PHP files. +First, define the style in ``.chipshot.toml``. +In the example below, the style name is defined as ``my-php-style``. +Escaped newlines must be embedded in the block prefix and suffix +to ensure that they render nicely. + +.. code-block:: toml + + [chipshot.styles.my-php-style] + block_prefix = "" + +Then, configure files with the "php" extension to use the new style. + +.. code-block:: toml + + [chipshot.extension.php] + style = "my-php-style" + +Here's the complete ``.chipshot.toml`` file, including a template: + +.. code-block:: toml + + [chipshot] + template = """ + Copyright 2022-{{ year }} Company Name + Licensed under the terms of the MIT License. + + [chipshot.styles.my-php-style] + block_prefix = "" + + [chipshot.extension.php] + style = "my-php-style" + +Run Chipshot with the ``--update`` flag and pass it a PHP file to update. +For example: + +.. code-block:: console + + $ chipshot --update example.php + +The header will be added at the top of the file like this: + +..
code-block:: php + + diff --git a/docs/index.rst b/docs/index.rst index 570260d..6740f42 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -61,7 +61,11 @@ Getting Started How-To Guides ============= -* How to customize comment styles +.. toctree:: + :maxdepth: 1 + + how-to/custom-headers + * How to integrate Chipshot in your everyday development diff --git a/pyproject.toml b/pyproject.toml index fe34f09..cb87691 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "chipshot" -version = "0.3.0" +version = "0.4.0" description = "Set up game-winning headers!" readme = "README.rst" authors = ["Kurt McKee "] diff --git a/src/chipshot/reader/__init__.py b/src/chipshot/reader/__init__.py index 6092c22..5586f98 100644 --- a/src/chipshot/reader/__init__.py +++ b/src/chipshot/reader/__init__.py @@ -8,7 +8,6 @@ import pathlib import typing as t -from ..config import get_config_value from ..shared import FileInfo from . import encoding, header, newlines, prologue @@ -19,26 +18,15 @@ def read(path: pathlib.Path, config: dict[str, t.Any]) -> FileInfo: """Read a file and return its contents and metadata.""" raw_contents = path.read_bytes() - info = FileInfo( - path=path, - raw_contents=raw_contents, - encoding=_determine_default_encoding(path, config), - ) + info = FileInfo(path=path, raw_contents=raw_contents) # If the file is empty, skip all other steps. 
if not raw_contents: return info - encoding.handle(info) + encoding.handle(info, config) newlines.handle(info) prologue.handle(info, config) header.handle(info, config) return info - - -def _determine_default_encoding(path: pathlib.Path, config: dict[str, t.Any]) -> str: - """Determine the default encoding for the given path.""" - - (default_encoding,) = get_config_value(config, path, "encoding") - return str(default_encoding) diff --git a/src/chipshot/reader/encoding.py b/src/chipshot/reader/encoding.py index a4f119a..52d9e52 100644 --- a/src/chipshot/reader/encoding.py +++ b/src/chipshot/reader/encoding.py @@ -6,14 +6,16 @@ import codecs import logging +import typing from .. import exceptions +from ..config import get_config_value from ..shared import FileInfo log = logging.getLogger(__name__) -def handle(info: FileInfo) -> None: +def handle(info: FileInfo, config: dict[str, typing.Any]) -> None: """Detect and handle the file encoding. The encoding may be determined by a byte order mark at the beginning of the file. 
@@ -36,6 +38,8 @@ def handle(info: FileInfo) -> None: elif info.raw_contents.startswith(codecs.BOM_UTF8): info.bom = codecs.BOM_UTF8 info.encoding = "utf-8" + else: + (info.encoding,) = get_config_value(config, info.path, "encoding") if info.bom: info.raw_contents = info.raw_contents[len(info.bom) :] diff --git a/src/chipshot/writer.py b/src/chipshot/writer.py index afd7efd..8da8ead 100644 --- a/src/chipshot/writer.py +++ b/src/chipshot/writer.py @@ -10,17 +10,30 @@ def write(file: FileInfo) -> None: - file.path.write_bytes(_render(file)) - - -def _render(file: FileInfo) -> bytes: - text: str = "" - if file.prologue: - text = f"{file.prologue}{file.newlines * 2}" - if file.header: - text += f"{file.header}{file.newlines * 2}" - if file.original_header: - text += f"{file.original_header}{file.newlines * 2}" - text += file.contents - - return file.bom + text.encode(file.encoding) + two_newlines = (file.newlines * 2).encode(file.encoding) + add_two_newlines = False + + with file.path.open("wb") as f: + f.write(file.bom) + + if file.prologue: + f.write(file.prologue.replace("\n", file.newlines).encode(file.encoding)) + add_two_newlines = True + + if file.header: + if add_two_newlines: + f.write(two_newlines) + f.write(file.header.replace("\n", file.newlines).encode(file.encoding)) + add_two_newlines = True + + if file.original_header: + if add_two_newlines: + f.write(two_newlines) + f.write( + file.original_header.replace("\n", file.newlines).encode(file.encoding) + ) + add_two_newlines = True + + if add_two_newlines: + f.write(two_newlines) + f.write(file.contents.replace("\n", file.newlines).encode(file.encoding)) diff --git a/tests/test_reader.py b/tests/test_reader.py index 54cefb2..b6dd5f9 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -10,7 +10,7 @@ def test_empty_file(fs, default_config): info = chipshot.reader.read(path, default_config) assert info.raw_contents == b"" assert info.bom == b"" - assert info.encoding == "utf-8" + assert 
info.encoding == "" assert info.prologue == "" assert info.original_header == "" assert info.contents == "" diff --git a/tests/test_reader_encoding.py b/tests/test_reader_encoding.py index a06224a..72a29be 100644 --- a/tests/test_reader_encoding.py +++ b/tests/test_reader_encoding.py @@ -17,12 +17,12 @@ (codecs.BOM_UTF32_LE, "utf-32-le"), ), ) -def test_bom(bogus_file, bom, encoding): +def test_bom(bogus_file, default_config, bom, encoding): raw_contents = bom + "success".encode(encoding) bogus_file.raw_contents = raw_contents bogus_file.encoding = "utf-8" - chipshot.reader.encoding.handle(bogus_file) + chipshot.reader.encoding.handle(bogus_file, default_config) assert bogus_file.encoding == encoding assert bogus_file.bom == bom @@ -41,4 +41,4 @@ def test_decode_errors(bogus_file, default_config, bom, exception): bogus_file.encoding = "utf-8" with pytest.raises(exception): - chipshot.reader.encoding.handle(bogus_file) + chipshot.reader.encoding.handle(bogus_file, default_config) diff --git a/tests/test_writer.py b/tests/test_writer.py new file mode 100644 index 0000000..f0a7d08 --- /dev/null +++ b/tests/test_writer.py @@ -0,0 +1,139 @@ +import codecs +import pathlib + +import pytest + +import chipshot.writer +from chipshot.shared import FileInfo + +encodings = pytest.mark.parametrize("encoding", ("utf-8", "shift-jis")) +newlines = pytest.mark.parametrize("newline", ("\n", "\r", "\r\n")) +boms = pytest.mark.parametrize( + "bom, encoding", + ( + (codecs.BOM_UTF32_BE, "utf-32-be"), + (codecs.BOM_UTF32_LE, "utf-32-le"), + (codecs.BOM_UTF16_BE, "utf-16-be"), + (codecs.BOM_UTF16_LE, "utf-16-le"), + (codecs.BOM_UTF8, "utf-8"), + ), +) + + +emoji = "\N{Smiling Face with Sunglasses}" + + +@encodings +def test_basic(fs, encoding): + info = FileInfo( + path=pathlib.Path("file"), + raw_contents=b"", + contents="あ", + encoding=encoding, + ) + chipshot.writer.write(info) + + assert info.path.read_bytes().decode(encoding) == "あ" + + +@boms +def test_bom(fs, bom, encoding): + info 
= FileInfo( + bom=bom, + encoding=encoding, + path=pathlib.Path("file"), + raw_contents=b"", + contents=emoji, + ) + chipshot.writer.write(info) + + raw_content = info.path.read_bytes() + assert raw_content.startswith(bom) + assert raw_content[len(bom) :].decode(encoding) == emoji + + +@newlines +def test_newlines(fs, newline): + info = FileInfo( + newlines=newline, + path=pathlib.Path("file"), + raw_contents=b"", + contents="1\n2\n3", + encoding="utf-8", + ) + chipshot.writer.write(info) + + assert info.path.read_bytes().count(newline.encode("utf-8")) == 2 + + +@newlines +def test_prologue(fs, newline): + info = FileInfo( + newlines=newline, + prologue="1\n2\n3", + path=pathlib.Path("file"), + raw_contents=b"", + contents="", + encoding="utf-8", + ) + chipshot.writer.write(info) + + assert info.path.read_bytes().count(newline.encode("utf-8")) == 2 + 2 + + +def test_header_with_prologue(fs): + info = FileInfo( + newlines="\r\n", + prologue="pl", + header="1\n2\n3", + path=pathlib.Path("file"), + raw_contents=b"", + contents="abc", + encoding="utf-8", + ) + chipshot.writer.write(info) + + assert info.path.read_bytes() == b"pl\r\n\r\n1\r\n2\r\n3\r\n\r\nabc" + + +def test_header_without_prologue(fs): + info = FileInfo( + newlines="\r\n", + header="1\n2\n3", + path=pathlib.Path("file"), + raw_contents=b"", + contents="abc", + encoding="utf-8", + ) + chipshot.writer.write(info) + + assert info.path.read_bytes() == b"1\r\n2\r\n3\r\n\r\nabc" + + +def test_original_header_without_new_header(fs): + info = FileInfo( + newlines="\r\n", + original_header="1\n2\n3", + path=pathlib.Path("file"), + raw_contents=b"", + contents="abc", + encoding="utf-8", + ) + chipshot.writer.write(info) + + assert info.path.read_bytes() == b"1\r\n2\r\n3\r\n\r\nabc" + + +def test_original_header_with_new_header(fs): + info = FileInfo( + newlines="\r\n", + header="hd", + original_header="1\n2\n3", + path=pathlib.Path("file"), + raw_contents=b"", + contents="abc", + encoding="utf-8", + ) + 
chipshot.writer.write(info) + + assert info.path.read_bytes() == b"hd\r\n\r\n1\r\n2\r\n3\r\n\r\nabc"