From 3637b64a89832b6c4ba39d037845327e75dc9f77 Mon Sep 17 00:00:00 2001 From: Rundown Rhino Date: Sun, 3 Nov 2024 13:19:48 +0000 Subject: [PATCH 01/12] Refactor some functions from the Latex cog into utilities --- bot/exts/fun/latex.py | 34 +++++----------------------------- bot/utils/codeblocks.py | 18 ++++++++++++++++++ bot/utils/images.py | 17 +++++++++++++++++ 3 files changed, 40 insertions(+), 29 deletions(-) create mode 100644 bot/utils/codeblocks.py create mode 100644 bot/utils/images.py diff --git a/bot/exts/fun/latex.py b/bot/exts/fun/latex.py index 5ce60fa163..e121600f92 100644 --- a/bot/exts/fun/latex.py +++ b/bot/exts/fun/latex.py @@ -1,13 +1,10 @@ import hashlib import os -import re import string -from io import BytesIO from pathlib import Path from typing import BinaryIO import discord -from PIL import Image from aiohttp import client_exceptions from discord.ext import commands from pydis_core.utils.logging import get_logger @@ -15,18 +12,12 @@ from bot.bot import Bot from bot.constants import Channels, WHITELISTED_CHANNELS +from bot.utils.codeblocks import prepare_input from bot.utils.decorators import whitelist_override +from bot.utils.images import process_image log = get_logger(__name__) -FORMATTED_CODE_REGEX = re.compile( - r"(?P(?P```)|``?)" # code delimiter: 1-3 backticks; (?P=block) only matches if it's a block - r"(?(block)(?:(?P[a-z]+)\n)?)" # if we're in a block, match optional language (only letters plus newline) - r"(?:[ \t]*\n)*" # any blank (empty or tabs/spaces only) lines before the code - r"(?P.*?)" # extract all code inside the markup - r"\s*" # any more whitespace before the end of the code markup - r"(?P=delim)", # match the exact same delimiter from the start again - re.DOTALL | re.IGNORECASE, # "." also matches newlines, case insensitive -) + LATEX_API_URL = os.getenv("LATEX_API_URL", "https://rtex.probablyaweb.site/api/v2") PASTEBIN_URL = "https://paste.pythondiscord.com" @@ -45,24 +36,9 @@ ) -def _prepare_input(text: str) -> str: - """Extract latex from a codeblock, if it is in one.""" - if match := FORMATTED_CODE_REGEX.match(text): - return match.group("code") - return text - - def _process_image(data: bytes, out_file: BinaryIO) -> None: """Read `data` as an image file, and paste it on a white background.""" - image = Image.open(BytesIO(data)).convert("RGBA") - width, height = image.size - background = Image.new("RGBA", (width + 2 * PAD, height + 2 * PAD), "WHITE") - - # paste the image on the background, using the same image as the mask - # when an RGBA image is passed as the mask, its alpha band is used. - # this has the effect of skipping pasting the pixels where the image is transparent. - background.paste(image, (PAD, PAD), image) - background.save(out_file) + return process_image(data, out_file, PAD) class InvalidLatexError(Exception): @@ -132,7 +108,7 @@ async def _prepare_error_embed(self, err: InvalidLatexError | LatexServerError | @whitelist_override(channels=LATEX_ALLOWED_CHANNNELS) async def latex(self, ctx: commands.Context, *, query: str) -> None: """Renders the text in latex and sends the image.""" - query = _prepare_input(query) + query = prepare_input(query) # the hash of the query is used as the filename in the cache. query_hash = hashlib.md5(query.encode()).hexdigest() # noqa: S324 diff --git a/bot/utils/codeblocks.py b/bot/utils/codeblocks.py new file mode 100644 index 0000000000..55a3c9f991 --- /dev/null +++ b/bot/utils/codeblocks.py @@ -0,0 +1,18 @@ +import re + +FORMATTED_CODE_REGEX = re.compile( + r"(?P(?P```)|``?)" # code delimiter: 1-3 backticks; (?P=block) only matches if it's a block + r"(?(block)(?:(?P[a-z]+)\n)?)" # if we're in a block, match optional language (only letters plus newline) + r"(?:[ \t]*\n)*" # any blank (empty or tabs/spaces only) lines before the code + r"(?P.*?)" # extract all code inside the markup + r"\s*" # any more whitespace before the end of the code markup + r"(?P=delim)", # match the exact same delimiter from the start again + re.DOTALL | re.IGNORECASE, # "." also matches newlines, case insensitive +) + + +def prepare_input(text: str) -> str: + """Extract input from a codeblock, if it is in one. Otherwise returns the entire text.""" + if match := FORMATTED_CODE_REGEX.match(text): + return match.group("code") + return text diff --git a/bot/utils/images.py b/bot/utils/images.py new file mode 100644 index 0000000000..b00243e7ad --- /dev/null +++ b/bot/utils/images.py @@ -0,0 +1,17 @@ +from io import BytesIO +from typing import BinaryIO + +from PIL import Image + + +def process_image(data: bytes, out_file: BinaryIO, pad: int) -> None: + """Read `data` as an image file, and paste it on a white background.""" + image = Image.open(BytesIO(data)).convert("RGBA") + width, height = image.size + background = Image.new("RGBA", (width + 2 * pad, height + 2 * pad), "WHITE") + + # paste the image on the background, using the same image as the mask + # when an RGBA image is passed as the mask, its alpha band is used. + # this has the effect of skipping pasting the pixels where the image is transparent. + background.paste(image, (pad, pad), image) + background.save(out_file) From 2e1881844b1584bdfe485c1e8d6b73473346ea40 Mon Sep 17 00:00:00 2001 From: RundownRhino <52856631+RundownRhino@users.noreply.github.com> Date: Sun, 3 Nov 2024 13:25:18 +0000 Subject: [PATCH 02/12] Use the generic function instead of this private one --- bot/exts/fun/latex.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/bot/exts/fun/latex.py b/bot/exts/fun/latex.py index e121600f92..63a16f7622 100644 --- a/bot/exts/fun/latex.py +++ b/bot/exts/fun/latex.py @@ -36,11 +36,6 @@ ) -def _process_image(data: bytes, out_file: BinaryIO) -> None: - """Read `data` as an image file, and paste it on a white background.""" - return process_image(data, out_file, PAD) - - class InvalidLatexError(Exception): """Represents an error caused by invalid latex.""" @@ -73,7 +68,7 @@ async def _generate_image(self, query: str, out_file: BinaryIO) -> None: f"{LATEX_API_URL}/{response_json['filename']}", raise_for_status=True ) as response: - _process_image(await response.read(), out_file) + process_image(await response.read(), out_file, PAD) async def _upload_to_pastebin(self, text: str) -> str | None: """Uploads `text` to the paste service, returning the url if successful.""" From 3e45b885d0c48e37afb83f1879bc62971e0d7856 Mon Sep 17 00:00:00 2001 From: RundownRhino <52856631+RundownRhino@users.noreply.github.com> Date: Sun, 3 Nov 2024 14:23:37 +0000 Subject: [PATCH 03/12] Working .typst implementation. Allows arbitrary packages. --- .gitignore | 1 + bot/exts/fun/typst.py | 127 +++++++++++++++++++++++++++ bot/resources/fun/typst_template.typ | 5 ++ bot/utils/typst.py | 13 +++ poetry.lock | 22 ++++- pyproject.toml | 1 + 6 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 bot/exts/fun/typst.py create mode 100644 bot/resources/fun/typst_template.typ create mode 100644 bot/utils/typst.py diff --git a/.gitignore b/.gitignore index 665df8cca7..4111b2e8a9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ log/* data/* bot/exts/fun/_latex_cache/* +bot/exts/fun/_typst_cache/* diff --git a/bot/exts/fun/typst.py b/bot/exts/fun/typst.py new file mode 100644 index 0000000000..8d8e61f24e --- /dev/null +++ b/bot/exts/fun/typst.py @@ -0,0 +1,127 @@ +import asyncio +import hashlib +import string +from concurrent.futures import ProcessPoolExecutor +from pathlib import Path +from tempfile import TemporaryDirectory + +import discord +from discord.ext import commands +from pydis_core.utils.logging import get_logger +from pydis_core.utils.paste_service import PasteFile, PasteTooLongError, PasteUploadError, send_to_paste_service + +from bot.bot import Bot +from bot.constants import Channels, WHITELISTED_CHANNELS +from bot.utils.codeblocks import prepare_input +from bot.utils.decorators import whitelist_override +from bot.utils.typst import render_typst_worker + +log = get_logger(__name__) + +PASTEBIN_URL = "https://paste.pythondiscord.com" + +THIS_DIR = Path(__file__).parent +# The cache directory used for typst. A temporary subdirectory is made for each invocation, +# which should be cleaned up automatically on success. +CACHE_DIRECTORY = THIS_DIR / "_typst_cache" +CACHE_DIRECTORY.mkdir(exist_ok=True) +TEMPLATE = string.Template(Path("bot/resources/fun/typst_template.typ").read_text()) + +MAX_CONCURRENCY = 2 + +TYPST_ALLOWED_CHANNNELS = WHITELISTED_CHANNELS + ( + Channels.data_science_and_ai, + Channels.algos_and_data_structs, + Channels.python_help, +) + +_EXECUTOR = ProcessPoolExecutor(MAX_CONCURRENCY) + + +class InvalidTypstError(Exception): + """Represents an error caused by invalid typst source code.""" + + def __init__(self, logs: str | None): + super().__init__(logs) + self.logs = logs + + +class Typst(commands.Cog): + """Renders typst.""" + + def __init__(self, bot: Bot): + self.bot = bot + + @commands.command() + @commands.max_concurrency(MAX_CONCURRENCY, commands.BucketType.guild, wait=True) + @whitelist_override(channels=TYPST_ALLOWED_CHANNNELS) + async def typst(self, ctx: commands.Context, *, query: str) -> None: + """Renders the text in typst and sends the image.""" + query = prepare_input(query) + + # the hash of the query is used as the tempdir name in the cache, + # as well as the name for the rendered file. + query_hash = hashlib.md5(query.encode()).hexdigest() # noqa: S324 + image_path = CACHE_DIRECTORY / f"{query_hash}.png" + async with ctx.typing(): + if not image_path.exists(): + try: + await self.render_typst(query, query_hash, image_path) + except InvalidTypstError as err: + embed = await self._prepare_error_embed(err) + await ctx.send(embed=embed) + image_path.unlink() + return + await ctx.send(file=discord.File(image_path, "typst.png")) + + async def render_typst(self, query: str, tempdir_name: str, image_path: Path) -> None: + """ + Renders the query as Typst. + + Does so by writing it into a file in a temporary directory, storing the output in image_path. + image_path shouldn't be in that directory or else it will be immediately deleted. + """ + with TemporaryDirectory(prefix=tempdir_name, dir=CACHE_DIRECTORY) as tempdir: + source_path = Path(tempdir) / "inp.typ" + source_path.write_text(TEMPLATE.substitute(text=query), encoding="utf-8") + try: + await asyncio.get_event_loop().run_in_executor( + _EXECUTOR, render_typst_worker, source_path, image_path + ) + except RuntimeError as e: + raise InvalidTypstError(e.args[0] if e.args else "") + + async def _prepare_error_embed( + self, err: InvalidTypstError | None + ) -> discord.Embed: + title = "There was some issue rendering your Typst, please retry later." + if isinstance(err, InvalidTypstError): + title = "Failed to render input as Typst." + + embed = discord.Embed(title=title) + embed.description = "No logs available." + logs = getattr(err, "logs", None) + if logs: + logs_paste_url = await self._upload_to_pastebin(logs) + embed.description = "Couldn't upload logs." + if logs_paste_url: + embed.description = f"[View Logs]({logs_paste_url})" + return embed + + async def _upload_to_pastebin(self, text: str) -> str | None: + """Uploads `text` to the paste service, returning the url if successful.""" + file = PasteFile(content=text, lexer="text") + try: + resp = await send_to_paste_service( + files=[file], + http_session=self.bot.http_session, + ) + return resp.link + except (PasteTooLongError, PasteUploadError) as e: + log.info("Error when uploading typst output to pastebin. %s", e) + return None + + +async def setup(bot: Bot) -> None: + """Load the Typst Cog.""" + await bot.add_cog(Typst(bot)) diff --git a/bot/resources/fun/typst_template.typ b/bot/resources/fun/typst_template.typ new file mode 100644 index 0000000000..332a33a177 --- /dev/null +++ b/bot/resources/fun/typst_template.typ @@ -0,0 +1,5 @@ +// stretch instead of producing an entire page +// margin:0cm cuts off parts of letters, so add a bit more. +#set page("a4", width: auto, height: auto, margin: 0.25cm) + +$text diff --git a/bot/utils/typst.py b/bot/utils/typst.py new file mode 100644 index 0000000000..941fa5f0db --- /dev/null +++ b/bot/utils/typst.py @@ -0,0 +1,13 @@ +from pathlib import Path + + +def render_typst_worker(source_path: Path, output_path: Path, ppi: float | None = None) -> None: + """ + Renders Typst from source to output. + + Uses the source path's parent as the typst root path. + Intended to be ran in a subprocess for concurrency and timeouts. + """ + import typst + + typst.compile(source_path, output_path, root=source_path.parent, ppi=ppi) diff --git a/poetry.lock b/poetry.lock index 4da3935dad..ad9f2c6538 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1789,6 +1789,26 @@ files = [ {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, ] +[[package]] +name = "typst" +version = "0.11.1" +description = "Python binding to typst" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typst-0.11.1-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:ec50fc97f8afe6da1f8787a72f77f04ddf14e007c88a1b130cb8122a405e4e63"}, + {file = "typst-0.11.1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:c8af0dabc95a761b07f19c4551da9176bc1b408a9abf9d5d0ab93da3d616abf6"}, + {file = "typst-0.11.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d6bf84d7e1c8402241d10874a4dc4df4995dcff0a581c47b1def92cc4de322b"}, + {file = "typst-0.11.1-cp37-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:11c925fab98fdce6b3f8c498c399eed7224d7a315348aa46e2bb0d9a40af5baa"}, + {file = "typst-0.11.1-cp37-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:812b99f617aabb599508e132cdc881a751d89ad32504def774d3e3b765425872"}, + {file = "typst-0.11.1-cp37-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75313042a605c15649f23eb074c5df34bca66f5b921cd043651afd80ff3ad9f0"}, + {file = "typst-0.11.1-cp37-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffba53328e317b9b0a4e9d05573c98c7f7d4e555259e1ff5a9a2169a85232d71"}, + {file = "typst-0.11.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef8e44bec7a2e168a92aec2fa8008328b630d137753c0550ba654c7855d2893a"}, + {file = "typst-0.11.1-cp37-abi3-win32.whl", hash = "sha256:e2ccebab8f9fc49b2b212d6af0e349aa6cd8fc043c343bf5719aa1ef7dae354f"}, + {file = "typst-0.11.1-cp37-abi3-win_amd64.whl", hash = "sha256:3874f7910687406ee7e80de1aec58487c305b76893b50a3a869496105a5767a5"}, + {file = "typst-0.11.1.tar.gz", hash = "sha256:56a336fef1dbec6ae678d87d680a90ad07c8739bc0599b54fdd7fa351a4efa56"}, +] + [[package]] name = "urllib3" version = "2.2.2" @@ -1943,4 +1963,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "3.12.*" -content-hash = "56ecbd30a95a09c65cef191fda4846952082ca086eca97a46cb2547e8ec4a7de" +content-hash = "6795e79a85089141b6ae1c03b276e20a83fed0bdc15a14a624f75bbedb8b6ea8" diff --git a/pyproject.toml b/pyproject.toml index edac48d0fa..6d9bd73c4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ emoji = "2.12.1" pyjokes = "0.8.3" pydantic = { version = "2.6.4", extras = ["dotenv"]} pydantic-settings = "2.2.1" +typst = "^0.11.1" [tool.poetry.dev-dependencies] From 9e56395ba804868143219df1fd25fc2cb4ab87e1 Mon Sep 17 00:00:00 2001 From: RundownRhino <52856631+RundownRhino@users.noreply.github.com> Date: Sun, 3 Nov 2024 14:58:31 +0000 Subject: [PATCH 04/12] Crop output images instead of relying on typst's margin system --- bot/exts/fun/typst.py | 34 +++++++++++++++++++++++++--- bot/resources/fun/typst_template.typ | 3 +-- bot/utils/images.py | 34 +++++++++++++++++++++++++++- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/bot/exts/fun/typst.py b/bot/exts/fun/typst.py index 8d8e61f24e..ca623ef443 100644 --- a/bot/exts/fun/typst.py +++ b/bot/exts/fun/typst.py @@ -6,6 +6,7 @@ from tempfile import TemporaryDirectory import discord +from PIL import Image from discord.ext import commands from pydis_core.utils.logging import get_logger from pydis_core.utils.paste_service import PasteFile, PasteTooLongError, PasteUploadError, send_to_paste_service @@ -14,6 +15,7 @@ from bot.constants import Channels, WHITELISTED_CHANNELS from bot.utils.codeblocks import prepare_input from bot.utils.decorators import whitelist_override +from bot.utils.images import crop_background from bot.utils.typst import render_typst_worker log = get_logger(__name__) @@ -27,6 +29,8 @@ CACHE_DIRECTORY.mkdir(exist_ok=True) TEMPLATE = string.Template(Path("bot/resources/fun/typst_template.typ").read_text()) +# how many pixels to leave on each side when cropping the image to only the contents. Set to None to disable cropping. +CROP_PADDING: int | None = 10 MAX_CONCURRENCY = 2 TYPST_ALLOWED_CHANNNELS = WHITELISTED_CHANNELS + ( @@ -46,6 +50,10 @@ def __init__(self, logs: str | None): self.logs = logs +class EmptyImageError(Exception): + """Represents an error caused by the output image being empty.""" + + class Typst(commands.Cog): """Renders typst.""" @@ -72,9 +80,14 @@ async def typst(self, ctx: commands.Context, *, query: str) -> None: await ctx.send(embed=embed) image_path.unlink() return + except EmptyImageError: + await ctx.send("The output image was empty.") + return await ctx.send(file=discord.File(image_path, "typst.png")) - async def render_typst(self, query: str, tempdir_name: str, image_path: Path) -> None: + async def render_typst( + self, query: str, tempdir_name: str, image_path: Path + ) -> None: """ Renders the query as Typst. @@ -82,14 +95,29 @@ async def render_typst(self, query: str, tempdir_name: str, image_path: Path) -> image_path shouldn't be in that directory or else it will be immediately deleted. """ with TemporaryDirectory(prefix=tempdir_name, dir=CACHE_DIRECTORY) as tempdir: + raw_img_path = Path(tempdir) / image_path.with_stem("raw_out").name source_path = Path(tempdir) / "inp.typ" source_path.write_text(TEMPLATE.substitute(text=query), encoding="utf-8") try: await asyncio.get_event_loop().run_in_executor( - _EXECUTOR, render_typst_worker, source_path, image_path + _EXECUTOR, render_typst_worker, source_path, raw_img_path ) except RuntimeError as e: - raise InvalidTypstError(e.args[0] if e.args else "") + raise InvalidTypstError( + e.args[0] if e.args else "" + ) + + if CROP_PADDING is None: + raw_img_path.rename(image_path) + else: + res = crop_background( + Image.open(raw_img_path).convert("RGB"), + (255, 255, 255), + pad=CROP_PADDING, + ) + if res is None: + raise EmptyImageError + res.save(image_path) async def _prepare_error_embed( self, err: InvalidTypstError | None diff --git a/bot/resources/fun/typst_template.typ b/bot/resources/fun/typst_template.typ index 332a33a177..7f080d9634 100644 --- a/bot/resources/fun/typst_template.typ +++ b/bot/resources/fun/typst_template.typ @@ -1,5 +1,4 @@ -// stretch instead of producing an entire page // margin:0cm cuts off parts of letters, so add a bit more. -#set page("a4", width: auto, height: auto, margin: 0.25cm) +#set page("a4", height: auto, margin: 0.5cm) $text diff --git a/bot/utils/images.py b/bot/utils/images.py index b00243e7ad..d4ac742f01 100644 --- a/bot/utils/images.py +++ b/bot/utils/images.py @@ -1,7 +1,7 @@ from io import BytesIO from typing import BinaryIO -from PIL import Image +from PIL import Image, ImageChops def process_image(data: bytes, out_file: BinaryIO, pad: int) -> None: @@ -15,3 +15,35 @@ def process_image(data: bytes, out_file: BinaryIO, pad: int) -> None: # this has the effect of skipping pasting the pixels where the image is transparent. background.paste(image, (pad, pad), image) background.save(out_file) + + +def crop_background( + img: Image.Image, background_color: tuple[int, ...], pad: int = 0 +) -> Image.Image | None: + """ + Crops the image to include only the pixels that aren't the `background_color`. Optionally leaves some padding. + + If the image is totally empty, returns None if pad==0, otherwise an empty image of only the padding. + """ + if not pad >= 0: + raise ValueError(f"pad must be >=0, got {pad}") + + # https://stackoverflow.com/a/48605963 + bg = Image.new(img.mode, img.size, background_color) + diff = ImageChops.difference(img, bg) + diff = ImageChops.add(diff, diff, 2.0, -100) + bbox = diff.getbbox() + if not bbox: + if pad == 0: + return None + # empty image with padding-related sizes + bbox = (0, 0, 2 * pad, 2 * pad) + else: + l, u, r, b = bbox # noqa: E741 + bbox = ( + max(l - pad, 0), + max(u - pad, 0), + min(r + pad, img.width), + min(b + pad, img.height), + ) + return img.crop(bbox) From fc7ab5dd5487b6c43639d01ec271f18b293818cc Mon Sep 17 00:00:00 2001 From: RundownRhino <52856631+RundownRhino@users.noreply.github.com> Date: Sun, 3 Nov 2024 15:37:38 +0000 Subject: [PATCH 05/12] Security: disallow installing typst packages at runtime --- bot/exts/fun/typst.py | 44 ++++++++++++++++++++++++++-- bot/resources/fun/typst_packages.typ | 6 ++++ 2 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 bot/resources/fun/typst_packages.typ diff --git a/bot/exts/fun/typst.py b/bot/exts/fun/typst.py index ca623ef443..0f3472b787 100644 --- a/bot/exts/fun/typst.py +++ b/bot/exts/fun/typst.py @@ -6,10 +6,16 @@ from tempfile import TemporaryDirectory import discord +import platformdirs from PIL import Image from discord.ext import commands from pydis_core.utils.logging import get_logger -from pydis_core.utils.paste_service import PasteFile, PasteTooLongError, PasteUploadError, send_to_paste_service +from pydis_core.utils.paste_service import ( + PasteFile, + PasteTooLongError, + PasteUploadError, + send_to_paste_service, +) from bot.bot import Bot from bot.constants import Channels, WHITELISTED_CHANNELS @@ -28,6 +34,40 @@ CACHE_DIRECTORY = THIS_DIR / "_typst_cache" CACHE_DIRECTORY.mkdir(exist_ok=True) TEMPLATE = string.Template(Path("bot/resources/fun/typst_template.typ").read_text()) +PACKAGES_INSTALL_STRING = Path("bot/resources/fun/typst_packages.typ").read_text() + +TYPST_PACKAGES_DIR = platformdirs.user_cache_path("typst") / "packages" +if not TYPST_PACKAGES_DIR.exists(): + log.info( + f"The Typst package directory '{TYPST_PACKAGES_DIR}' doesn't currently exist; populating allowed packages." + ) + with TemporaryDirectory(prefix="packageinstall", dir=CACHE_DIRECTORY) as tempdir: + source_path = Path(tempdir) / "inp.typ" + output_path = Path(tempdir) / "discard.png" + source_path.write_text(PACKAGES_INSTALL_STRING, encoding="utf-8") + render_typst_worker(source_path, output_path) + if not TYPST_PACKAGES_DIR.exists(): + raise ValueError( + f"'{TYPST_PACKAGES_DIR}' still doesn't exist after installing packages - " + "this suggests the packages path is incorrect or no packages were installed." + ) + num_packages = 0 + for universe in TYPST_PACKAGES_DIR.iterdir(): + num_packages += sum(1 for _ in universe.iterdir()) + log.info( + f"Installed {num_packages} packages. Locking the packages directory against writes." + ) + # for security, remove the write permissions from typst packages + mode = 0o555 # read, execute, not write + for ( + dirpath, + dirnames, + filenames, + ) in TYPST_PACKAGES_DIR.walk(): + for d in dirnames + filenames: + (dirpath / d).chmod(mode) + TYPST_PACKAGES_DIR.chmod(mode) + # how many pixels to leave on each side when cropping the image to only the contents. Set to None to disable cropping. CROP_PADDING: int | None = 10 @@ -78,7 +118,7 @@ async def typst(self, ctx: commands.Context, *, query: str) -> None: except InvalidTypstError as err: embed = await self._prepare_error_embed(err) await ctx.send(embed=embed) - image_path.unlink() + image_path.unlink(missing_ok=True) return except EmptyImageError: await ctx.send("The output image was empty.") diff --git a/bot/resources/fun/typst_packages.typ b/bot/resources/fun/typst_packages.typ new file mode 100644 index 0000000000..df6cebf9b8 --- /dev/null +++ b/bot/resources/fun/typst_packages.typ @@ -0,0 +1,6 @@ +// This file is ran to install the allowed packages before the package directory is write-locked. +// It is NOT included into every query's template. +#import "@preview/codly:1.0.0" // code presentation (needs configuration in the document to work) +#import "@preview/cetz:0.3.0" // similar to latex's tikz +#import "@preview/fletcher:0.5.1" as fletcher // drawing diagrams; depends on cetz +#import "@preview/physica:0.9.3" // math and physics From cb2ebab1fcac421018a873393ec86363feb9145d Mon Sep 17 00:00:00 2001 From: RundownRhino <52856631+RundownRhino@users.noreply.github.com> Date: Sun, 3 Nov 2024 18:34:43 +0000 Subject: [PATCH 06/12] Add limits for typst rendering time and output size --- bot/__main__.py | 4 +- bot/exts/fun/typst.py | 116 ++++++++++++++++++++++++++---------------- poetry.lock | 33 +++++++++++- pyproject.toml | 1 + 4 files changed, 109 insertions(+), 45 deletions(-) diff --git a/bot/__main__.py b/bot/__main__.py index b3f346a8a3..aec1eae37c 100644 --- a/bot/__main__.py +++ b/bot/__main__.py @@ -86,4 +86,6 @@ async def main() -> None: await _bot.start(constants.Client.token.get_secret_value()) -asyncio.run(main()) +# the main-guard is needed for launching subprocesses, e.g. via anyio.to_process +if __name__ == "__main__": + asyncio.run(main()) diff --git a/bot/exts/fun/typst.py b/bot/exts/fun/typst.py index 0f3472b787..5a7680ca81 100644 --- a/bot/exts/fun/typst.py +++ b/bot/exts/fun/typst.py @@ -1,21 +1,16 @@ import asyncio import hashlib import string -from concurrent.futures import ProcessPoolExecutor from pathlib import Path from tempfile import TemporaryDirectory import discord import platformdirs from PIL import Image +from anyio.to_process import run_sync from discord.ext import commands from pydis_core.utils.logging import get_logger -from pydis_core.utils.paste_service import ( - PasteFile, - PasteTooLongError, - PasteUploadError, - send_to_paste_service, -) +from pydis_core.utils.paste_service import PasteFile, PasteTooLongError, PasteUploadError, send_to_paste_service from bot.bot import Bot from bot.constants import Channels, WHITELISTED_CHANNELS @@ -37,41 +32,14 @@ PACKAGES_INSTALL_STRING = Path("bot/resources/fun/typst_packages.typ").read_text() TYPST_PACKAGES_DIR = platformdirs.user_cache_path("typst") / "packages" -if not TYPST_PACKAGES_DIR.exists(): - log.info( - f"The Typst package directory '{TYPST_PACKAGES_DIR}' doesn't currently exist; populating allowed packages." - ) - with TemporaryDirectory(prefix="packageinstall", dir=CACHE_DIRECTORY) as tempdir: - source_path = Path(tempdir) / "inp.typ" - output_path = Path(tempdir) / "discard.png" - source_path.write_text(PACKAGES_INSTALL_STRING, encoding="utf-8") - render_typst_worker(source_path, output_path) - if not TYPST_PACKAGES_DIR.exists(): - raise ValueError( - f"'{TYPST_PACKAGES_DIR}' still doesn't exist after installing packages - " - "this suggests the packages path is incorrect or no packages were installed." - ) - num_packages = 0 - for universe in TYPST_PACKAGES_DIR.iterdir(): - num_packages += sum(1 for _ in universe.iterdir()) - log.info( - f"Installed {num_packages} packages. Locking the packages directory against writes." - ) - # for security, remove the write permissions from typst packages - mode = 0o555 # read, execute, not write - for ( - dirpath, - dirnames, - filenames, - ) in TYPST_PACKAGES_DIR.walk(): - for d in dirnames + filenames: - (dirpath / d).chmod(mode) - TYPST_PACKAGES_DIR.chmod(mode) - # how many pixels to leave on each side when cropping the image to only the contents. Set to None to disable cropping. CROP_PADDING: int | None = 10 MAX_CONCURRENCY = 2 +# max time in seconds to allow the typst process to run +TYPST_TIMEOUT: float = 1.0 +# max size of the typst (raw) output image to allow rather than emitting an error +MAX_RAW_SIZE = 2 * 1024**2 # 2MB TYPST_ALLOWED_CHANNNELS = WHITELISTED_CHANNELS + ( Channels.data_science_and_ai, @@ -79,8 +47,6 @@ Channels.python_help, ) -_EXECUTOR = ProcessPoolExecutor(MAX_CONCURRENCY) - class InvalidTypstError(Exception): """Represents an error caused by invalid typst source code.""" @@ -90,6 +56,14 @@ def __init__(self, logs: str | None): self.logs = logs +class TypstTimeoutError(Exception): + """Represents an error caused by the Typst rendering taking too long.""" + + +class OutputTooBigError(Exception): + """Represents an error caused by the Typst output image being too big.""" + + class EmptyImageError(Exception): """Represents an error caused by the output image being empty.""" @@ -99,6 +73,42 @@ class Typst(commands.Cog): def __init__(self, bot: Bot): self.bot = bot + self._setup_packages() + + def _setup_packages(self) -> None: + if TYPST_PACKAGES_DIR.exists(): + return + log.info( + f"The Typst package directory '{TYPST_PACKAGES_DIR}' doesn't currently exist; populating allowed packages." + ) + with TemporaryDirectory( + prefix="packageinstall", dir=CACHE_DIRECTORY + ) as tempdir: + source_path = Path(tempdir) / "inp.typ" + output_path = Path(tempdir) / "discard.png" + source_path.write_text(PACKAGES_INSTALL_STRING, encoding="utf-8") + render_typst_worker(source_path, output_path) + if not TYPST_PACKAGES_DIR.exists(): + raise ValueError( + f"'{TYPST_PACKAGES_DIR}' still doesn't exist after installing packages - " + "this suggests the packages path is incorrect or no packages were installed." + ) + num_packages = 0 + for universe in TYPST_PACKAGES_DIR.iterdir(): + num_packages += sum(1 for _ in universe.iterdir()) + log.info( + f"Installed {num_packages} packages. Locking the packages directory against writes." + ) + # for security, remove the write permissions from typst packages + mode = 0o555 # read, execute, not write + for ( + dirpath, + dirnames, + filenames, + ) in TYPST_PACKAGES_DIR.walk(): + for d in dirnames + filenames: + (dirpath / d).chmod(mode) + TYPST_PACKAGES_DIR.chmod(mode) @commands.command() @commands.max_concurrency(MAX_CONCURRENCY, commands.BucketType.guild, wait=True) @@ -123,6 +133,17 @@ async def typst(self, ctx: commands.Context, *, query: str) -> None: except EmptyImageError: await ctx.send("The output image was empty.") return + except TypstTimeoutError: + await ctx.send( + f"Typst rendering took too long (current timeout is {TYPST_TIMEOUT}s)." + ) + image_path.unlink(missing_ok=True) + return + except OutputTooBigError: + await ctx.send( + f"Typst output was too big (current limit is {MAX_RAW_SIZE/1024**2:.1f}MB.)" + ) + return await ctx.send(file=discord.File(image_path, "typst.png")) async def render_typst( @@ -139,13 +160,22 @@ async def render_typst( source_path = Path(tempdir) / "inp.typ" source_path.write_text(TEMPLATE.substitute(text=query), encoding="utf-8") try: - await asyncio.get_event_loop().run_in_executor( - _EXECUTOR, render_typst_worker, source_path, raw_img_path - ) + async with asyncio.timeout(TYPST_TIMEOUT): + await run_sync( + render_typst_worker, + source_path, + raw_img_path, + cancellable=True, + ) except RuntimeError as e: raise InvalidTypstError( e.args[0] if e.args else "" ) + except TimeoutError: + raise TypstTimeoutError + + if raw_img_path.stat().st_size > MAX_RAW_SIZE: + raise OutputTooBigError if CROP_PADDING is None: raw_img_path.rename(image_path) diff --git a/poetry.lock b/poetry.lock index ad9f2c6538..4b40bc409f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -146,6 +146,26 @@ files = [ {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, ] +[[package]] +name = "anyio" +version = "4.6.2.post1" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.9" +files = [ + {file = "anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d"}, + {file = "anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c"}, +] + +[package.dependencies] +idna = ">=2.8" +sniffio = ">=1.1" + +[package.extras] +doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"] +trio = ["trio (>=0.26.1)"] + [[package]] name = "arrow" version = "1.3.0" @@ -1706,6 +1726,17 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + [[package]] name = "sortedcontainers" version = "2.4.0" @@ -1963,4 +1994,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "3.12.*" -content-hash = "6795e79a85089141b6ae1c03b276e20a83fed0bdc15a14a624f75bbedb8b6ea8" +content-hash = "10be1c2e7a594408a281c4037f21f0af66b1815118ef75dacb33bfd72ffcf03a" diff --git a/pyproject.toml b/pyproject.toml index 6d9bd73c4e..29f3009612 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ pyjokes = "0.8.3" pydantic = { version = "2.6.4", extras = ["dotenv"]} pydantic-settings = "2.2.1" typst = "^0.11.1" +anyio = "^4.6.2.post1" [tool.poetry.dev-dependencies] From 2b60e966ed7ee2b78337d7a15a9b03d9ce9494cf Mon Sep 17 00:00:00 2001 From: RundownRhino <52856631+RundownRhino@users.noreply.github.com> Date: Fri, 8 Nov 2024 15:07:31 +0000 Subject: [PATCH 07/12] Update typst to 0.12.0 --- bot/resources/fun/typst_packages.typ | 4 ++-- poetry.lock | 26 +++++++++++++------------- pyproject.toml | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/bot/resources/fun/typst_packages.typ b/bot/resources/fun/typst_packages.typ index df6cebf9b8..ce79ade15b 100644 --- a/bot/resources/fun/typst_packages.typ +++ b/bot/resources/fun/typst_packages.typ @@ -1,6 +1,6 @@ // This file is ran to install the allowed packages before the package directory is write-locked. // It is NOT included into every query's template. #import "@preview/codly:1.0.0" // code presentation (needs configuration in the document to work) -#import "@preview/cetz:0.3.0" // similar to latex's tikz -#import "@preview/fletcher:0.5.1" as fletcher // drawing diagrams; depends on cetz +#import "@preview/cetz:0.3.1" // similar to latex's tikz +#import "@preview/fletcher:0.5.2" // drawing diagrams; depends on cetz #import "@preview/physica:0.9.3" // math and physics diff --git a/poetry.lock b/poetry.lock index 4b40bc409f..6ad6ff5470 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1822,22 +1822,22 @@ files = [ [[package]] name = "typst" -version = "0.11.1" +version = "0.12.0" description = "Python binding to typst" optional = false python-versions = ">=3.7" files = [ - {file = "typst-0.11.1-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:ec50fc97f8afe6da1f8787a72f77f04ddf14e007c88a1b130cb8122a405e4e63"}, - {file = "typst-0.11.1-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:c8af0dabc95a761b07f19c4551da9176bc1b408a9abf9d5d0ab93da3d616abf6"}, - {file = "typst-0.11.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d6bf84d7e1c8402241d10874a4dc4df4995dcff0a581c47b1def92cc4de322b"}, - {file = "typst-0.11.1-cp37-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:11c925fab98fdce6b3f8c498c399eed7224d7a315348aa46e2bb0d9a40af5baa"}, - {file = "typst-0.11.1-cp37-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:812b99f617aabb599508e132cdc881a751d89ad32504def774d3e3b765425872"}, - {file = "typst-0.11.1-cp37-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75313042a605c15649f23eb074c5df34bca66f5b921cd043651afd80ff3ad9f0"}, - {file = "typst-0.11.1-cp37-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffba53328e317b9b0a4e9d05573c98c7f7d4e555259e1ff5a9a2169a85232d71"}, - {file = "typst-0.11.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef8e44bec7a2e168a92aec2fa8008328b630d137753c0550ba654c7855d2893a"}, - {file = "typst-0.11.1-cp37-abi3-win32.whl", hash = "sha256:e2ccebab8f9fc49b2b212d6af0e349aa6cd8fc043c343bf5719aa1ef7dae354f"}, - {file = "typst-0.11.1-cp37-abi3-win_amd64.whl", hash = "sha256:3874f7910687406ee7e80de1aec58487c305b76893b50a3a869496105a5767a5"}, - {file = "typst-0.11.1.tar.gz", hash = "sha256:56a336fef1dbec6ae678d87d680a90ad07c8739bc0599b54fdd7fa351a4efa56"}, + {file = "typst-0.12.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:a464184f6d8c327fbb28c51ab9dd1d5ee1b49ffa3b252977df7f8b2346bd3d3f"}, + {file = "typst-0.12.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:f9b28aaae36637661421216cec81b7f7c55eafc80343266dd7fa67201f7c4c7e"}, + {file = "typst-0.12.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8537f151d339304576173a4ce48d7f8e1806729a32c09393b3800b89653b88f4"}, + {file = "typst-0.12.0-cp37-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:714c716300d073eb51f87e5c6ef743e4319d08fb693d5ad53f17d1cdbdaa986c"}, + {file = "typst-0.12.0-cp37-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bb2c2899fd85ab5e2a708811e94a2411bc3143152b328482f0b23d96006e18c1"}, + {file = "typst-0.12.0-cp37-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:571b01f8fb4c161288143628528596ce6573e7dce867241f1451e4c250862576"}, + {file = "typst-0.12.0-cp37-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0442734f24b5abeb66db4690016a65a933617141ed26f43af95281b0b0e27a7"}, + {file = "typst-0.12.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28e53c2a123c3ea825a95ab7101202ae548a61cb773c80b0db5793570893f26a"}, + {file = "typst-0.12.0-cp37-abi3-win32.whl", hash = "sha256:69a8ef18fbcb6bb10376d111597e464a1d80b63586a5a5324f6b4a3e1bb6f6a5"}, + {file = "typst-0.12.0-cp37-abi3-win_amd64.whl", hash = "sha256:f8d12ba034ce3d2579bc4284a99e6bab5dd83440cd643a25546997b6b58041f8"}, + {file = "typst-0.12.0.tar.gz", hash = "sha256:e3575e0a0a6e323b6e9fb367e838a4caa947112e38ee32911cdaaa9c85176ac5"}, ] [[package]] @@ -1994,4 +1994,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "3.12.*" -content-hash = "10be1c2e7a594408a281c4037f21f0af66b1815118ef75dacb33bfd72ffcf03a" +content-hash = "212ca55d9f279d0c28568ed456d56c80cd758273cae56006b1c473b06f38d61d" diff --git a/pyproject.toml b/pyproject.toml index 29f3009612..2d13ffbbfd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ emoji = "2.12.1" pyjokes = "0.8.3" pydantic = { version = "2.6.4", extras = ["dotenv"]} pydantic-settings = "2.2.1" -typst = "^0.11.1" +typst = "^0.12.0" anyio = "^4.6.2.post1" From 73ebf9807494506d48244c473a374e6070f0c1df Mon Sep 17 00:00:00 2001 From: RundownRhino <52856631+RundownRhino@users.noreply.github.com> Date: Fri, 8 Nov 2024 17:52:26 +0000 Subject: [PATCH 08/12] Don't write output file and add rlimit --- bot/exts/fun/typst.py | 55 ++++++++++++++++++++++++++++++++---------- bot/utils/typst.py | 56 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 97 insertions(+), 14 deletions(-) diff --git a/bot/exts/fun/typst.py b/bot/exts/fun/typst.py index 5a7680ca81..6e8130e341 100644 --- a/bot/exts/fun/typst.py +++ b/bot/exts/fun/typst.py @@ -1,16 +1,25 @@ import asyncio import hashlib import string +from functools import partial +from io import BytesIO from pathlib import Path +from pickle import PicklingError # noqa: S403 from tempfile import TemporaryDirectory import discord import platformdirs from PIL import Image +from anyio import BrokenWorkerProcess from anyio.to_process import run_sync from discord.ext import commands from pydis_core.utils.logging import get_logger -from pydis_core.utils.paste_service import PasteFile, PasteTooLongError, PasteUploadError, send_to_paste_service +from pydis_core.utils.paste_service import ( + PasteFile, + PasteTooLongError, + PasteUploadError, + send_to_paste_service, +) from bot.bot import Bot from bot.constants import Channels, WHITELISTED_CHANNELS @@ -38,6 +47,9 @@ MAX_CONCURRENCY = 2 # max time in seconds to allow the typst process to run TYPST_TIMEOUT: float = 1.0 +# memory limit (in bytes) to set via RLIMIT_AS for the child process. +# Typst uses a lot of RAM when compiling, so this seems to need to be >500MB. +TYPST_MEMORY_LIMIT: int = 1000 * 1024**2 # 1GB, which is pretty generous # max size of the typst (raw) output image to allow rather than emitting an error MAX_RAW_SIZE = 2 * 1024**2 # 2MB @@ -68,6 +80,10 @@ class EmptyImageError(Exception): """Represents an error caused by the output image being empty.""" +class TypstWorkerCrashedError(Exception): + """Represents an error caused by Typst rendering process crashing. This can mean the memory limit was exceeded.""" + + class Typst(commands.Cog): """Renders typst.""" @@ -85,9 +101,8 @@ def _setup_packages(self) -> None: prefix="packageinstall", dir=CACHE_DIRECTORY ) as tempdir: source_path = Path(tempdir) / "inp.typ" - output_path = Path(tempdir) / "discard.png" source_path.write_text(PACKAGES_INSTALL_STRING, encoding="utf-8") - render_typst_worker(source_path, output_path) + render_typst_worker(source_path) if not TYPST_PACKAGES_DIR.exists(): raise ValueError( f"'{TYPST_PACKAGES_DIR}' still doesn't exist after installing packages - " @@ -144,6 +159,12 @@ async def typst(self, ctx: commands.Context, *, query: str) -> None: f"Typst output was too big (current limit is {MAX_RAW_SIZE/1024**2:.1f}MB.)" ) return + except TypstWorkerCrashedError: + await ctx.send( + "Worker process crashed. " + f"Perhaps the memory limit of {TYPST_MEMORY_LIMIT/1024**2:.1f}MB was exceeded?" + ) + return await ctx.send(file=discord.File(image_path, "typst.png")) async def render_typst( @@ -156,15 +177,16 @@ async def render_typst( image_path shouldn't be in that directory or else it will be immediately deleted. """ with TemporaryDirectory(prefix=tempdir_name, dir=CACHE_DIRECTORY) as tempdir: - raw_img_path = Path(tempdir) / image_path.with_stem("raw_out").name source_path = Path(tempdir) / "inp.typ" source_path.write_text(TEMPLATE.substitute(text=query), encoding="utf-8") try: async with asyncio.timeout(TYPST_TIMEOUT): - await run_sync( - render_typst_worker, - source_path, - raw_img_path, + raw_img = await run_sync( + partial( + render_typst_worker, + source_path, + mem_rlimit=TYPST_MEMORY_LIMIT, + ), cancellable=True, ) except RuntimeError as e: @@ -173,15 +195,24 @@ async def render_typst( ) except TimeoutError: raise TypstTimeoutError - - if raw_img_path.stat().st_size > MAX_RAW_SIZE: + except BrokenWorkerProcess: + raise TypstWorkerCrashedError + except PicklingError as e: + # if the child process runs out of memory hard enough, it can fail to even send back an exception, + # which results in this error + if "PanicException" in str(e): + raise TypstWorkerCrashedError + raise + + if len(raw_img) > MAX_RAW_SIZE: + log.debug(len(raw_img)) raise OutputTooBigError if CROP_PADDING is None: - raw_img_path.rename(image_path) + image_path.write_bytes(raw_img) else: res = crop_background( - Image.open(raw_img_path).convert("RGB"), + Image.open(BytesIO(raw_img)).convert("RGB"), (255, 255, 255), pad=CROP_PADDING, ) diff --git a/bot/utils/typst.py b/bot/utils/typst.py index 941fa5f0db..0b9c901f87 100644 --- a/bot/utils/typst.py +++ b/bot/utils/typst.py @@ -1,7 +1,50 @@ +import resource +import warnings +from collections.abc import Iterator +from contextlib import contextmanager, nullcontext +from multiprocessing import parent_process from pathlib import Path +from typing import Literal -def render_typst_worker(source_path: Path, output_path: Path, ppi: float | None = None) -> None: +@contextmanager +def rlimit_context( + resource_id: int, soft_value: int | None = None, hard_value: int | None = None +) -> Iterator[None]: + """ + Context manager to temporarily set an rlimit to a value and restore it on exit. + + A value of None for one of the limits means that the current value will be used. + This should typically be used in a subprocess, and will emit a warning if used from the main process. + """ + if soft_value is None and hard_value is None: + warnings.warn( + "rlimit_context does nothing if both soft_value and hard_value are None", + stacklevel=2, + ) + if parent_process() is None: + warnings.warn("rlimit_context used from main process", stacklevel=2) + + old_soft, old_hard = resource.getrlimit(resource_id) + new_soft, new_hard = old_soft, old_hard + if soft_value is not None: + new_soft = soft_value + if hard_value is not None: + new_hard = hard_value + + resource.setrlimit(resource_id, (new_soft, new_hard)) + try: + yield + finally: + resource.setrlimit(resource_id, (old_soft, old_hard)) + + +def render_typst_worker( + source_path: Path, + format: Literal["pdf", "svg", "png"] = "png", + ppi: float | None = None, + mem_rlimit: int | None = None, +) -> bytes: """ Renders Typst from source to output. @@ -10,4 +53,13 @@ def render_typst_worker(source_path: Path, output_path: Path, ppi: float | None """ import typst - typst.compile(source_path, output_path, root=source_path.parent, ppi=ppi) + ctx = ( + rlimit_context(resource.RLIMIT_AS, soft_value=mem_rlimit) + if mem_rlimit is not None + else nullcontext() + ) + with ctx: + res = typst.compile( + source_path, format=format, root=source_path.parent, ppi=ppi + ) + return res From fe5a4bfe9a38216532b402b7debbb25cfcbf70f9 Mon Sep 17 00:00:00 2001 From: RundownRhino <52856631+RundownRhino@users.noreply.github.com> Date: Sat, 9 Nov 2024 18:08:47 +0000 Subject: [PATCH 09/12] Use the typst CLI to prevent high memory usage issues --- bot/constants.py | 17 +++++ bot/exts/fun/typst.py | 140 ++++++++++++++++++++++++++---------------- bot/utils/archives.py | 44 +++++++++++++ bot/utils/typst.py | 107 ++++++++++++++++++-------------- poetry.lock | 53 +--------------- pyproject.toml | 2 - 6 files changed, 210 insertions(+), 153 deletions(-) create mode 100644 bot/utils/archives.py diff --git a/bot/constants.py b/bot/constants.py index 3d24ac1e33..42c8d21210 100644 --- a/bot/constants.py +++ b/bot/constants.py @@ -305,6 +305,23 @@ class _Reddit(EnvConfig, env_prefix="reddit_"): Reddit = _Reddit() + +# "typst_" is the prefix for Typst's own envvars, so "typstext_" +class _Typst(EnvConfig, env_prefix="typstext_"): + # the path to the typst binary that will be used. + # the Typst cog can download it to this path automatically + typst_path: str = "bot/exts/fun/_typst_cache/typst" + + # fetching configuration. note that the defaults assume Linux on x86_64 + + # the direct url to fetch a typst release archive from. It will be unpacked and the executable from it used. + typst_archive_url: str = "https://github.com/typst/typst/releases/download/v0.12.0/typst-x86_64-unknown-linux-musl.tar.xz" + # SHA256 hex digest the archive at typst_archive_url will be checked against. can be obtained by sha256sum + typst_archive_sha256: str = "605130a770ebd59a4a579673079cb913a13e75985231657a71d6239a57539ec3" + + +Typst = _Typst() + # Default role combinations MODERATION_ROLES = {Roles.moderation_team, Roles.admins, Roles.owners} STAFF_ROLES = {Roles.helpers, Roles.moderation_team, Roles.admins, Roles.owners} diff --git a/bot/exts/fun/typst.py b/bot/exts/fun/typst.py index 6e8130e341..1c14c21932 100644 --- a/bot/exts/fun/typst.py +++ b/bot/exts/fun/typst.py @@ -1,17 +1,14 @@ import asyncio import hashlib import string -from functools import partial +import sys from io import BytesIO from pathlib import Path -from pickle import PicklingError # noqa: S403 from tempfile import TemporaryDirectory import discord import platformdirs from PIL import Image -from anyio import BrokenWorkerProcess -from anyio.to_process import run_sync from discord.ext import commands from pydis_core.utils.logging import get_logger from pydis_core.utils.paste_service import ( @@ -22,11 +19,12 @@ ) from bot.bot import Bot -from bot.constants import Channels, WHITELISTED_CHANNELS +from bot.constants import Channels, Typst as Config, WHITELISTED_CHANNELS +from bot.utils.archives import archive_retrieve_file from bot.utils.codeblocks import prepare_input from bot.utils.decorators import whitelist_override from bot.utils.images import crop_background -from bot.utils.typst import render_typst_worker +from bot.utils.typst import compile_typst log = get_logger(__name__) @@ -40,18 +38,22 @@ TEMPLATE = string.Template(Path("bot/resources/fun/typst_template.typ").read_text()) PACKAGES_INSTALL_STRING = Path("bot/resources/fun/typst_packages.typ").read_text() +# the default typst packages directory. TYPST_PACKAGES_DIR = platformdirs.user_cache_path("typst") / "packages" # how many pixels to leave on each side when cropping the image to only the contents. Set to None to disable cropping. CROP_PADDING: int | None = 10 -MAX_CONCURRENCY = 2 +# commands.max_concurrency limit for .typst +MAX_CONCURRENCY: int = 2 # max time in seconds to allow the typst process to run TYPST_TIMEOUT: float = 1.0 # memory limit (in bytes) to set via RLIMIT_AS for the child process. -# Typst uses a lot of RAM when compiling, so this seems to need to be >500MB. -TYPST_MEMORY_LIMIT: int = 1000 * 1024**2 # 1GB, which is pretty generous -# max size of the typst (raw) output image to allow rather than emitting an error +TYPST_MEMORY_LIMIT: int = 200 * 1024**2 # 200MB, which is pretty generous +# max size of the typst output image (before cropping) to allow rather than emitting an error MAX_RAW_SIZE = 2 * 1024**2 # 2MB +# if set, limits the internal parallelism of the typst subprocess (--jobs argument). +WORKER_JOBS: int | None = None + TYPST_ALLOWED_CHANNNELS = WHITELISTED_CHANNELS + ( Channels.data_science_and_ai, @@ -89,9 +91,48 @@ class Typst(commands.Cog): def __init__(self, bot: Bot): self.bot = bot - self._setup_packages() - def _setup_packages(self) -> None: + async def _setup_typst(self) -> None: + await self._ensure_typst_executable() + await self._setup_packages() + + async def _ensure_typst_executable(self) -> None: + path = Path(Config.typst_path).resolve() + if path.exists(): + if not path.is_file(): + raise ValueError("Typst path exists but doesn't point to a file:", path) + else: + log.info("Typst executable not found at '%s', downloading", path) + await self._download_typst_executable() + proc = await asyncio.subprocess.create_subprocess_exec( + path, + "--version", + stdin=asyncio.subprocess.DEVNULL, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, _ = await proc.communicate() + log.info(f"Typst executable reports itself as {stdout.decode('utf-8').strip()!r}.") + + async def _download_typst_executable(self) -> None: + if not Config.typst_archive_url: + raise ValueError("Trying to download Typst but the archive URL isn't set") + async with self.bot.http_session.get( + Config.typst_archive_url, raise_for_status=True + ) as response: + arc_data = await response.read() + log.info("Retrieved Typst archive, unpacking") + typst_executable = archive_retrieve_file( + arc_data, + filename="typst.exe" if sys.platform == "win32" else "typst", + ) + log.info("Storing the typst executable on disk") + exe_path = Path(Config.typst_path) + exe_path.parent.mkdir(exist_ok=True) + exe_path.write_bytes(typst_executable) + exe_path.chmod(0o555) # read, execute, not write + + async def _setup_packages(self) -> None: if TYPST_PACKAGES_DIR.exists(): return log.info( @@ -100,9 +141,8 @@ def _setup_packages(self) -> None: with TemporaryDirectory( prefix="packageinstall", dir=CACHE_DIRECTORY ) as tempdir: - source_path = Path(tempdir) / "inp.typ" - source_path.write_text(PACKAGES_INSTALL_STRING, encoding="utf-8") - render_typst_worker(source_path) + await compile_typst(PACKAGES_INSTALL_STRING, root_path=Path(tempdir)) + if not TYPST_PACKAGES_DIR.exists(): raise ValueError( f"'{TYPST_PACKAGES_DIR}' still doesn't exist after installing packages - " @@ -160,6 +200,7 @@ async def typst(self, ctx: commands.Context, *, query: str) -> None: ) return except TypstWorkerCrashedError: + log.exception("typst worker crashed") await ctx.send( "Worker process crashed. " f"Perhaps the memory limit of {TYPST_MEMORY_LIMIT/1024**2:.1f}MB was exceeded?" @@ -171,23 +212,21 @@ async def render_typst( self, query: str, tempdir_name: str, image_path: Path ) -> None: """ - Renders the query as Typst. + Renders the query as Typst to PNG. - Does so by writing it into a file in a temporary directory, storing the output in image_path. - image_path shouldn't be in that directory or else it will be immediately deleted. + If successful, the processed output is stored in `image_path`. + `tempdir_name` under cache will be the temporary root directory to be used. """ + source = TEMPLATE.substitute(text=query) with TemporaryDirectory(prefix=tempdir_name, dir=CACHE_DIRECTORY) as tempdir: - source_path = Path(tempdir) / "inp.typ" - source_path.write_text(TEMPLATE.substitute(text=query), encoding="utf-8") try: async with asyncio.timeout(TYPST_TIMEOUT): - raw_img = await run_sync( - partial( - render_typst_worker, - source_path, - mem_rlimit=TYPST_MEMORY_LIMIT, - ), - cancellable=True, + res = await compile_typst( + source, + root_path=Path(tempdir), + format="png", + mem_rlimit=TYPST_MEMORY_LIMIT, + jobs=WORKER_JOBS, ) except RuntimeError as e: raise InvalidTypstError( @@ -195,30 +234,25 @@ async def render_typst( ) except TimeoutError: raise TypstTimeoutError - except BrokenWorkerProcess: - raise TypstWorkerCrashedError - except PicklingError as e: - # if the child process runs out of memory hard enough, it can fail to even send back an exception, - # which results in this error - if "PanicException" in str(e): - raise TypstWorkerCrashedError - raise - - if len(raw_img) > MAX_RAW_SIZE: - log.debug(len(raw_img)) - raise OutputTooBigError - - if CROP_PADDING is None: - image_path.write_bytes(raw_img) - else: - res = crop_background( - Image.open(BytesIO(raw_img)).convert("RGB"), - (255, 255, 255), - pad=CROP_PADDING, - ) - if res is None: - raise EmptyImageError - res.save(image_path) + + raw_img = res.output + if len(raw_img) > MAX_RAW_SIZE: + log.debug( + "Raw image rejected for having size %.1f MB", len(raw_img) / 1024**2 + ) + raise OutputTooBigError + + if CROP_PADDING is None: + image_path.write_bytes(raw_img) + else: + res = crop_background( + Image.open(BytesIO(raw_img)).convert("RGB"), + (255, 255, 255), + pad=CROP_PADDING, + ) + if res is None: + raise EmptyImageError + res.save(image_path) async def _prepare_error_embed( self, err: InvalidTypstError | None @@ -253,4 +287,6 @@ async def _upload_to_pastebin(self, text: str) -> str | None: async def setup(bot: Bot) -> None: """Load the Typst Cog.""" - await bot.add_cog(Typst(bot)) + cog = Typst(bot) + await cog._setup_typst() + await bot.add_cog(cog) diff --git a/bot/utils/archives.py b/bot/utils/archives.py new file mode 100644 index 0000000000..55bf8f5d62 --- /dev/null +++ b/bot/utils/archives.py @@ -0,0 +1,44 @@ +import tarfile +import typing +import zipfile +from io import BytesIO +from pathlib import PurePath + + +def _tar_retrieve_file(archive_data: typing.BinaryIO, filename: str) -> bytes: + with tarfile.open(fileobj=archive_data) as arc: + for el in arc.getmembers(): + if PurePath(el.name).name == filename: + fo = arc.extractfile(el) + if fo is None: + raise ValueError( + "Member has the right name but couldn't extract:", el + ) + return fo.read() + raise FileNotFoundError("No member with this name was found in archive:", filename) + + +def _zip_retrieve_file(archive_data: typing.BinaryIO, filename: str) -> bytes: + with zipfile.ZipFile(file=archive_data) as arc: + for el in arc.filelist: + if PurePath(el.filename).name == filename: + return arc.read(el) + raise FileNotFoundError("No member with this name was found in archive:", filename) + + +def archive_retrieve_file( + archive_data: bytes | typing.BinaryIO, filename: str +) -> bytes: + """Retrieves a single file by filename (not by full path) from a tar or zip archive in memory.""" + if isinstance(archive_data, bytes | bytearray | memoryview): + archive_data = BytesIO(archive_data) + if tarfile.is_tarfile(archive_data): + return _tar_retrieve_file(archive_data, filename) + try: + return _zip_retrieve_file(archive_data, filename) + except zipfile.BadZipFile as e: + if "File is not a zip file" in str(e): + raise ValueError( + "Archive unsupported: was neither a valid tarfile nor a valid zipfile" + ) + raise diff --git a/bot/utils/typst.py b/bot/utils/typst.py index 0b9c901f87..818b8b6392 100644 --- a/bot/utils/typst.py +++ b/bot/utils/typst.py @@ -1,65 +1,78 @@ +import asyncio.subprocess +import contextlib import resource -import warnings -from collections.abc import Iterator -from contextlib import contextmanager, nullcontext -from multiprocessing import parent_process +from dataclasses import dataclass +from functools import partial from pathlib import Path from typing import Literal +from bot.constants import Typst as Config -@contextmanager -def rlimit_context( - resource_id: int, soft_value: int | None = None, hard_value: int | None = None -) -> Iterator[None]: - """ - Context manager to temporarily set an rlimit to a value and restore it on exit. - A value of None for one of the limits means that the current value will be used. - This should typically be used in a subprocess, and will emit a warning if used from the main process. - """ - if soft_value is None and hard_value is None: - warnings.warn( - "rlimit_context does nothing if both soft_value and hard_value are None", - stacklevel=2, - ) - if parent_process() is None: - warnings.warn("rlimit_context used from main process", stacklevel=2) +def _set_limits(mem_rlimit: int | None = None) -> None: + if mem_rlimit is not None: + resource.setrlimit(resource.RLIMIT_AS, (mem_rlimit, -1)) - old_soft, old_hard = resource.getrlimit(resource_id) - new_soft, new_hard = old_soft, old_hard - if soft_value is not None: - new_soft = soft_value - if hard_value is not None: - new_hard = hard_value - resource.setrlimit(resource_id, (new_soft, new_hard)) - try: - yield - finally: - resource.setrlimit(resource_id, (old_soft, old_hard)) +@dataclass +class TypstCompileResult: + """Result of Typst compilation.""" + + output: bytes + stderr: bytes -def render_typst_worker( - source_path: Path, +async def compile_typst( + source: str, + root_path: Path, format: Literal["pdf", "svg", "png"] = "png", ppi: float | None = None, mem_rlimit: int | None = None, -) -> bytes: + jobs: int | None = None, +) -> TypstCompileResult: """ - Renders Typst from source to output. + Renders Typst in a subprocess. + + Since malicious Typst source can take arbitrary resources to compile, + this should be ran with a timeout, and ideally a `mem_rlimit`. + `root_path` should be a path to a directory where all the files (if any) + that should be accessible are placed. - Uses the source path's parent as the typst root path. - Intended to be ran in a subprocess for concurrency and timeouts. """ - import typst + typst_path = Path(Config.typst_path).resolve() + if not typst_path.exists(): + raise FileNotFoundError("Typst executable was not found at path", typst_path) + if not root_path.is_dir(): + raise ValueError("Root directory was not a directory", root_path) + + args = [ + "compile", + "--root", + root_path, + "--format", + format, + ] + if ppi is not None: + args += ["--ppi", str(ppi)] + if jobs is not None: + args += ["--jobs", str(jobs)] + # input and output from CLI + args += ["-", "-"] - ctx = ( - rlimit_context(resource.RLIMIT_AS, soft_value=mem_rlimit) - if mem_rlimit is not None - else nullcontext() - ) - with ctx: - res = typst.compile( - source_path, format=format, root=source_path.parent, ppi=ppi + try: + proc = await asyncio.subprocess.create_subprocess_exec( + typst_path, + *args, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + preexec_fn=partial(_set_limits, mem_rlimit=mem_rlimit), ) - return res + + stdout, stderr = await proc.communicate(input=source.encode("utf-8")) + # if the task is cancelled or any other problem happens, make sure to kill the worker + except BaseException: + with contextlib.suppress(UnboundLocalError): + proc.kill() + raise + return TypstCompileResult(output=stdout, stderr=stderr) diff --git a/poetry.lock b/poetry.lock index 6ad6ff5470..4da3935dad 100644 --- a/poetry.lock +++ b/poetry.lock @@ -146,26 +146,6 @@ files = [ {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, ] -[[package]] -name = "anyio" -version = "4.6.2.post1" -description = "High level compatibility layer for multiple asynchronous event loop implementations" -optional = false -python-versions = ">=3.9" -files = [ - {file = "anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d"}, - {file = "anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c"}, -] - -[package.dependencies] -idna = ">=2.8" -sniffio = ">=1.1" - -[package.extras] -doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"] -trio = ["trio (>=0.26.1)"] - [[package]] name = "arrow" version = "1.3.0" @@ -1726,17 +1706,6 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] -[[package]] -name = "sniffio" -version = "1.3.1" -description = "Sniff out which async library your code is running under" -optional = false -python-versions = ">=3.7" -files = [ - {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, - {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, -] - [[package]] name = "sortedcontainers" version = "2.4.0" @@ -1820,26 +1789,6 @@ files = [ {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, ] -[[package]] -name = "typst" -version = "0.12.0" -description = "Python binding to typst" -optional = false -python-versions = ">=3.7" -files = [ - {file = "typst-0.12.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:a464184f6d8c327fbb28c51ab9dd1d5ee1b49ffa3b252977df7f8b2346bd3d3f"}, - {file = "typst-0.12.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:f9b28aaae36637661421216cec81b7f7c55eafc80343266dd7fa67201f7c4c7e"}, - {file = "typst-0.12.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8537f151d339304576173a4ce48d7f8e1806729a32c09393b3800b89653b88f4"}, - {file = "typst-0.12.0-cp37-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:714c716300d073eb51f87e5c6ef743e4319d08fb693d5ad53f17d1cdbdaa986c"}, - {file = "typst-0.12.0-cp37-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bb2c2899fd85ab5e2a708811e94a2411bc3143152b328482f0b23d96006e18c1"}, - {file = "typst-0.12.0-cp37-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:571b01f8fb4c161288143628528596ce6573e7dce867241f1451e4c250862576"}, - {file = "typst-0.12.0-cp37-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0442734f24b5abeb66db4690016a65a933617141ed26f43af95281b0b0e27a7"}, - {file = "typst-0.12.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28e53c2a123c3ea825a95ab7101202ae548a61cb773c80b0db5793570893f26a"}, - {file = "typst-0.12.0-cp37-abi3-win32.whl", hash = "sha256:69a8ef18fbcb6bb10376d111597e464a1d80b63586a5a5324f6b4a3e1bb6f6a5"}, - {file = "typst-0.12.0-cp37-abi3-win_amd64.whl", hash = "sha256:f8d12ba034ce3d2579bc4284a99e6bab5dd83440cd643a25546997b6b58041f8"}, - {file = "typst-0.12.0.tar.gz", hash = "sha256:e3575e0a0a6e323b6e9fb367e838a4caa947112e38ee32911cdaaa9c85176ac5"}, -] - [[package]] name = "urllib3" version = "2.2.2" @@ -1994,4 +1943,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "3.12.*" -content-hash = "212ca55d9f279d0c28568ed456d56c80cd758273cae56006b1c473b06f38d61d" +content-hash = "56ecbd30a95a09c65cef191fda4846952082ca086eca97a46cb2547e8ec4a7de" diff --git a/pyproject.toml b/pyproject.toml index 2d13ffbbfd..edac48d0fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,8 +25,6 @@ emoji = "2.12.1" pyjokes = "0.8.3" pydantic = { version = "2.6.4", extras = ["dotenv"]} pydantic-settings = "2.2.1" -typst = "^0.12.0" -anyio = "^4.6.2.post1" [tool.poetry.dev-dependencies] From 81ffc6577a12c8fc26752bb6a5f26cf151e09f3a Mon Sep 17 00:00:00 2001 From: RundownRhino <52856631+RundownRhino@users.noreply.github.com> Date: Sat, 9 Nov 2024 19:08:22 +0000 Subject: [PATCH 10/12] Better handling of errors in typst process --- bot/exts/fun/typst.py | 21 +++++++++++++++------ bot/utils/typst.py | 14 +++++++++++--- pyproject.toml | 1 + 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/bot/exts/fun/typst.py b/bot/exts/fun/typst.py index 1c14c21932..7d0d76ea8c 100644 --- a/bot/exts/fun/typst.py +++ b/bot/exts/fun/typst.py @@ -4,6 +4,7 @@ import sys from io import BytesIO from pathlib import Path +from subprocess import CalledProcessError from tempfile import TemporaryDirectory import discord @@ -112,7 +113,9 @@ async def _ensure_typst_executable(self) -> None: stderr=asyncio.subprocess.PIPE, ) stdout, _ = await proc.communicate() - log.info(f"Typst executable reports itself as {stdout.decode('utf-8').strip()!r}.") + log.info( + f"Typst executable reports itself as {stdout.decode('utf-8').strip()!r}." + ) async def _download_typst_executable(self) -> None: if not Config.typst_archive_url: @@ -200,7 +203,6 @@ async def typst(self, ctx: commands.Context, *, query: str) -> None: ) return except TypstWorkerCrashedError: - log.exception("typst worker crashed") await ctx.send( "Worker process crashed. " f"Perhaps the memory limit of {TYPST_MEMORY_LIMIT/1024**2:.1f}MB was exceeded?" @@ -228,12 +230,19 @@ async def render_typst( mem_rlimit=TYPST_MEMORY_LIMIT, jobs=WORKER_JOBS, ) - except RuntimeError as e: - raise InvalidTypstError( - e.args[0] if e.args else "" - ) except TimeoutError: raise TypstTimeoutError + except CalledProcessError as e: + err = e.stderr.decode("utf-8") + # when the memory limit is reached this usually shows up as signal 6 (SIGABRT), but it can vary + if e.returncode < 0: + log.debug( + "Typst subprocess died due to a signal %s", + str(e).split("died with")[-1].strip(), + ) + raise TypstWorkerCrashedError + # if in doubt we assume it's a normal error and return the logs + raise InvalidTypstError(err) raw_img = res.output if len(raw_img) > MAX_RAW_SIZE: diff --git a/bot/utils/typst.py b/bot/utils/typst.py index 818b8b6392..02e8483feb 100644 --- a/bot/utils/typst.py +++ b/bot/utils/typst.py @@ -4,6 +4,7 @@ from dataclasses import dataclass from functools import partial from pathlib import Path +from subprocess import CalledProcessError from typing import Literal from bot.constants import Typst as Config @@ -41,7 +42,7 @@ async def compile_typst( """ typst_path = Path(Config.typst_path).resolve() if not typst_path.exists(): - raise FileNotFoundError("Typst executable was not found at path", typst_path) + raise ValueError("Typst executable was not found at path", typst_path) if not root_path.is_dir(): raise ValueError("Root directory was not a directory", root_path) @@ -70,9 +71,16 @@ async def compile_typst( ) stdout, stderr = await proc.communicate(input=source.encode("utf-8")) - # if the task is cancelled or any other problem happens, make sure to kill the worker + if proc.returncode is None: + # shouldn't be possible + raise RuntimeError("Process didn't terminate after communicate") + if proc.returncode != 0: + raise CalledProcessError( + proc.returncode, [typst_path, *args], stdout, stderr + ) + # if the task is cancelled or any other problem happens, make sure to kill the worker if it still exists except BaseException: - with contextlib.suppress(UnboundLocalError): + with contextlib.suppress(UnboundLocalError, ProcessLookupError): proc.kill() raise return TypstCompileResult(output=stdout, stderr=stderr) diff --git a/pyproject.toml b/pyproject.toml index edac48d0fa..653c218fcd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,7 @@ ignore = [ "RUF029", "S311", "SIM102", "SIM108", + "S404", # S404 is bugged and doesn't respect noqa ] [tool.ruff.lint.isort] From e6dbe2ec28c7c3f35c62051c36c498bf5efa3bc3 Mon Sep 17 00:00:00 2001 From: RundownRhino <52856631+RundownRhino@users.noreply.github.com> Date: Sat, 9 Nov 2024 19:08:40 +0000 Subject: [PATCH 11/12] Don't use FileNotFoundError --- bot/utils/archives.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bot/utils/archives.py b/bot/utils/archives.py index 55bf8f5d62..61ad2d1a51 100644 --- a/bot/utils/archives.py +++ b/bot/utils/archives.py @@ -15,7 +15,7 @@ def _tar_retrieve_file(archive_data: typing.BinaryIO, filename: str) -> bytes: "Member has the right name but couldn't extract:", el ) return fo.read() - raise FileNotFoundError("No member with this name was found in archive:", filename) + raise ValueError("No member with this name was found in archive:", filename) def _zip_retrieve_file(archive_data: typing.BinaryIO, filename: str) -> bytes: @@ -23,7 +23,7 @@ def _zip_retrieve_file(archive_data: typing.BinaryIO, filename: str) -> bytes: for el in arc.filelist: if PurePath(el.filename).name == filename: return arc.read(el) - raise FileNotFoundError("No member with this name was found in archive:", filename) + raise ValueError("No member with this name was found in archive:", filename) def archive_retrieve_file( From 02a836754a657cfd962ccfca612a4d0092102bdd Mon Sep 17 00:00:00 2001 From: RundownRhino <52856631+RundownRhino@users.noreply.github.com> Date: Sat, 9 Nov 2024 19:39:19 +0000 Subject: [PATCH 12/12] Verify SHA256 hash of typst executable --- bot/exts/fun/typst.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bot/exts/fun/typst.py b/bot/exts/fun/typst.py index 7d0d76ea8c..d1e9793918 100644 --- a/bot/exts/fun/typst.py +++ b/bot/exts/fun/typst.py @@ -120,10 +120,18 @@ async def _ensure_typst_executable(self) -> None: async def _download_typst_executable(self) -> None: if not Config.typst_archive_url: raise ValueError("Trying to download Typst but the archive URL isn't set") + if not Config.typst_archive_sha256: + raise ValueError("Trying to download Typst but the archive hash isn't set") async with self.bot.http_session.get( Config.typst_archive_url, raise_for_status=True ) as response: arc_data = await response.read() + digest = hashlib.sha256(arc_data).hexdigest() + if digest != Config.typst_archive_sha256: + raise ValueError( + f"Retrieved archive doesn't match hash {Config.typst_archive_sha256}; " + f"instead got file with size {len(arc_data)} and hash {digest}" + ) log.info("Retrieved Typst archive, unpacking") typst_executable = archive_retrieve_file( arc_data,