Skip to content

Commit

Permalink
Image cache (#906)
Browse files Browse the repository at this point in the history
* first step for cached generated assets

* set up individual asset generating functions, except latex_image

* finish individual asset generation functions which manage generated cache

* implement generate assets with cache

* format and add to changelog

* add options for generate and build commands

* add asset_type to hashes to avoid collision

* fix tests and subset build hash table
  • Loading branch information
oscarlevin authored Jan 22, 2025
1 parent b5257f8 commit 7db949b
Show file tree
Hide file tree
Showing 13 changed files with 428 additions and 320 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ Instructions: Add a subsection under `[Unreleased]` for additions, fixes, change

## [Unreleased]

### Changed

- Asset generation of asymptote, latex-image, and sageplot now utilizes a *generated-cache* of images (stored in `.generated-cache` in the root of a project, but customizable in `project.ptx`). This should speed up building and generating assets.

## [2.12.0] - 2025-01-16

Includes updates to core through commit: [3ce0b18](https://github.com/PreTeXtBook/pretext/commit/3ce0b18284473f5adf52cea46374688299b6d643)
Expand Down
18 changes: 17 additions & 1 deletion pretext/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ def build(
try:
for t in targets:
log.info(f"Generating assets for {t.name}")
t.generate_assets(only_changed=False, xmlid=xmlid)
t.generate_assets(only_changed=False, xmlid=xmlid, clean=clean)
no_generate = True
except Exception as e:
log.error(f"Failed to generate assets: {e} \n")
Expand Down Expand Up @@ -672,6 +672,18 @@ def build(
default=False,
help="Generate all possible asset formats rather than just the defaults for the specified target.",
)
@click.option(
"--clean",
is_flag=True,
default=False,
help="Remove all generated assets, including the cache, before generating new ones.",
)
@click.option(
"-f",
"--force",
is_flag=True,
help="Force generation of assets; do not rely on assets in the cache.",
)
@click.pass_context
@nice_errors
def generate(
Expand All @@ -681,6 +693,8 @@ def generate(
all_formats: bool,
only_changed: bool,
xmlid: Optional[str],
clean: bool,
force: bool,
) -> None:
"""
Generate specified (or all) assets for the default target (first target in "project.ptx"). Asset "generation" is typically
Expand Down Expand Up @@ -717,6 +731,8 @@ def generate(
all_formats=all_formats,
only_changed=only_changed, # Unless requested, generate all assets, so don't check the cache.
xmlid=xmlid,
clean=clean,
skip_cache=force,
)
log.info("Finished generating assets.\n")
except ValidationError as e:
Expand Down
540 changes: 237 additions & 303 deletions pretext/project/__init__.py

Large diffs are not rendered by default.

142 changes: 142 additions & 0 deletions pretext/project/generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import typing as t
import logging
import hashlib
from pathlib import Path
import shutil
from .. import core


log = logging.getLogger("ptxlogger")


# The individual asset type generation functions


def individual_asymptote(
    asydiagram: str,
    outformat: str,
    method: str,
    asy_cli: t.List[str],
    asyversion: str,
    alberta: str,
    dest_dir: Path,
    cache_dir: Path,
    skip_cache: bool = False,
) -> None:
    """
    Place one asymptote diagram in dest_dir, reusing a cached copy when possible.

    If a cached version of the diagram in the requested outformat exists (and
    skip_cache is False), it is copied to dest_dir. Otherwise
    core.individual_asymptote_conversion is called to generate the diagram, and
    if the expected output file then exists, a copy is saved in cache_dir.

    - asydiagram: path to the asymptote source file.
    - outformat: a file extension (without the leading dot).
    - method, asy_cli, asyversion, alberta: passed through unchanged to
      core.individual_asymptote_conversion.
    - dest_dir: directory that receives the diagram.
    - cache_dir: directory holding cached diagrams, named by content hash.
    - skip_cache: when True, always regenerate (the cache is still refreshed).
    """
    log.debug("Using the CLI's individual_asymptote function")
    # Work with Path objects locally so that a caller passing plain strings
    # does not break the `/` joins below.
    dest_path = Path(dest_dir)
    cache_path = Path(cache_dir)
    asset_file = Path(asydiagram).resolve()
    cache_file = cache_asset_filename(asset_file, outformat, "asymptote", cache_path)
    output_file = dest_path / asset_file.with_suffix(f".{outformat}").name
    if cache_file.exists() and not skip_cache:
        log.debug(f"Copying cached asymptote diagram {cache_file} to {output_file}")
        # On a cache hit core is never invoked, so nothing else guarantees
        # that the destination directory exists.
        dest_path.mkdir(parents=True, exist_ok=True)
        shutil.copy2(cache_file, output_file)
    else:
        # Hand core the caller's original dest_dir value, untouched.
        core.individual_asymptote_conversion(
            asydiagram, outformat, method, asy_cli, asyversion, alberta, dest_dir
        )
        if output_file.exists():
            log.debug(
                f"Created asymptote diagram {output_file}; saving a copy to cache as {cache_file}"
            )
            # The cache directory may not have been created yet.
            cache_file.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(output_file, cache_file)
    log.debug("Finished individual_asymptote function")


def individual_sage(
    sageplot: str,
    outformat: str,
    dest_dir: Path,
    sage_executable_cmd: t.List[str],
    cache_dir: Path,
    skip_cache: bool = False,
) -> None:
    """
    Place one sageplot image in dest_dir, reusing a cached copy when possible.

    If a cached version of the plot in the requested outformat exists (and
    skip_cache is False), it is copied to dest_dir. Otherwise
    core.individual_sage_conversion is called to generate the plot, and if the
    expected output file then exists, a copy is saved in cache_dir.

    - sageplot: path to the sageplot source file.
    - outformat: a file extension (without the leading dot).
    - dest_dir: directory that receives the image.
    - sage_executable_cmd: passed through unchanged to
      core.individual_sage_conversion.
    - cache_dir: directory holding cached images, named by content hash.
    - skip_cache: when True, always regenerate (the cache is still refreshed).
    """
    log.debug("Using the CLI's individual_sage function")
    # Work with Path objects locally so that a caller passing plain strings
    # does not break the `/` joins below.
    dest_path = Path(dest_dir)
    cache_path = Path(cache_dir)
    asset_file = Path(sageplot).resolve()
    cache_file = cache_asset_filename(
        asset_file,
        outformat,
        "sageplot",
        cache_path,
    )
    output_file = dest_path / asset_file.with_suffix(f".{outformat}").name
    if cache_file.exists() and not skip_cache:
        log.debug(f"Copying cached sageplot diagram {cache_file} to {output_file}")
        # On a cache hit core is never invoked, so nothing else guarantees
        # that the destination directory exists.
        dest_path.mkdir(parents=True, exist_ok=True)
        shutil.copy2(cache_file, output_file)
    else:
        # Hand core the caller's original dest_dir value, untouched.
        core.individual_sage_conversion(
            sageplot, outformat, dest_dir, sage_executable_cmd
        )
        if output_file.exists():
            log.debug(
                f"Created sageplot diagram {output_file}; saving a copy to cache as {cache_file}"
            )
            # The cache directory may not have been created yet.
            cache_file.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(output_file, cache_file)
    log.debug("Finished individual_sage function")


def individual_latex_image(
    latex_image: str,
    outformat: str,
    dest_dir: Path,
    method: str,
    cache_dir: Path,
    skip_cache: bool = False,
) -> None:
    """
    Place one latex-image in dest_dir, reusing cached copies when possible.

    If cached versions exist for every requested format (and skip_cache is
    False), they are copied to dest_dir. Otherwise
    core.individual_latex_image_conversion is called to generate the image, and
    every expected output file that then exists is copied into cache_dir.

    - latex_image: path to the latex-image source file.
    - outformat: 'all' (meaning png, pdf, svg and eps) or a single file
      extension (without the leading dot).
    - dest_dir: directory that receives the image(s).
    - method: passed through unchanged to core.individual_latex_image_conversion.
    - cache_dir: directory holding cached images, named by content hash.
    - skip_cache: when True, always regenerate (the cache is still refreshed).
    """
    log.debug("Using the CLI's individual_latex function")
    # Work with Path objects locally so that a caller passing plain strings
    # does not break the `/` joins below.
    dest_path = Path(dest_dir)
    cache_path = Path(cache_dir)
    asset_file = Path(latex_image).resolve()
    outformats = ["png", "pdf", "svg", "eps"] if outformat == "all" else [outformat]
    cache_files = {
        ext: cache_asset_filename(asset_file, ext, "latex_image", cache_path)
        for ext in outformats
    }
    output_files = {
        ext: dest_path / asset_file.with_suffix(f".{ext}").name for ext in outformats
    }
    # In case outformat was "all", we check whether all the desired outformats
    # are cached. If not, we generate all of them (since it is only the first
    # that is time-intensive).
    use_cache = not skip_cache and all(
        cache_files[ext].exists() for ext in outformats
    )
    if use_cache:
        # On a cache hit core is never invoked, so nothing else guarantees
        # that the destination directory exists.
        dest_path.mkdir(parents=True, exist_ok=True)
        for ext in outformats:
            log.debug(
                f"Copying cached latex-image {cache_files[ext]} to {output_files[ext]}"
            )
            shutil.copy2(cache_files[ext], output_files[ext])
    else:
        # Hand core the caller's original dest_dir value, untouched.
        core.individual_latex_image_conversion(latex_image, outformat, dest_dir, method)
        for ext in outformats:
            if output_files[ext].exists():
                log.debug(
                    f"Created latex-image {output_files[ext]}; saving a copy to cache as {cache_files[ext]}"
                )
                # The cache directory may not have been created yet.
                cache_files[ext].parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(output_files[ext], cache_files[ext])
    log.debug("Finished individual_latex function")


def cache_asset_filename(
    asset_file: Path, extension: str, asset_type: str, cache_dir: Path
) -> Path:
    """
    Return the path inside cache_dir under which an asset is cached.

    The file name is the MD5 hex digest of the asset file's bytes followed by
    the asset_type string, with the given extension appended. Mixing in the
    asset type keeps identical source used for different asset types from
    colliding. The digest is only a cache key, not a security measure.
    """
    # Seed the digest with the asset's content, then fold in its type.
    digest = hashlib.md5(asset_file.read_bytes())
    digest.update(asset_type.encode())
    return cache_dir / f"{digest.hexdigest()}.{extension}"
2 changes: 1 addition & 1 deletion pretext/resources/resource_hash_table.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"2.11.5": {"project.ptx": "20b8cd8099dc3b21a04bcf94d9386446f07e7b11eb1bc3247875546a173b0b3c", "codechat_config.yaml": "d1314aefecb11bf4dee0775ba095ad90bd508c2023c8be1a4dc02ed71406afee", ".gitignore": "56a9ffb6b221bea906348ab3ccb4af37a42d89aeeaaff1e8abf46c06217fd05c", ".devcontainer.json": "4a3c939ffe2fdae8670da5254984419107a794013ef0487e2cbc84db465d0371", "pretext-cli.yml": "acc8fa861bba25478048c1d731a0e2057a746dff9542ee007766111886660f8a"}}
{"2.11.5": {"project.ptx": "20b8cd8099dc3b21a04bcf94d9386446f07e7b11eb1bc3247875546a173b0b3c", "codechat_config.yaml": "d1314aefecb11bf4dee0775ba095ad90bd508c2023c8be1a4dc02ed71406afee", ".gitignore": "56a9ffb6b221bea906348ab3ccb4af37a42d89aeeaaff1e8abf46c06217fd05c", ".devcontainer.json": "4a3c939ffe2fdae8670da5254984419107a794013ef0487e2cbc84db465d0371", "pretext-cli.yml": "acc8fa861bba25478048c1d731a0e2057a746dff9542ee007766111886660f8a"}, "2.12.1": {"project.ptx": "8772864348ecfad7eff81240ff2463f0956a605dfb0cb8eeb0edaf7943991ca8", "codechat_config.yaml": "8fbb8c5e888da3b49e070dd189195becc104b082f3d3c3dfbd0de0f9652a1c7e", ".gitignore": "d0a7e5ca8ec411488d2b2e4cc5c0871313dcacdbfb74710c43ae935608bc8b7e", ".devcontainer.json": "7113274fe6b005b742e68381d477cee3376b4bff8f673eade0b6300a88b2a8eb", "pretext-cli.yml": "a01524273301bfb48b1d72ec0c968f51e63f81a299007b2604e53cb53509ff3b"}}
2 changes: 1 addition & 1 deletion pretext/types.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import typing as t

# AssetTable is a dictionary mapping each asset type to a single string (presumably a hash of the source for that asset type; confirm against where the table is built).
AssetTable = t.Dict[str, t.Dict[str, bytes]]
AssetTable = t.Dict[str, str]
14 changes: 7 additions & 7 deletions pretext/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,17 +421,17 @@ def clean_asset_table(
"""
Removes any assets from the dirty_table that are not in the clean_table.
"""
# First purge any asset types that are no longer in the clean table:
# Purge any asset types that are no longer in the clean table:
dirty_table = {
asset: dirty_table[asset] for asset in dirty_table if asset in clean_table
}
# Then purge ids of assets that no longer exist in the clean table:
for asset in dirty_table:
dirty_table[asset] = {
id: dirty_table[asset][id]
for id in dirty_table[asset]
if id in clean_table[asset]
}
# for asset in dirty_table:
# dirty_table[asset] = {
# id: dirty_table[asset][id]
# for id in dirty_table[asset]
# if id in clean_table[asset]
# }
return dirty_table


Expand Down
2 changes: 1 addition & 1 deletion templates/.devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// This file was automatically generated with PreTeXt 2.11.5.
// This file was automatically generated with PreTeXt 2.12.1.
// If you modify this file, PreTeXt will no longer automatically update it.
//
//////////////////////////////////////////////////////////////
Expand Down
3 changes: 2 additions & 1 deletion templates/.gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This file was automatically generated with PreTeXt 2.11.5.
# This file was automatically generated with PreTeXt 2.12.1.
# If you modify this file, PreTeXt will no longer automatically update it.
#
# Boilerplate list of files in a PreTeXt project for git to ignore
Expand All @@ -11,6 +11,7 @@ published

# don't track assets generated from source
generated-assets
.cache

# don't track the executables.ptx file
executables.ptx
Expand Down
2 changes: 1 addition & 1 deletion templates/codechat_config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This file was automatically generated with PreTeXt 2.11.5.
# This file was automatically generated with PreTeXt 2.12.1.
# If you modify this file, PreTeXt will no longer automatically update it.
#
#############################################################
Expand Down
2 changes: 1 addition & 1 deletion templates/pretext-cli.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This file was automatically generated with PreTeXt 2.11.5.
# This file was automatically generated with PreTeXt 2.12.1.
# If you modify this file, PreTeXt will no longer automatically update it.
#
name: PreTeXt-CLI Actions
Expand Down
3 changes: 2 additions & 1 deletion templates/project.ptx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- This file was automatically generated by PreTeXt 2.11.5. -->
<!-- This file was automatically generated by PreTeXt 2.12.1. -->
<!-- If you modify this file, PreTeXt will no longer automatically update it.-->

<!-- This file, the project manifest, provides the overall configuration for your PreTeXt project. To edit the content of your document, open `source/main.ptx`. See https://pretextbook.org/doc/guide/html/processing-CLI.html#cli-project-manifest. -->
Expand All @@ -22,6 +22,7 @@
stage="output/stage"
xsl="xsl"
asy-method="server"
generated-cache=".cache"
>
<targets>
<target
Expand Down
14 changes: 12 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,22 @@ def test_build(tmp_path: Path, script_runner: ScriptRunner) -> None:
[PTX_CMD, "-v", "debug", "build", "web", "-x", "sec-latex-image", "-q"],
cwd=project_path,
).success
assert not (project_path / "generated-assets" / "latex-image").exists()
assert not (
project_path
/ "generated-assets"
/ "latex-image"
/ "fig_tikz-example-diagram.svg"
).exists()
assert script_runner.run(
[PTX_CMD, "-v", "debug", "build", "web", "-x", "sec-latex-image"],
cwd=project_path,
).success
assert (project_path / "generated-assets" / "latex-image").exists()
assert (
project_path
/ "generated-assets"
/ "latex-image"
/ "fig_tikz-example-diagram.svg"
).exists()

# Do a full build.
assert script_runner.run(
Expand Down

0 comments on commit 7db949b

Please sign in to comment.