Skip to content

Commit

Permalink
feat: store snapshot values in external files
Browse files Browse the repository at this point in the history
  • Loading branch information
15r10nk committed Dec 10, 2023
1 parent b910648 commit 9258b59
Show file tree
Hide file tree
Showing 22 changed files with 1,426 additions and 354 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ repos:
rev: v3.12.0
hooks:
- args:
- --py38-plus
- --py37-plus
id: reorder-python-imports
- hooks:
- args:
- --py38-plus
- --py37-plus
id: pyupgrade
repo: https://github.com/asottile/pyupgrade
rev: v3.15.0
Expand Down
15 changes: 15 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pytest

from tests.utils import snapshot_env
from tests.utils import storage # noqa
from tests.utils import useStorage


@pytest.fixture(autouse=True)
def snapshot_env_for_doctest(request, storage):
    """Run items that carry a `dtest` attribute inside a snapshot environment.

    The fixture is applied automatically to every test.  Items without a
    `dtest` attribute run unchanged; for the others the test body is
    executed inside `snapshot_env()` with the `storage` fixture activated
    through `useStorage(storage)`.
    """
    if not hasattr(request.node, "dtest"):
        # nothing to set up for this item
        yield
        return

    with snapshot_env(), useStorage(storage):
        yield
12 changes: 12 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@


Default configuration:

``` toml
[tool.inline-snapshot]
hash-length=12
```

* *hash-length* specifies the length of the hash used by `external()` in the code representation.
This does not affect the hash length used to store the data.
The hash should be long enough to avoid hash collisions.
76 changes: 76 additions & 0 deletions docs/outsource.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
## General

Storing snapshots in the source code is the main feature of inline snapshots.
This has the advantage that you can easily see changes in code reviews. But it also has some problems:

* It is problematic to snapshot a lot of data, because it takes up a lot of space in your tests.
* Binary data or images are not readable in your tests.

The `outsource(...)` function solves these problems and integrates nicely with inline snapshots.
It stores the data in a special `external()` object that can be compared in snapshots.
The object is represented by the hash of the data.
The actual data is stored in a separate file in your project.

This allows the test to be renamed and moved around in your code without losing the connection to the stored data.

Example:

=== "original code"
<!-- inline-snapshot: outcome-passed=1 outcome-errors=1 -->
```python
def test_something():
assert outsource("long text\n" * 1000) == snapshot()
```

=== "--inline-snapshot=create"
<!-- inline-snapshot: create -->
```python
from inline_snapshot import external


def test_something():
assert outsource("long text\n" * 1000) == snapshot(
external("f5a956460453*.txt")
)
```

The `external` object can be used inside other data structures.

=== "original code"
<!-- inline-snapshot: outcome-passed=1 outcome-errors=1 -->
```python
def test_something():
assert [
outsource("long text\n" * times) for times in [50, 100, 1000]
] == snapshot()
```

=== "--inline-snapshot=create"
<!-- inline-snapshot: create -->
```python
from inline_snapshot import external


def test_something():
assert [
outsource("long text\n" * times) for times in [50, 100, 1000]
] == snapshot(
[
external("362ad8374ed6*.txt"),
external("5755afea3f8d*.txt"),
external("f5a956460453*.txt"),
]
)
```


## API

::: inline_snapshot.outsource
::: inline_snapshot.external

## pytest options

It interacts with the following `--inline-snapshot` flags:

- `trim` removes every snapshot from the storage that is not referenced with `external(...)` in the code.
4 changes: 3 additions & 1 deletion inline_snapshot/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from ._external import external
from ._external import outsource
from ._inline_snapshot import snapshot

__all__ = ["snapshot"]
__all__ = ["snapshot", "external", "outsource"]

__version__ = "0.5.2"
33 changes: 33 additions & 0 deletions inline_snapshot/_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from dataclasses import dataclass
from pathlib import Path

import toml


@dataclass
class Config:
    """Settings of inline-snapshot.

    `read_config()` fills the fields from the `[tool.inline-snapshot]`
    table of a TOML file.
    """

    # number of hash characters that `external.__repr__` writes into the code
    hash_length: int = 12


# module-wide configuration instance; starts out with the default values
config = Config()


def read_config(path: Path) -> Config:
    """Read the inline-snapshot settings from a TOML file.

    Only the `[tool.inline-snapshot]` table is evaluated.  Every setting
    which is missing there keeps its default value.

    Parameters:
        path: the TOML file which should be read.

    Returns:
        A new `Config`.  It contains only default values if `path` does not
        exist or has no `[tool.inline-snapshot]` table.
    """
    if not path.exists():
        return Config()

    # TOML documents are UTF-8 by specification, so the file must not be
    # decoded with the locale dependent default encoding.
    data = toml.loads(path.read_text(encoding="utf-8"))

    result = Config()

    # named `section` so that the module-level `config` is not shadowed
    try:
        section = data["tool"]["inline-snapshot"]
    except KeyError:
        return result

    try:
        result.hash_length = section["hash-length"]
    except KeyError:
        pass

    return result
175 changes: 175 additions & 0 deletions inline_snapshot/_external.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
import hashlib
import pathlib
import re
from typing import Optional
from typing import Set
from typing import Union

from . import _config


class HashError(Exception):
    """Raised when a name matches no file or more than one file in the storage."""


class DiscStorage:
    """Stores outsourced data as files inside one directory.

    Names passed to `read`, `persist`, `remove` and `lookup_all` are used as
    glob patterns relative to the directory, so they may contain a `*`.
    """

    def __init__(self, directory):
        self.directory = pathlib.Path(directory)

    def _ensure_directory(self):
        """Create the directory and its `.gitignore` if they are missing."""
        self.directory.mkdir(parents=True, exist_ok=True)

        ignore_file = self.directory / ".gitignore"
        if ignore_file.exists():
            return

        # keeps the "*-new.*" files out of version control
        ignore_file.write_text(
            "# ignore all snapshots which are not refered in the source\n*-new.*\n"
        )

    def save(self, name, data):
        """Write the bytes `data` into the file `name` (no wildcard allowed)."""
        assert "*" not in name
        self._ensure_directory()
        target = self.directory / name
        target.write_bytes(data)

    def read(self, name):
        """Return the bytes of the single file which matches `name`."""
        path = self._lookup_path(name)
        return path.read_bytes()

    def prune_new_files(self):
        """Delete every file which matches `*-new.*`."""
        for leftover in self.directory.glob("*-new.*"):
            leftover.unlink()

    def list(self) -> Set[str]:
        """Return the names of all stored files, without `.gitignore`."""
        if not self.directory.exists():
            return set()

        names = {entry.name for entry in self.directory.iterdir()}
        names.discard(".gitignore")
        return names

    def persist(self, name):
        """Strip the `-new` marker from the file which matches `name`.

        Files without the marker are left untouched.
        """
        current = self._lookup_path(name)
        stem = current.stem
        if not stem.endswith("-new"):
            return

        # stem[:-4] removes the trailing "-new"
        current.rename(current.with_name(stem[:-4] + current.suffix))

    def _lookup_path(self, name) -> pathlib.Path:
        """Resolve `name` to exactly one file.

        Raises:
            HashError: if no file or more than one file matches `name`.
        """
        matches = [*self.directory.glob(name)]

        if not matches:
            raise HashError(f"hash {name!r} is not found in the DiscStorage")

        if len(matches) > 1:
            raise HashError(
                f"hash collision files={sorted(f.name for f in matches)}"
            )

        return matches[0]

    def lookup_all(self, name) -> Set[str]:
        """Return the names of all files which match `name`."""
        return {match.name for match in self.directory.glob(name)}

    def remove(self, name):
        """Delete the single file which matches `name`."""
        path = self._lookup_path(name)
        path.unlink()


# storage used by `external._load_value` and `outsource`; both assert that it
# is not None before they use it
storage: Optional[DiscStorage] = None


class external:
    def __init__(self, name: str):
        """External objects are used as a representation for outsourced data.
        You should not create them directly.

        The external data is stored inside `.inline_snapshot/external`.
        Data which is outsourced but not yet referenced in the source code
        has a '-new' suffix in the filename.

        Parameters:
            name: the name of the external stored object, either
                `<hash>.<suffix>` or `<partial_hash>*.<suffix>`.

        Raises:
            ValueError: if `name` has a different form.
        """
        match = re.fullmatch(r"([0-9a-fA-F]*)\*?(\.[a-zA-Z0-9]*)", name)

        if match is None:
            raise ValueError(
                "path has to be of the form <hash>.<suffix> or <partial_hash>*.<suffix>"
            )

        self._hash, self._suffix = match.groups()

    @property
    def _path(self):
        # glob pattern which finds the stored file, with or without "-new"
        return f"{self._hash}*{self._suffix}"

    def __repr__(self):
        """Returns the representation of the external object.

        The length of the hash can be specified in the
        [config](configuration.md).
        """
        shown = self._hash[: _config.config.hash_length]

        # the "*" is only left out if all 64 characters of the hash are shown
        wildcard = "" if len(shown) == 64 else "*"
        return f'external("{shown}{wildcard}{self._suffix}")'

    def __eq__(self, other):
        """Two external objects are equal if they have the same hash and
        suffix.

        Hashes of different length are compared up to the length of the
        shorter one.
        """
        if not isinstance(other, external):
            return NotImplemented

        common = min(len(self._hash), len(other._hash))

        return (
            self._hash[:common] == other._hash[:common]
            and self._suffix == other._suffix
        )

    def _load_value(self):
        """Return the stored bytes of this object from the module storage."""
        assert storage is not None
        return storage.read(self._path)


def outsource(data: Union[str, bytes], *, suffix: Optional[str] = None) -> external:
    """Outsource some data into an external file.

    ``` pycon
    >>> png_data = b"some_bytes"  # should be replaced with your actual data
    >>> outsource(png_data, suffix=".png")
    external("212974ed1835*.png")

    ```

    Parameters:
        data: data which should be outsourced. strings are encoded with `"utf-8"`.
        suffix: overwrite file suffix. The default is `".bin"` if data is an instance of `#!python bytes` and `".txt"` for `#!python str`.
            It has to be a `.` followed by ASCII letters or digits.

    Returns:
        The external data.

    Raises:
        TypeError: if `data` is neither `str` nor `bytes`.
        ValueError: if `suffix` is not a valid suffix.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
        if suffix is None:
            suffix = ".txt"

    elif isinstance(data, bytes):
        if suffix is None:
            suffix = ".bin"
    else:
        raise TypeError("data has to be of type bytes | str")

    if not suffix or suffix[0] != ".":
        raise ValueError("suffix has to start with a '.' like '.png'")

    # `external()` accepts only letters and digits after the dot.  Checking
    # this here, before anything is written, prevents an orphaned "-new"
    # file and a misleading error message for a suffix like ".tar.gz".
    if not re.fullmatch(r"\.[a-zA-Z0-9]*", suffix):
        raise ValueError(
            "suffix may only contain letters and digits after the leading '.'"
        )

    digest = hashlib.sha256(data).hexdigest()

    assert storage is not None

    name = digest + suffix

    # unknown data is stored with a "-new" marker in its filename
    if not storage.lookup_all(name):
        storage.save(digest + "-new" + suffix, data)

    return external(name)
Loading

0 comments on commit 9258b59

Please sign in to comment.