From 3a1f4368635c16f8929473db2f75b2071692a00a Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 11:44:24 +1000 Subject: [PATCH 01/10] Add limited benchmark tests No CI yet. #50 --- README.md | 1 + edtf/natlang/tests.py | 24 ++++++++++++++++++++++++ edtf/parser/grammar.py | 9 +++++++++ edtf/parser/tests.py | 21 +++++++++++++++++++++ pyproject.toml | 8 ++++++-- 5 files changed, 61 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a571813..76476c5 100644 --- a/README.md +++ b/README.md @@ -401,6 +401,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut ### Running tests - From `python-edtf`, run the unit tests: `pytest` +- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks - From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` - To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=` diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 78ecbc9..d2c43a5 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -185,3 +185,27 @@ def test_natlang(input_text, expected_output): assert ( result == expected_output ), f"Failed for input: {input_text} - expected {expected_output}, got {result}" + + +@pytest.mark.benchmark +@pytest.mark.parametrize( + "input_text,expected_output", + [ + ("23rd Dynasty", None), + ("January 2008", "2008-01"), + ("ca1860", "1860~"), + ("uncertain: approx 1862", "1862%"), + ("January", "XXXX-01"), + ("Winter 1872", "1872-24"), + ("before approx January 18 1928", "/1928-01-18~"), + ("birthday in 1872", "1872"), + ("1270 CE", "1270"), + ("2nd century bce", "-01XX"), + ("1858/1860", "[1858, 1860]"), + ], +) +def test_benchmark_natlang(benchmark, input_text, expected_output): + """ + Benchmark selected natural 
language conversions + """ + benchmark(text_to_edtf, input_text) diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index e6232c4..1747341 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -1,3 +1,11 @@ +# ruff: noqa: E402 I001 + +# It's recommended to `enablePackrat()` immediately after importing pyparsing +# https://github.com/pyparsing/pyparsing/wiki/Performance-Tips +import pyparsing + +pyparsing.ParserElement.enablePackrat() + from pyparsing import ( Combine, NotAny, @@ -13,6 +21,7 @@ ) from pyparsing import Literal as L + from edtf.parser.edtf_exceptions import EDTFParseException # (* ************************** Level 0 *************************** *) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 1ec7452..4932e95 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -216,6 +216,20 @@ ("2001-34", ("2001-04-01", "2001-06-30")), ) +BENCHMARK_EXAMPLES = ( + "2001-02-03", + "2008-12", + "2008", + "-0999", + "2004-01-01T10:10:10+05:00", + "-2005/-1999-02", + "/2006", + "?2004-%06", + "[1667, 1760-12]", + "Y3388E2S3", + "2001-29", +) + BAD_EXAMPLES = ( # parentheses are not used for group qualification in the 2018 spec None, @@ -340,3 +354,10 @@ def test_comparisons(): assert d4 == d5 assert d1 < d5 assert d1 > d6 + + +@pytest.mark.benchmark +@pytest.mark.parametrize("test_input", BENCHMARK_EXAMPLES) +def test_benchmark_parser(benchmark, test_input): + """Benchmark parsing of selected EDTF strings.""" + benchmark(parse, test_input) diff --git a/pyproject.toml b/pyproject.toml index 64579ae..56978fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ test = [ "django>=4.2,<5.0", "pytest", "pytest-django", + "pytest-benchmark", "ruff", "pre-commit", "coverage", @@ -81,8 +82,11 @@ legacy_tox_ini = """ python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] python_classes = ["Test*", "*Tests"] python_functions = ["test_*"] -addopts = "--ignore=edtf_django_tests/ --cov=edtf" -plugins = 
["pytest_cov"] +markers = [ + "benchmark: mark a test as a benchmark", +] +addopts = "--ignore=edtf_django_tests/ --cov=edtf -m 'not benchmark'" +plugins = ["pytest_cov", "pytest_benchmark"] [tool.coverage.run] # we run the edtf_integration tests but only care about them testing fields.py in the main package From 6e7b1093a43cd70906a7402a01621a0f1a195b3b Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 13:00:34 +1000 Subject: [PATCH 02/10] Add benchmark to CI #50 --- .github/workflows/ci.yml | 18 ++++++++++++++++++ dev-requirements.txt | 2 ++ edtf/parser/grammar.py | 5 +++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 95c29c6..39d0f4e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,6 +8,8 @@ on: permissions: checks: write contents: write + # deployments permission to deploy GitHub pages website + deployments: write pull-requests: write @@ -85,3 +87,19 @@ jobs: echo "Coverage Tests - ${{ steps.coverageComment.outputs.tests }}" echo "Coverage Time - ${{ steps.coverageComment.outputs.time }}" echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}" + + - name: Run benchmarks + run: | + pytest -m benchmark --benchmark-json=./output.json + + - name: Publish benchmark results + uses: benchmark-action/github-action-benchmark@v1 + with: + tool: 'pytest' + auto-push: false + output-file-path: output.json + github-token: ${{ secrets.GITHUB_TOKEN }} + comment-on-alert: true + save-data-file: false + skip-fetch-gh-pages: true + summary-always: true diff --git a/dev-requirements.txt b/dev-requirements.txt index 1e37df5..19242af 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,7 @@ -r requirements.txt # Include all main requirements django>=4.2,<5.0 pytest +pytest-benchmark +pytest-django ruff pre-commit diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index 1747341..9840bde 100644 --- 
a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -2,9 +2,10 @@ # It's recommended to `enablePackrat()` immediately after importing pyparsing # https://github.com/pyparsing/pyparsing/wiki/Performance-Tips -import pyparsing -pyparsing.ParserElement.enablePackrat() +# TODO: uncomment this once benchmark testing has run once in CI +# import pyparsing +# pyparsing.ParserElement.enablePackrat() from pyparsing import ( Combine, From 0ab80edfc0d0016490765b27f145e87332a22b42 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 14:12:38 +1000 Subject: [PATCH 03/10] Prevent gh-pages push --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 39d0f4e..f30ea57 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,9 +97,9 @@ jobs: with: tool: 'pytest' auto-push: false + comment-always: true output-file-path: output.json github-token: ${{ secrets.GITHUB_TOKEN }} comment-on-alert: true save-data-file: false - skip-fetch-gh-pages: true summary-always: true From 34363577027222d6ce94a92e0dc10a8935f01d44 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 14:45:34 +1000 Subject: [PATCH 04/10] Add gh-pages push --- .github/workflows/ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f30ea57..ec93df0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,12 +94,13 @@ jobs: - name: Publish benchmark results uses: benchmark-action/github-action-benchmark@v1 + if: github.event_name != 'pull_request' with: tool: 'pytest' - auto-push: false + auto-push: true comment-always: true output-file-path: output.json github-token: ${{ secrets.GITHUB_TOKEN }} comment-on-alert: true - save-data-file: false + save-data-file: true summary-always: true From 23a3d7e1de070bb0156e06d5ac7a91cf081d00e6 Mon Sep 17 00:00:00 2001 From: Alastair 
Weakley Date: Mon, 27 May 2024 15:34:42 +1000 Subject: [PATCH 05/10] Make 2 CI paths #50 --- .github/workflows/ci.yml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ec93df0..370258a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -101,6 +101,18 @@ jobs: comment-always: true output-file-path: output.json github-token: ${{ secrets.GITHUB_TOKEN }} - comment-on-alert: true + comment-on-alert: false + save-data-file: true + summary-always: true + + - name: Publish benchmark results + uses: benchmark-action/github-action-benchmark@v1 + if: github.event_name == 'pull_request' + with: + tool: 'pytest' + auto-push: false + comment-always: true + output-file-path: output.json + comment-on-alert: false save-data-file: true summary-always: true From bb6e64052487511a23e256db10ca74308dd5c11b Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 15:39:25 +1000 Subject: [PATCH 06/10] Store/retrieve previous results --- .github/workflows/ci.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 370258a..fefb0c2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,6 +92,12 @@ jobs: run: | pytest -m benchmark --benchmark-json=./output.json + - name: Download previous benchmark data + uses: actions/cache@v4 + with: + path: ./cache + key: ${{ runner.os }}-benchmark + - name: Publish benchmark results uses: benchmark-action/github-action-benchmark@v1 if: github.event_name != 'pull_request' @@ -101,11 +107,13 @@ jobs: comment-always: true output-file-path: output.json github-token: ${{ secrets.GITHUB_TOKEN }} - comment-on-alert: false + comment-on-alert: true save-data-file: true summary-always: true + # Where the previous data file is stored + external-data-json-path: ./cache/benchmark-data.json - - name: Publish benchmark results + - name: 
Comment on benchmark results without publishing uses: benchmark-action/github-action-benchmark@v1 if: github.event_name == 'pull_request' with: @@ -116,3 +124,4 @@ jobs: comment-on-alert: false save-data-file: true summary-always: true + external-data-json-path: ./cache/benchmark-data.json From 13a8315234dae048461e8b2bd53b840f0bea8e12 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 15:44:12 +1000 Subject: [PATCH 07/10] Do not auto-push when using external-data file --- .github/workflows/ci.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fefb0c2..a13671e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -110,12 +110,9 @@ jobs: comment-on-alert: true save-data-file: true summary-always: true - # Where the previous data file is stored - external-data-json-path: ./cache/benchmark-data.json - name: Comment on benchmark results without publishing uses: benchmark-action/github-action-benchmark@v1 - if: github.event_name == 'pull_request' with: tool: 'pytest' auto-push: false From 57af55917d8baba8c334ab2bf7c0bce0d465d0ed Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 15:47:14 +1000 Subject: [PATCH 08/10] GH token required for comment-always --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a13671e..0f97b3c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -116,6 +116,7 @@ jobs: with: tool: 'pytest' auto-push: false + github-token: ${{ secrets.GITHUB_TOKEN }} comment-always: true output-file-path: output.json comment-on-alert: false From 90558b6bede78d310755e303328745ad4c70c087 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 16:16:32 +1000 Subject: [PATCH 09/10] Activate packrat #50 --- edtf/parser/grammar.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/edtf/parser/grammar.py 
b/edtf/parser/grammar.py index 9840bde..dc0f66d 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -3,9 +3,9 @@ # It's recommended to `enablePackrat()` immediately after importing pyparsing # https://github.com/pyparsing/pyparsing/wiki/Performance-Tips -# TODO: uncomment this once benchmark testing has run once in CI -# import pyparsing -# pyparsing.ParserElement.enablePackrat() +import pyparsing + +pyparsing.ParserElement.enablePackrat() from pyparsing import ( Combine, From 6c0e23990a259e2bd66f41781d950940e015e379 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Mon, 27 May 2024 16:30:08 +1000 Subject: [PATCH 10/10] Include benchmark url --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 76476c5..9fc6ede 100644 --- a/README.md +++ b/README.md @@ -401,7 +401,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut ### Running tests - From `python-edtf`, run the unit tests: `pytest` -- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks +- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks (published [here](https://ixc.github.io/python-edtf/dev/bench/)) - From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` - To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=`