From 3a1f4368635c16f8929473db2f75b2071692a00a Mon Sep 17 00:00:00 2001
From: Alastair Weakley <alastair@interaction.net.au>
Date: Mon, 27 May 2024 11:44:24 +1000
Subject: [PATCH 01/10] Add limited benchmark tests

No CI yet. #50
---
 README.md              |  1 +
 edtf/natlang/tests.py  | 24 ++++++++++++++++++++++++
 edtf/parser/grammar.py |  9 +++++++++
 edtf/parser/tests.py   | 21 +++++++++++++++++++++
 pyproject.toml         |  8 ++++++--
 5 files changed, 61 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index a571813..76476c5 100644
--- a/README.md
+++ b/README.md
@@ -401,6 +401,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut
 
 ### Running tests
 - From `python-edtf`, run the unit tests: `pytest`
+- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks
 - From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration`
 - To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=<your PAT>`
 
diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py
index 78ecbc9..d2c43a5 100644
--- a/edtf/natlang/tests.py
+++ b/edtf/natlang/tests.py
@@ -185,3 +185,27 @@ def test_natlang(input_text, expected_output):
     assert (
         result == expected_output
     ), f"Failed for input: {input_text} - expected {expected_output}, got {result}"
+
+
+@pytest.mark.benchmark
+@pytest.mark.parametrize(
+    "input_text,expected_output",
+    [
+        ("23rd Dynasty", None),
+        ("January 2008", "2008-01"),
+        ("ca1860", "1860~"),
+        ("uncertain: approx 1862", "1862%"),
+        ("January", "XXXX-01"),
+        ("Winter 1872", "1872-24"),
+        ("before approx January 18 1928", "/1928-01-18~"),
+        ("birthday in 1872", "1872"),
+        ("1270 CE", "1270"),
+        ("2nd century bce", "-01XX"),
+        ("1858/1860", "[1858, 1860]"),
+    ],
+)
+def test_benchmark_natlang(benchmark, input_text, expected_output):
+    """
+    Benchmark selected natural language conversions and check each result
+    """
+    assert benchmark(text_to_edtf, input_text) == expected_output
diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py
index e6232c4..1747341 100644
--- a/edtf/parser/grammar.py
+++ b/edtf/parser/grammar.py
@@ -1,3 +1,11 @@
+# ruff: noqa: E402 I001
+
+# It's recommended to `enablePackrat()` immediately after importing pyparsing
+# https://github.com/pyparsing/pyparsing/wiki/Performance-Tips
+import pyparsing
+
+pyparsing.ParserElement.enablePackrat()
+
 from pyparsing import (
     Combine,
     NotAny,
@@ -13,6 +21,7 @@
 )
 from pyparsing import Literal as L
 
+
 from edtf.parser.edtf_exceptions import EDTFParseException
 
 # (* ************************** Level 0 *************************** *)
diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py
index 1ec7452..4932e95 100644
--- a/edtf/parser/tests.py
+++ b/edtf/parser/tests.py
@@ -216,6 +216,20 @@
     ("2001-34", ("2001-04-01", "2001-06-30")),
 )
 
+BENCHMARK_EXAMPLES = (
+    "2001-02-03",
+    "2008-12",
+    "2008",
+    "-0999",
+    "2004-01-01T10:10:10+05:00",
+    "-2005/-1999-02",
+    "/2006",
+    "?2004-%06",
+    "[1667, 1760-12]",
+    "Y3388E2S3",
+    "2001-29",
+)
+
 BAD_EXAMPLES = (
     # parentheses are not used for group qualification in the 2018 spec
     None,
@@ -340,3 +354,10 @@ def test_comparisons():
     assert d4 == d5
     assert d1 < d5
     assert d1 > d6
+
+
+@pytest.mark.benchmark
+@pytest.mark.parametrize("test_input", BENCHMARK_EXAMPLES)
+def test_benchmark_parser(benchmark, test_input):
+    """Time `parse` on each BENCHMARK_EXAMPLES entry; the result is not checked."""
+    benchmark(parse, test_input)
diff --git a/pyproject.toml b/pyproject.toml
index 64579ae..56978fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,6 +39,7 @@ test = [
     "django>=4.2,<5.0",
     "pytest",
     "pytest-django",
+    "pytest-benchmark",
     "ruff",
     "pre-commit",
     "coverage",
@@ -81,8 +82,11 @@ legacy_tox_ini = """
 python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"]
 python_classes = ["Test*", "*Tests"]
 python_functions = ["test_*"]
-addopts = "--ignore=edtf_django_tests/ --cov=edtf"
-plugins = ["pytest_cov"]
+markers = [
+    "benchmark: mark a test as a benchmark",
+]
+addopts = "--ignore=edtf_django_tests/ --cov=edtf -m 'not benchmark'"
+plugins = ["pytest_cov", "pytest_benchmark"]
 
 [tool.coverage.run]
 # we run the edtf_integration tests but only care about them testing fields.py in the main package

From 6e7b1093a43cd70906a7402a01621a0f1a195b3b Mon Sep 17 00:00:00 2001
From: Alastair Weakley <alastair@interaction.net.au>
Date: Mon, 27 May 2024 13:00:34 +1000
Subject: [PATCH 02/10] Add benchmark to CI #50

---
 .github/workflows/ci.yml | 18 ++++++++++++++++++
 dev-requirements.txt     |  2 ++
 edtf/parser/grammar.py   |  5 +++--
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 95c29c6..39d0f4e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,6 +8,8 @@ on:
 permissions:
   checks: write
   contents: write
+  # "deployments: write" is needed to deploy the GitHub Pages website
+  deployments: write
   pull-requests: write
 
 
@@ -85,3 +87,19 @@ jobs:
                 echo "Coverage Tests - ${{ steps.coverageComment.outputs.tests }}"
                 echo "Coverage Time - ${{ steps.coverageComment.outputs.time }}"
                 echo "Not Success Test Info - ${{ steps.coverageComment.outputs.notSuccessTestInfo }}"
+
+            - name: Run benchmarks
+              run: |
+                pytest -m benchmark --benchmark-json=./output.json
+
+            - name: Publish benchmark results
+              uses: benchmark-action/github-action-benchmark@v1
+              with:
+                tool: 'pytest'
+                auto-push: false
+                output-file-path: output.json
+                github-token: ${{ secrets.GITHUB_TOKEN }}
+                comment-on-alert: true
+                save-data-file: false
+                skip-fetch-gh-pages: true
+                summary-always: true
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 1e37df5..19242af 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,5 +1,7 @@
 -r requirements.txt  # Include all main requirements
 django>=4.2,<5.0
 pytest
+pytest-benchmark
+pytest-django
 ruff
 pre-commit
diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py
index 1747341..9840bde 100644
--- a/edtf/parser/grammar.py
+++ b/edtf/parser/grammar.py
@@ -2,9 +2,10 @@
 
 # It's recommended to `enablePackrat()` immediately after importing pyparsing
 # https://github.com/pyparsing/pyparsing/wiki/Performance-Tips
-import pyparsing
 
-pyparsing.ParserElement.enablePackrat()
+# TODO: uncomment this once benchmark testing has run once in CI
+# import pyparsing
+# pyparsing.ParserElement.enablePackrat()
 
 from pyparsing import (
     Combine,

From 0ab80edfc0d0016490765b27f145e87332a22b42 Mon Sep 17 00:00:00 2001
From: Alastair Weakley <alastair@interaction.net.au>
Date: Mon, 27 May 2024 14:12:38 +1000
Subject: [PATCH 03/10] Prevent gh-pages push

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 39d0f4e..f30ea57 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -97,9 +97,9 @@ jobs:
               with:
                 tool: 'pytest'
                 auto-push: false
+                comment-always: true
                 output-file-path: output.json
                 github-token: ${{ secrets.GITHUB_TOKEN }}
                 comment-on-alert: true
                 save-data-file: false
-                skip-fetch-gh-pages: true
                 summary-always: true

From 34363577027222d6ce94a92e0dc10a8935f01d44 Mon Sep 17 00:00:00 2001
From: Alastair Weakley <alastair@interaction.net.au>
Date: Mon, 27 May 2024 14:45:34 +1000
Subject: [PATCH 04/10] Add gh-pages push

---
 .github/workflows/ci.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f30ea57..ec93df0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -94,12 +94,13 @@ jobs:
 
             - name: Publish benchmark results
               uses: benchmark-action/github-action-benchmark@v1
+              if: github.event_name != 'pull_request'
               with:
                 tool: 'pytest'
-                auto-push: false
+                auto-push: true
                 comment-always: true
                 output-file-path: output.json
                 github-token: ${{ secrets.GITHUB_TOKEN }}
                 comment-on-alert: true
-                save-data-file: false
+                save-data-file: true
                 summary-always: true

From 23a3d7e1de070bb0156e06d5ac7a91cf081d00e6 Mon Sep 17 00:00:00 2001
From: Alastair Weakley <alastair@interaction.net.au>
Date: Mon, 27 May 2024 15:34:42 +1000
Subject: [PATCH 05/10] Make 2 CI paths #50

---
 .github/workflows/ci.yml | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ec93df0..370258a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -101,6 +101,18 @@ jobs:
                 comment-always: true
                 output-file-path: output.json
                 github-token: ${{ secrets.GITHUB_TOKEN }}
-                comment-on-alert: true
+                comment-on-alert: false
+                save-data-file: true
+                summary-always: true
+
+            - name: Publish benchmark results
+              uses: benchmark-action/github-action-benchmark@v1
+              if: github.event_name == 'pull_request'
+              with:
+                tool: 'pytest'
+                auto-push: false
+                comment-always: true
+                output-file-path: output.json
+                comment-on-alert: false
                 save-data-file: true
                 summary-always: true

From bb6e64052487511a23e256db10ca74308dd5c11b Mon Sep 17 00:00:00 2001
From: Alastair Weakley <alastair@interaction.net.au>
Date: Mon, 27 May 2024 15:39:25 +1000
Subject: [PATCH 06/10] Store/retrieve previous results

---
 .github/workflows/ci.yml | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 370258a..fefb0c2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -92,6 +92,12 @@ jobs:
               run: |
                 pytest -m benchmark --benchmark-json=./output.json
 
+            - name: Download previous benchmark data
+              uses: actions/cache@v4
+              with:
+                path: ./cache
+                key: ${{ runner.os }}-benchmark
+
             - name: Publish benchmark results
               uses: benchmark-action/github-action-benchmark@v1
               if: github.event_name != 'pull_request'
@@ -101,11 +107,13 @@ jobs:
                 comment-always: true
                 output-file-path: output.json
                 github-token: ${{ secrets.GITHUB_TOKEN }}
-                comment-on-alert: false
+                comment-on-alert: true
                 save-data-file: true
                 summary-always: true
+                # Where the previous data file is stored
+                external-data-json-path: ./cache/benchmark-data.json
 
-            - name: Publish benchmark results
+            - name: Comment on benchmark results without publishing
               uses: benchmark-action/github-action-benchmark@v1
               if: github.event_name == 'pull_request'
               with:
@@ -116,3 +124,4 @@ jobs:
                 comment-on-alert: false
                 save-data-file: true
                 summary-always: true
+                external-data-json-path: ./cache/benchmark-data.json

From 13a8315234dae048461e8b2bd53b840f0bea8e12 Mon Sep 17 00:00:00 2001
From: Alastair Weakley <alastair@interaction.net.au>
Date: Mon, 27 May 2024 15:44:12 +1000
Subject: [PATCH 07/10] Do not auto-push when using external-data file

---
 .github/workflows/ci.yml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fefb0c2..a13671e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -110,12 +110,9 @@ jobs:
                 comment-on-alert: true
                 save-data-file: true
                 summary-always: true
-                # Where the previous data file is stored
-                external-data-json-path: ./cache/benchmark-data.json
 
             - name: Comment on benchmark results without publishing
               uses: benchmark-action/github-action-benchmark@v1
-              if: github.event_name == 'pull_request'
               with:
                 tool: 'pytest'
                 auto-push: false

From 57af55917d8baba8c334ab2bf7c0bce0d465d0ed Mon Sep 17 00:00:00 2001
From: Alastair Weakley <alastair@interaction.net.au>
Date: Mon, 27 May 2024 15:47:14 +1000
Subject: [PATCH 08/10] GH token required for comment-always

---
 .github/workflows/ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a13671e..0f97b3c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -116,6 +116,7 @@ jobs:
               with:
                 tool: 'pytest'
                 auto-push: false
+                github-token: ${{ secrets.GITHUB_TOKEN }}
                 comment-always: true
                 output-file-path: output.json
                 comment-on-alert: false

From 90558b6bede78d310755e303328745ad4c70c087 Mon Sep 17 00:00:00 2001
From: Alastair Weakley <alastair@interaction.net.au>
Date: Mon, 27 May 2024 16:16:32 +1000
Subject: [PATCH 09/10] Activate packrat #50

---
 edtf/parser/grammar.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py
index 9840bde..dc0f66d 100644
--- a/edtf/parser/grammar.py
+++ b/edtf/parser/grammar.py
@@ -3,9 +3,9 @@
 # It's recommended to `enablePackrat()` immediately after importing pyparsing
 # https://github.com/pyparsing/pyparsing/wiki/Performance-Tips
 
-# TODO: uncomment this once benchmark testing has run once in CI
-# import pyparsing
-# pyparsing.ParserElement.enablePackrat()
+import pyparsing
+
+pyparsing.ParserElement.enablePackrat()
 
 from pyparsing import (
     Combine,

From 6c0e23990a259e2bd66f41781d950940e015e379 Mon Sep 17 00:00:00 2001
From: Alastair Weakley <alastair@interaction.net.au>
Date: Mon, 27 May 2024 16:30:08 +1000
Subject: [PATCH 10/10] Include benchmark url

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 76476c5..9fc6ede 100644
--- a/README.md
+++ b/README.md
@@ -401,7 +401,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut
 
 ### Running tests
 - From `python-edtf`, run the unit tests: `pytest`
-- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks
+- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks (published [here](https://ixc.github.io/python-edtf/dev/bench/))
 - From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration`
 - To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=<your PAT>`