From eefdecda80a3d38b08538186ff8f1bd2002bfc0d Mon Sep 17 00:00:00 2001 From: awwaawwa <8493196+awwaawwa@users.noreply.github.com> Date: Sun, 26 Jan 2025 10:21:23 +0800 Subject: [PATCH 1/5] feat(translator): optimize thread executor configuration for better QPS management --- yadt/document_il/midend/il_translator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yadt/document_il/midend/il_translator.py b/yadt/document_il/midend/il_translator.py index c703222e..58380517 100644 --- a/yadt/document_il/midend/il_translator.py +++ b/yadt/document_il/midend/il_translator.py @@ -134,7 +134,7 @@ def translate(self, docs: Document): self.stage_name, total ) as pbar: with concurrent.futures.ThreadPoolExecutor( - max_workers=self.translation_config.qps * 2 + max_workers=min(self.translation_config.qps * 2, self.translation_config.qps + 5) ) as executor: for page in docs.page: self.process_page(page, executor, pbar, tracker.new_page()) From 00510abc4a86b684e1c4007ce07d111a4380a2e0 Mon Sep 17 00:00:00 2001 From: awwaawwa <8493196+awwaawwa@users.noreply.github.com> Date: Sun, 26 Jan 2025 11:16:02 +0800 Subject: [PATCH 2/5] =?UTF-8?q?=F0=9F=90=9B=20fix(cache):=20improve=20temp?= =?UTF-8?q?orary=20database=20file=20handling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - replace `tempfile.mktemp()` with `tempfile.NamedTemporaryFile()` for safer file management - ensure temporary file is closed and can be used by SQLite - add `delete=False` to prevent immediate file removal --- yadt/document_il/translator/cache.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yadt/document_il/translator/cache.py b/yadt/document_il/translator/cache.py index 087108d8..03fe4398 100644 --- a/yadt/document_il/translator/cache.py +++ b/yadt/document_il/translator/cache.py @@ -111,7 +111,10 @@ def init_db(remove_exists=False): def init_test_db(): import tempfile - cache_db_path = tempfile.mktemp(suffix=".db") + temp_file = tempfile.NamedTemporaryFile(suffix=".db", delete=False) + cache_db_path = temp_file.name + temp_file.close() + test_db = SqliteDatabase( cache_db_path, pragmas={ From ae21d6c8d4952bf469b04df46119ef79f1f33224 Mon Sep 17 00:00:00 2001 From: awwaawwa <8493196+awwaawwa@users.noreply.github.com> Date: Sun, 26 Jan 2025 11:16:17 +0800 Subject: [PATCH 3/5] =?UTF-8?q?=F0=9F=94=A7=20chore(ci):=20update=20github?= =?UTF-8?q?=20actions=20dependencies=20and=20versions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - update actions/checkout to v4.1.1 - update actions/upload-artifact to v4.3.0 - update actions/download-artifact to v4.1.0 - update release-drafter to v5.25.0 - update actions/cache to v3.3.2 - update setup-uv to v5.0.0 --- .github/workflows/publish-to-pypi.yml | 14 +++++++------- .github/workflows/test.yml | 10 +++++++--- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 92e64d11..7a0d080e 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -28,14 +28,14 @@ jobs: outputs: is_release: ${{ steps.check-version.outputs.tag }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v4.1.1 with: persist-credentials: true fetch-depth: 2 token: ${{ secrets.GITHUB_TOKEN }} - name: Setup uv with Python 3.12 - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v5.0.0 with: python-version: "3.12" enable-cache: true @@ -68,7 +68,7 @@ jobs: run: "uv build" - name: Store the distribution packages - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v4.3.0 with: name: python-package-distributions path: dist/ @@ -89,7 +89,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v4.1.0 with: name: python-package-distributions path: dist/ @@ -113,7 +113,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v4.1.0 with: name: python-package-distributions path: dist/ @@ -138,14 +138,14 @@ jobs: contents: write pull-requests: write steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v4.1.1 with: persist-credentials: true fetch-depth: 2 token: ${{ secrets.GITHUB_TOKEN }} - name: Publish the release notes - uses: release-drafter/release-drafter@v5.23.0 + uses: release-drafter/release-drafter@v5.25.0 with: publish: ${{ needs.build.outputs.is_release != '' }} tag: ${{ needs.build.outputs.is_release }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ef777b51..a0cc0d46 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -5,6 +5,10 @@ on: pull_request: branches: ["main"] +permissions: + contents: read + pull-requests: read + jobs: test: name: Run Python Tests @@ -14,17 +18,17 @@ jobs: python-version: ["3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false - name: Cached models id: cache-mods - uses: actions/cache@v3 + uses: actions/cache@v3.3.2 with: path: ~/.cache/huggingface/hub/models--wybxc--DocLayout-YOLO-DocStructBench-onnx key: ${{runner.os}}-huggingmodel-wybxc--DocLayout-YOLO-DocStructBench-onnx - name: Setup uv with Python ${{ matrix.python-version }} - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v5.0.0 with: python-version: ${{ matrix.python-version }} enable-cache: true From 75a959187c7d09e5f2e04b95228eee0d91b8b923 Mon Sep 17 00:00:00 2001 From: awwaawwa <8493196+awwaawwa@users.noreply.github.com> Date: Sun, 26 Jan 2025 13:05:17 +0800 Subject: [PATCH 4/5] =?UTF-8?q?=F0=9F=94=A7=20chore(deps):=20pin=20github?= =?UTF-8?q?=20actions=20to=20commit=20hashes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - pin crazy-max/ghaction-github-labeler to v4.1.0 hash - pin astral-sh/setup-uv to v5.2.1 hash - pin salsify/action-detect-and-tag-new-version to v2.0.3 hash - pin pypa/gh-action-pypi-publish to v1.12.4 hash - pin release-drafter/release-drafter to v5.25.0 hash --- .github/workflows/labeler.yml | 2 +- .github/workflows/publish-to-pypi.yml | 10 +++++----- .github/workflows/test.yml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 38b7e8a7..3a027b07 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -14,6 +14,6 @@ jobs: uses: actions/checkout@v3 - name: Run Labeler - uses: crazy-max/ghaction-github-labeler@v4.1.0 + uses: crazy-max/ghaction-github-labeler@3de87da19416edc45c90cd89e7a4ea922a3aae5a # v4.1.0 with: skip-delete: true \ No newline at end of file diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 7a0d080e..d794938d 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -35,7 +35,7 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} - name: Setup uv with Python 3.12 - uses: astral-sh/setup-uv@v5.0.0 + uses: astral-sh/setup-uv@b5f58b2abc5763ade55e4e9d0fe52cd1ff7979ca # v5.2.1 with: python-version: "3.12" enable-cache: true @@ -49,7 +49,7 @@ jobs: - name: Detect and tag new version id: check-version if: steps.check-parent-commit.outputs.sha - uses: salsify/action-detect-and-tag-new-version@v2.0.1 + uses: salsify/action-detect-and-tag-new-version@2e972ec550a624c846f9f37e87376a6f9f6a682a # v2.0.3 with: version-command: | cat pyproject.toml | grep "version = " | head -n 1 | awk -F'"' '{print $2}' @@ -95,7 +95,7 @@ jobs: path: dist/ - name: Publish distribution 📦 to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 publish-to-testpypi: name: Publish Python 🐍 distribution 📦 to TestPyPI @@ -119,7 +119,7 @@ jobs: path: dist/ - name: Publish distribution 📦 to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 with: repository-url: https://test.pypi.org/legacy/ @@ -145,7 +145,7 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} - name: Publish the release notes - uses: release-drafter/release-drafter@v5.25.0 + uses: release-drafter/release-drafter@09c613e259eb8d4e7c81c2cb00618eb5fc4575a7 # v5.25.0 with: publish: ${{ needs.build.outputs.is_release != '' }} tag: ${{ needs.build.outputs.is_release }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a0cc0d46..f8025a84 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,7 +28,7 @@ jobs: path: ~/.cache/huggingface/hub/models--wybxc--DocLayout-YOLO-DocStructBench-onnx key: ${{runner.os}}-huggingmodel-wybxc--DocLayout-YOLO-DocStructBench-onnx - name: Setup uv with Python ${{ matrix.python-version }} - uses: astral-sh/setup-uv@v5.0.0 + uses: astral-sh/setup-uv@b5f58b2abc5763ade55e4e9d0fe52cd1ff7979ca # v5.2.1 with: python-version: ${{ matrix.python-version }} enable-cache: true From 20ad95c54afdc895e940bd75a5fdce283dee0394 Mon Sep 17 00:00:00 2001 From: awwaawwa <8493196+awwaawwa@users.noreply.github.com> Date: Sun, 26 Jan 2025 13:10:49 +0800 Subject: [PATCH 5/5] =?UTF-8?q?=F0=9F=94=A7=20chore(workflows):=20update?= =?UTF-8?q?=20github=20actions=20configurations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - update labeler workflow to use main branch and specific paths - add required permissions for labeler workflow - update download-artifact actions to newer versions in publish-to-pypi workflow --- .github/workflows/labeler.yml | 12 +++++++++--- .github/workflows/publish-to-pypi.yml | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 3a027b07..18d1d856 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -2,9 +2,15 @@ name: Labeler on: push: - branches: - - main - - master + branches: [main] + paths: + - .github/labels.yml + - .github/workflows/labeler.yml + +permissions: + contents: read + issues: write + pull-requests: write jobs: labeler: diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index d794938d..e9482466 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -89,7 +89,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v4.1.0 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: python-package-distributions path: dist/ @@ -113,7 +113,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v4.1.0 + uses: actions/download-artifact@f44cd7b40bfd40b6aa1cc1b9b5b7bf65639a7c09 # v4.1.7 with: name: python-package-distributions path: dist/