diff --git a/.github/workflows/release_pypi.yaml b/.github/workflows/release_pypi.yaml index 7f442a68..bb2ae2fc 100644 --- a/.github/workflows/release_pypi.yaml +++ b/.github/workflows/release_pypi.yaml @@ -2,36 +2,218 @@ name: Release PyPi on: workflow_dispatch: - release: - types: - - created +# release: +# types: +# - created + pull_request: + branches: [main] jobs: - build_wheels: - name: Build wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} + linux: + name: Build ${{ matrix.platform.runner}} ${{ matrix.platform.target }} + runs-on: ${{ matrix.platform.runner }} strategy: matrix: - # FIXME: rather do one job by OS - os: [ubuntu-latest, macos-latest, windows-latest] + platform: + # older ubuntu to avoid messing with glibc version + - runner: ubuntu-22.04 + target: x86_64 + manylinux: auto + interpreter: "3.9 3.10 3.11 3.12 3.13" + - runner: ubuntu-22.04 + target: aarch64 + manylinux: manylinux_2_28 + interpreter: "3.9 3.10 3.11 3.12 3.13" + steps: - uses: actions/checkout@v3 + - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: '3.10' - # - name: Set up Rust - # uses: dtolnay/rust-toolchain@stable - # - uses: Swatinem/rust-cache@v2 - - name: Build wheels with Maturin + python-version: "3.13" + + - run: pip install -U twine + + - name: Install required packages + run: | + sudo apt update + sudo apt install pkg-config gcc-aarch64-linux-gnu g++-aarch64-linux-gnu -qy + + - uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + target: ${{ matrix.platform.target}}-unknown-linux-gnu + - uses: Swatinem/rust-cache@v2 + + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --interpreter ${{ matrix.platform.interpreter }} --features python-bindings + sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} + manylinux: ${{ matrix.platform.manylinux }} + + - name: Validate Python package distributions + run: twine check 
--strict dist/* + + - name: Install built wheel + if: matrix.platform.target == 'x86_64' + run: | + pip install outlines_core --no-index --find-links dist --force-reinstall + python -c "import outlines_core" + + - uses: actions/upload-artifact@v4 + with: + path: dist/*.whl + name: wheels-linux-${{ matrix.platform.target }} + + - uses: actions/download-artifact@v4 + with: + pattern: wheels-* + merge-multiple: true + path: dist + + - run: ls -R dist + + windows: + name: Build ${{ matrix.platform.runner}} ${{ matrix.platform.target }} + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: windows-latest + target: x86 + alias-target: i686-pc-windows-msvc + interpreter: "3.9 3.10 3.11 3.12 3.13" + - runner: windows-latest + target: x64 + alias-target: x86_64-pc-windows-msvc + interpreter: "3.9 3.10 3.11 3.12 3.13" + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.13" + architecture: ${{ matrix.platform.target }} + + - run: pip install -U twine + + - name: Install required packages + # rustls requires aws-lc-sys, which provides FFI bindings to AWS-LC (AWS Libcrypto) + # aws-lc-sys requires nasm for compilation on Windows + run: | + choco install nasm + + - uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + target: ${{ matrix.platform.alias-target}} + - uses: Swatinem/rust-cache@v2 + + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --interpreter ${{ matrix.platform.interpreter }} --features python-bindings + sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} + manylinux: auto + + - name: Validate Python package distributions + run: twine check --strict dist/* + + - name: Install built wheel + run: | + pip install outlines_core --no-index --find-links dist --force-reinstall + python -c "import outlines_core" + + - uses: actions/upload-artifact@v4 + with: + path: dist/*.whl + 
name: wheels-windows-${{ matrix.platform.target }} + + - uses: actions/download-artifact@v4 + with: + pattern: wheels-* + merge-multiple: true + path: dist + + - run: ls -R dist + + macos: + name: Build ${{ matrix.platform.runner}} ${{ matrix.platform.target }} + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: macos-14 + target: x86_64 + macos_version: "14.0" + interpreter: "3.9 3.10 3.11 3.12 3.13" + - runner: macos-14 + target: aarch64 + macos_version: "14.0" + interpreter: "3.9 3.10 3.11 3.12 3.13" + - runner: macos-15 + target: x86_64 + macos_version: "15.0" + interpreter: "3.9 3.10 3.11 3.12 3.13" + - runner: macos-15 + target: aarch64 + macos_version: "15.0" + interpreter: "3.9 3.10 3.11 3.12 3.13" + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - run: pip install -U twine + + - name: Set macOS version + run: echo "MACOSX_DEPLOYMENT_TARGET=${{ matrix.platform.macos_version }}" >> $GITHUB_ENV + + - uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + target: ${{ matrix.platform.target}}-apple-darwin + - uses: Swatinem/rust-cache@v2 + + - name: Build wheels uses: PyO3/maturin-action@v1 with: - command: build - args: --release --out dist # --features python-bindings + target: ${{ matrix.platform.target }} + args: --release --out dist --interpreter ${{ matrix.platform.interpreter }} --features python-bindings + sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} + manylinux: auto + + - name: Validate Python package distributions + run: twine check --strict dist/* + + - name: Install built wheel + if: matrix.platform.target == 'aarch64' + run: | + pip install outlines_core --no-index --find-links dist --force-reinstall + python -c "import outlines_core" + - uses: actions/upload-artifact@v4 with: path: dist/*.whl - name: wheels-${{ matrix.os }} + name: wheels-${{ matrix.platform.runner }}-${{ matrix.platform.target }} + + - 
uses: actions/download-artifact@v4 + with: + pattern: wheels-* + merge-multiple: true + path: dist + + - run: ls -R dist build_sdist: name: Build source distribution @@ -52,6 +234,13 @@ jobs: path: dist/*.tar.gz name: sdist + - uses: actions/download-artifact@v4 + with: + name: sdist + path: dist + + - run: ls -R dist + # release: # name: Release to PyPI # needs: [build_wheels, build_sdist] @@ -66,6 +255,7 @@ jobs: # with: # name: sdist # path: dist + # - name: Publish to PyPI # uses: pypa/gh-action-pypi-publish@v1.5.0 # with: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 20330a34..215f0111 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -37,10 +37,9 @@ jobs: python-version: ${{ matrix.python-version }} - name: Set up test environment run: | - # python -m venv .venv - # source .venv/bin/activate - # echo "VIRTUAL_ENV=$(pwd)/.venv" >> $GITHUB_ENV - # echo "$(pwd)/.venv/bin" >> $GITHUB_PATH + python -m venv .venv + source .venv/bin/activate + python -m pip install --upgrade pip pip install .[test] maturin develop - name: Create matrix id @@ -54,7 +53,7 @@ jobs: echo "::set-output name=id::$MATRIX_ID" - name: Run tests run: | - pytest --cov=outlines_core -vv + .venv/bin/python -m pytest --cov=outlines_core -vv env: COVERAGE_FILE: .coverage.${{ steps.matrix-id.outputs.id }} - name: Upload coverage data @@ -84,7 +83,11 @@ jobs: - name: Set up environment run: | - pip install --upgrade "coverage[toml]>=5.1" diff-cover + python -m venv .venv + source .venv/bin/activate + python -m pip install --upgrade pip + pip install --upgrade "coverage[toml]>=5.1" diff-cover maturin + maturin develop - name: Install lcov run: sudo apt-get update && sudo apt-get install -yqq lcov diff --git a/Cargo.toml b/Cargo.toml index 32ffa8f7..a93598a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,12 +16,22 @@ serde-pyobject = { version = "0.5.0", optional = true } serde_json = { version = "1.0", features = ["preserve_order"] } serde = 
{version = "1.0", features = ["derive"]} bincode = "2.0.0-rc.3" -# Fragile dependencies, minor updates often break the code -hf-hub = "=0.3.2" -tokenizers = { version = "=0.20.3", features = ["http"] } rustc-hash = "2.1.0" regex-automata = "0.4.9" +# The dependencies below are fragile: even minor updates often break the code +[dependencies.hf-hub] +version = "=0.4.1" +features = ["ureq", "rustls-tls"] +default-features = false + +[dependencies.tokenizers] +git = "https://github.com/huggingface/tokenizers.git" +# This revision is after 0.21.0 but before 0.21.1; it is pinned because we need the rustls-tls feature +# Once 0.21.1 is released, this could be pinned to 0.21.1+ +rev = "4f1a810aa258d287e6936315e63fbf58bde2a980" +features = ["http", "rustls-tls"] + [features] python-bindings = ["pyo3", "serde-pyobject"] diff --git a/environment.yml b/environment.yml index d3777345..9ee3f305 100644 --- a/environment.yml +++ b/environment.yml @@ -12,7 +12,6 @@ dependencies: - pydantic - pytest - pre-commit - - jsonschema - pip - pip: - -e ".[test]" diff --git a/pyproject.toml b/pyproject.toml index 5179829a..96c7adfe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,9 +23,6 @@ classifiers = [ "Programming Language :: Python :: 3", "Topic :: Scientific/Engineering :: Artificial Intelligence", ] -dependencies = [ - "jsonschema", -] dynamic = ["version"] [project.optional-dependencies] @@ -42,7 +39,7 @@ test = [ "scipy", "asv", "psutil", - "maturin", + "maturin[patchelf]", ] [project.urls] @@ -54,9 +51,6 @@ repository = "https://github.com/dottxt-ai/outlines-core" file="README.md" content-type = "text/markdown" -[tool.cibuildwheel] -skip = ["*-musllinux_i686"] - [tool.setuptools] packages = ["outlines_core"] package-dir = {"" = "python"} @@ -86,7 +80,6 @@ explicit_package_bases = true [[tool.mypy.overrides]] module = [ - "jsonschema.*", "pydantic.*", "pytest", "setuptools.*", diff --git a/src/python_bindings/mod.rs b/src/python_bindings/mod.rs index 548ff1d3..64148cb2 100644 --- 
a/src/python_bindings/mod.rs +++ b/src/python_bindings/mod.rs @@ -337,7 +337,9 @@ pub fn build_regex_from_schema_py( } fn register_child_module(parent_module: &Bound<'_, PyModule>) -> PyResult<()> { - let m = PyModule::new_bound(parent_module.py(), "json_schema")?; + let m = PyModule::new(parent_module.py(), "json_schema")?; + parent_module.add_submodule(&m)?; + m.add("BOOLEAN", json_schema::BOOLEAN)?; m.add("DATE", json_schema::DATE)?; m.add("DATE_TIME", json_schema::DATE_TIME)?; @@ -352,16 +354,21 @@ fn register_child_module(parent_module: &Bound<'_, PyModule>) -> PyResult<()> { m.add("EMAIL", json_schema::EMAIL)?; m.add("URI", json_schema::URI)?; m.add_function(wrap_pyfunction!(build_regex_from_schema_py, &m)?)?; - parent_module.add_submodule(&m)?; + + let sys = PyModule::import(m.py(), "sys")?; + let sys_modules_bind = sys.as_ref().getattr("modules")?; + let sys_modules = sys_modules_bind.downcast::()?; + sys_modules.set_item("outlines_core.json_schema", &m)?; + Ok(()) } -// FIXME: there's an unwanted outlines_core.outlines_core module generated by PyO3... #[pymodule] fn outlines_core(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; register_child_module(m)?; + Ok(()) } diff --git a/src/vocabulary/mod.rs b/src/vocabulary/mod.rs index 43bb6e7a..e2824c50 100644 --- a/src/vocabulary/mod.rs +++ b/src/vocabulary/mod.rs @@ -160,7 +160,7 @@ impl Vocabulary { NormalizerWrapper::Sequence(normalization_sequence) => { let new_sequence = Sequence::new( normalization_sequence - .get_normalizers() + .as_ref() .iter() .filter_map(|normalizer| match normalizer { NormalizerWrapper::Prepend(_) => None, @@ -465,7 +465,7 @@ mod tests { if let Some(n) = normalized_t.get_normalizer() { match n { NormalizerWrapper::Sequence(seq) => { - for n in seq.get_normalizers() { + for n in seq.as_ref() { if let NormalizerWrapper::Prepend(_) = n { unreachable!() }