Skip to content

Commit

Permalink
Use vcpkg to build dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
phoerious committed Dec 4, 2024
1 parent 7a371fd commit 59bda42
Show file tree
Hide file tree
Showing 16 changed files with 226 additions and 204 deletions.
4 changes: 3 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
*/
!compile-third-party-libs.sh
!.vcpkg/
!vcpkg.json
!vcpkg-configuration.json
45 changes: 21 additions & 24 deletions .github/workflows/build-wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ on:
env:
PYTHON_VERSION: "3.12"
FORCE_COLOR: "1"
MACOSX_DEPLOYMENT_TARGET: "12.0"

jobs:
build-wheels:
Expand All @@ -27,17 +28,18 @@ jobs:
os: [ ubuntu-latest, windows-latest, macos-latest ]
env:
CIBW_SKIP: "*-musllinux*"
CIBW_TEST_SKIP: "*-manylinux_aarch64 *-macosx_x86_64" # x86_64 tests are flaky on arm64
CIBW_ARCHS_MACOS: "x86_64 arm64"
CIBW_ARCHS_LINUX: "x86_64"
# CIBW_ARCHS_LINUX: "x86_64 aarch64" # https://github.com/pypa/cibuildwheel/issues/1771#issuecomment-1973003145
CIBW_MANYLINUX_X86_64_IMAGE: ghcr.io/chatnoir-eu/resiliparse-manylinux_2_28_x86_64:latest
CIBW_MANYLINUX_AARCH64_IMAGE: ghcr.io/chatnoir-eu/resiliparse-manylinux_2_28_aarch64:latest
CIBW_TEST_SKIP: "*-macosx_arm64 *-manylinux_aarch64" # ARM64 wheels cannot be tested
CIBW_REPAIR_WHEEL_COMMAND_MACOS: >-
DYLD_LIBRARY_PATH=$LIBRARY_PATH delocate-wheel --require-archs {delocate_archs} -w {dest_dir} {wheel}
DYLD_LIBRARY_PATH=$(pwd)/vcpkg_installed/$(echo {delocate_archs} | sed s/x86_64/x64/)-osx/lib
delocate-wheel --require-archs {delocate_archs} -w {dest_dir} {wheel}
CIBW_BEFORE_BUILD_WINDOWS: "python -m pip install delvewheel"
CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: >-
delvewheel repair --add-path C:\vcpkg\installed\x64-windows\bin -w {dest_dir} {wheel}
delvewheel repair --add-path vcpkg_installed\x64-windows\bin -w {dest_dir} {wheel}
steps:
- name: Checkout
Expand All @@ -48,34 +50,29 @@ jobs:
id: cache-vcpkg
if: runner.os == 'macOS' || runner.os == 'Windows'
with:
path: |
/usr/local/share/vcpkg/installed
C:\vcpkg\installed
key: ${{ runner.os }}-vcpkg-17 # INCREMENT ME!!
path: ./vcpkg_installed
key: ${{ runner.os }}-vcpkg-19 # INCREMENT ME!!

- name: Install Vcpkg Dependencies
if: (runner.os == 'macOS' || runner.os == 'Windows') && steps.cache-vcpkg.outputs.cache-hit != 'true'
shell: bash
run: |
set -e
VCPKG_CMD="vcpkg --overlay-ports .vcpkg/ports --overlay-triplets .vcpkg/triplets install"
PKG_LIST="lexbor lz4 re2 uchardet zlib"
set -xe
# Windows
if [ -d /c/vcpkg ]; then
$VCPKG_CMD --triplet=x64-windows $PKG_LIST
vcpkg install --triplet=x64-windows
# MacOS
elif [ "$(uname -s)" == "Darwin" ]; then
sudo chown runner:admin /usr/local/share
git clone https://github.com/Microsoft/vcpkg /usr/local/share/vcpkg
/usr/local/share/vcpkg/bootstrap-vcpkg.sh
ln -s /usr/local/share/vcpkg/vcpkg /usr/local/bin
vcpkg integrate install
$VCPKG_CMD --triplet=x64-osx $PKG_LIST
$VCPKG_CMD --triplet=arm64-osx $PKG_LIST
git clone https://github.com/Microsoft/vcpkg
./vcpkg/bootstrap-vcpkg.sh
# Use two install roots and merge them manually; otherwise the second install clears out the first
./vcpkg/vcpkg install --triplet=x64-osx
./vcpkg/vcpkg install --triplet=arm64-osx --x-install-root vcpkg_installed2
cp -a vcpkg_installed2/* vcpkg_installed/
rm -r vcpkg_installed2
else
echo "Unsupported platform." >&2
Expand All @@ -94,15 +91,15 @@ jobs:
# platforms: aarch64

- name: Build FastWARC
uses: pypa/cibuildwheel@v2.16.5
uses: pypa/cibuildwheel@v2.22.0
with:
package-dir: fastwarc
output-dir: wheelhouse
env:
CIBW_TEST_COMMAND: python -m pytest --capture=sys --verbose {project}/tests/fastwarc

- name: Build Resiliparse
uses: pypa/cibuildwheel@v2.16.5
uses: pypa/cibuildwheel@v2.22.0
with:
package-dir: resiliparse
output-dir: wheelhouse
Expand Down Expand Up @@ -144,15 +141,15 @@ jobs:
platforms: all

- name: Build FastWARC
uses: pypa/cibuildwheel@v2.16.5
uses: pypa/cibuildwheel@v2.22.0
with:
package-dir: fastwarc
output-dir: wheelhouse
env:
CIBW_TEST_COMMAND: python -m pytest --capture=sys --verbose {project}/tests/fastwarc

- name: Build Resiliparse
uses: pypa/cibuildwheel@v2.16.5
uses: pypa/cibuildwheel@v2.22.0
with:
package-dir: resiliparse
output-dir: wheelhouse
Expand Down
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
*.whl
MANIFEST

# IDEs
Expand Down Expand Up @@ -169,5 +170,6 @@ resiliparse/**/*.html
# data directories
wikidumps/

# 3rd-party library dir
lib-third-party/
# third-party dependencies
vcpkg/
vcpkg_installed/
13 changes: 13 additions & 0 deletions .vcpkg/triplets/aarch64-linux.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# vcpkg triplet for 64-bit ARM Linux targets.
set(VCPKG_TARGET_ARCHITECTURE arm64)
set(VCPKG_CMAKE_SYSTEM_NAME Linux)

# The C runtime is linked dynamically. Ports are built as shared libraries,
# except for ports whose name matches "abseil", which are built statically.
set(VCPKG_CRT_LINKAGE dynamic)
if(PORT MATCHES "abseil")
  set(VCPKG_LIBRARY_LINKAGE static)
else()
  set(VCPKG_LIBRARY_LINKAGE dynamic)
endif()

# Only the release configuration is built, with -O3 for C and C++ and
# C++17 selected for C++ sources.
set(VCPKG_BUILD_TYPE release)
set(VCPKG_C_FLAGS "-O3")
set(VCPKG_CXX_FLAGS "-std=c++17 -O3")

# Turn on vcpkg's RPATH fix-up for the installed ELF binaries.
set(VCPKG_FIXUP_ELF_RPATH ON)
7 changes: 4 additions & 3 deletions .vcpkg/triplets/arm64-osx.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ set(VCPKG_TARGET_ARCHITECTURE arm64)
set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
set(VCPKG_OSX_ARCHITECTURES arm64)

set(VCPKG_BUILD_TYPE release)
set(VCPKG_C_FLAGS "-O3")
set(VCPKG_CXX_FLAGS "-std=c++17 -O3")
set(VCPKG_CRT_LINKAGE dynamic)
set(VCPKG_LIBRARY_LINKAGE dynamic)
if(PORT MATCHES "abseil")
set(VCPKG_LIBRARY_LINKAGE static)
endif()

set(VCPKG_BUILD_TYPE release)
set(VCPKG_C_FLAGS "-O3")
set(VCPKG_CXX_FLAGS "-std=c++17 -O3")
13 changes: 13 additions & 0 deletions .vcpkg/triplets/x64-linux.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# vcpkg triplet for x86-64 Linux targets.
set(VCPKG_TARGET_ARCHITECTURE x64)
set(VCPKG_CMAKE_SYSTEM_NAME Linux)

# The C runtime is linked dynamically. Ports are built as shared libraries,
# except for ports whose name matches "abseil", which are built statically.
set(VCPKG_CRT_LINKAGE dynamic)
if(PORT MATCHES "abseil")
  set(VCPKG_LIBRARY_LINKAGE static)
else()
  set(VCPKG_LIBRARY_LINKAGE dynamic)
endif()

# Only the release configuration is built, with -O3 for C and C++ and
# C++17 selected for C++ sources.
set(VCPKG_BUILD_TYPE release)
set(VCPKG_C_FLAGS "-O3")
set(VCPKG_CXX_FLAGS "-std=c++17 -O3")

# Turn on vcpkg's RPATH fix-up for the installed ELF binaries.
set(VCPKG_FIXUP_ELF_RPATH ON)
32 changes: 27 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,32 @@ FROM quay.io/pypa/manylinux_2_28_${TARGET_ARCH}:latest

RUN set -x \
&& yum install -y \
gcc-toolset-13-libasan-devel \
lz4-devel \
zlib-devel
curl \
git \
gcc-toolset-13-libasan-devel \
ninja-build \
tar \
unzip \
zip \
&& yum clean all \
&& rm -rf /var/cache/yum

ENV VCPKG_DISABLE_METRICS="1"
ENV VCPKG_FORCE_SYSTEM_BINARIES="1"

COPY .vcpkg /tmp/build/.vcpkg
COPY vcpkg.json vcpkg-configuration.json /tmp/build/

COPY compile-third-party-libs.sh /usr/bin/compile-third-party-libs
RUN set -x \
&& /usr/bin/compile-third-party-libs /usr
&& (cd /tmp/build \
&& TRIPLET=$(uname -m | sed s/x86_64/x64/)-linux \
&& git clone https://github.com/Microsoft/vcpkg \
&& ./vcpkg/bootstrap-vcpkg.sh \
&& ./vcpkg/vcpkg install --triplet=${TRIPLET} \
&& mkdir /opt/libs \
&& mv vcpkg_installed/${TRIPLET}/* /opt/libs/) \
&& rm -rf /tmp/build

ENV CPATH="/opt/libs/include"
ENV LIBRARY_PATH="/opt/libs/lib"
ENV LD_LIBRARY_PATH="/opt/libs/lib"
51 changes: 27 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,41 +49,44 @@ FastWARC is being distributed as its own package and can be installed like so:
```bash
pip install fastwarc
```
For optimal performance, however, it is recommended to build FastWARC from sources instead of relying on the pre-built binaries. See below for more information.

## Building From Source
To build Resiliparse or FastWARC from sources, you need to install all required build-time dependencies first. On Ubuntu, this is done as follows:
To build Resiliparse and FastWARC from sources, you need to install all required build-time dependencies listed in `vcpkg.json`. It's possible to install them globally via your package manager, but the easiest and most consistent way is to use [vcpkg](https://vcpkg.io/en/):
```bash
# Add Lexbor repository
curl -sL https://lexbor.com/keys/lexbor_signing.key | \
sudo gpg --dearmor --output /etc/apt/trusted.gpg.d/lexbor.gpg
echo "deb https://packages.lexbor.com/ubuntu/ $(lsb_release -sc) liblexbor" | \
sudo tee /etc/apt/sources.list.d/lexbor.list

# Install build dependencies (requires libre2-dev>=2022-04-01)
sudo apt update
sudo apt install build-essential python3-dev zlib1g-dev \
liblz4-dev libuchardet-dev liblexbor-dev libre2-dev
# Install vcpkg itself (skip if you have a working vcpkg installation already)
git clone https://github.com/Microsoft/vcpkg
./vcpkg/bootstrap-vcpkg.sh

# Install dependencies to vcpkg_installed (must be run from sources root)
./vcpkg/vcpkg install --triplet=x64-linux
```
Then, to build the actual packages, run:
Replace the triplet value with one suitable for your platform. Valid values are: `x64-linux`, `aarch64-linux`, `x64-windows`, `x64-osx`, `arm64-osx` (or any of the vcpkg default triplets).

After installing the dependencies, you can build the actual Python packages:
```bash
# Optional: Create a fresh venv first
# Create a fresh venv first (recommended)
python3 -m venv venv && source venv/bin/activate

# Build and install Resiliparse
pip install -e resiliparse
# Option 1: Build and install in editable mode (best for development)
python3 -m pip install -e ./fastwarc ./resiliparse

# Build and install FastWARC
pip install -e fastwarc
# Option 2 (alternative): Build and install wheels in separate steps (best for redistribution)
python3 -m pip wheel -w build ./fastwarc ./resiliparse
ls ./build/*.whl | xargs python3 -m pip install
```
Instead of building the packages from this repository, you can also build them from the PyPi source packages:
In most cases, the build routine should be smart enough to detect the location of the installed vcpkg dependencies. However, in some cases you may be getting errors about missing header files or undefined symbols. This can happen if you don't build from the source repository, use Python's new `build` module, or run `pip wheel` with `--isolated`. To work around that, set the `RESILIPARSE_VCPKG_PATH` environment variable to the absolute path of the vcpkg installation directory:
```bash
# Build Resiliparse from PyPi
pip install --no-binary resiliparse resiliparse
export RESILIPARSE_VCPKG_PATH="$(pwd)/vcpkg_installed"
```

# Build FastWARC from PyPi
pip install --no-binary fastwarc fastwarc
**NOTE:** Unless you fix up the wheels to embed the linked shared libraries (via [auditwheel](https://github.com/pypa/auditwheel) on Linux, [delocate-wheel](https://github.com/matthew-brett/delocate) on macOS, or [delvewheel](https://github.com/adang1345/delvewheel) on Windows), you will have to add the vcpkg library directory (`vcpkg_installed/TRIPLET/lib`) to your library search path to use them. On Linux, add the directory path to the `LD_LIBRARY_PATH` environment variable, on macOS to `DYLD_LIBRARY_PATH`. On Windows, you have to add the directory to the `Path` environment variable.

Here's an example of how to use `auditwheel` on Linux to fix up the built wheels:
```bash
LD_LIBRARY_PATH=$(pwd)/vcpkg_installed/x64-linux/lib \
auditwheel repair --plat linux_x86_64 build/Resiliparse*.whl build/FastWARC*.whl
```
(Please note that `linux_x86_64` platform wheels are [not suitable for general redistribution](https://packaging.python.org/en/latest/specifications/platform-compatibility-tags/#platform-tag).)


## Cite Us

Expand Down
76 changes: 0 additions & 76 deletions compile-third-party-libs.sh

This file was deleted.

Loading

0 comments on commit 59bda42

Please sign in to comment.