From 5dd2298b6ba3bf8ea2c0ba78889e2da84d3b4319 Mon Sep 17 00:00:00 2001 From: James Walker Date: Wed, 4 Dec 2024 10:48:52 -0500 Subject: [PATCH 1/3] fix: bacalhau dind w/ nvidia --- docker/bacalhau/Dockerfile | 39 ++++++++++++++++++++++++++++------ docker/docker-compose.base.yml | 3 ++- docker/docker-compose.yml | 5 +++++ 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/docker/bacalhau/Dockerfile b/docker/bacalhau/Dockerfile index 0ecd31e0..bc4fc75a 100644 --- a/docker/bacalhau/Dockerfile +++ b/docker/bacalhau/Dockerfile @@ -1,14 +1,39 @@ -FROM docker:dind AS base +FROM nvidia/cuda:12.0.1-cudnn8-runtime-ubuntu22.04 -RUN apk update -RUN apk add wget -RUN apk add bash +WORKDIR /app +RUN apt update && \ + apt install -y apt-utils bash ca-certificates curl gnupg iptables && \ + apt clean + +# Install NVIDIA CTK and Docker +RUN mkdir -pm755 /etc/apt/keyrings && curl -o /etc/apt/keyrings/docker.asc -fsSL "https://download.docker.com/linux/ubuntu/gpg" && chmod a+r /etc/apt/keyrings/docker.asc && \ + mkdir -pm755 /etc/apt/sources.list.d && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(grep UBUNTU_CODENAME= /etc/os-release | cut -d= -f2 | tr -d '\"') stable" > /etc/apt/sources.list.d/docker.list && \ + mkdir -pm755 /usr/share/keyrings && curl -fsSL "https://nvidia.github.io/libnvidia-container/gpgkey" | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg && \ + curl -fsSL "https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list" | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' > /etc/apt/sources.list.d/nvidia-container-toolkit.list && \ + apt-get update && \ + apt-get install -y nvidia-container-toolkit docker-ce docker-ce-cli && \ + apt clean +RUN nvidia-ctk runtime configure --runtime=docker --set-as-default + +# Install Bacalhau ADD https://github.com/bacalhau-project/bacalhau/releases/download/v1.3.2/bacalhau_v1.3.2_linux_amd64.tar.gz . RUN tar xfv bacalhau_v1.3.2_linux_amd64.tar.gz RUN mv bacalhau /usr/local/bin -HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \ - CMD wget http://localhost:1234/api/v1/agent/alive -q || exit 1 +ADD https://raw.githubusercontent.com/moby/moby/refs/heads/master/hack/dind /usr/local/bin/dind +RUN chmod +x /usr/local/bin/dind + +HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=10 \ + CMD curl http://localhost:1234/api/v1/agent/alive -q || exit 1 + +VOLUME /var/lib/docker + +RUN touch run +RUN echo "#!/bin/bash" >> run +RUN echo "dind dockerd &" >> run +RUN echo 'until pgrep "dockerd" >/dev/null; do sleep 0.5; done' >> run +RUN echo 'exec "$@"' >> run +RUN chmod a+x ./run -ENTRYPOINT [ "bacalhau" ] \ No newline at end of file +ENTRYPOINT [ "./run" ] \ No newline at end of file diff --git a/docker/docker-compose.base.yml b/docker/docker-compose.base.yml index 4f853ccd..b70dee39 100644 --- a/docker/docker-compose.base.yml +++ b/docker/docker-compose.base.yml @@ -61,6 +61,7 @@ services: image: ghcr.io/lilypad-tech/bacalhau container_name: bacalhau restart: unless-stopped + privileged: true depends_on: ipfs: condition: service_healthy @@ -75,6 +76,7 @@ services: - 1234:1234 command: [ + "bacalhau", "serve", "--node-type", "compute,requester", @@ -87,7 +89,6 @@ services: ] volumes: - bacalhau-data:/root/.bacalhau - - /var/run/docker.sock:/var/run/docker.sock volumes: chain-data: ipfs-data: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index fcda47e9..b89c33a7 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -14,13 +14,18 @@ services: image: ghcr.io/lilypad-tech/bacalhau container_name: bacalhau restart: unless-stopped + privileged: true depends_on: ipfs: condition: service_healthy + build: + context: .. + dockerfile: ./docker/bacalhau/Dockerfile environment: - BACALHAU_ENVIRONMENT=local command: [ + "bacalhau", "serve", "--node-type", "compute,requester", From c961963d0311f36cf72cb7ed3d8cd88636bb211f Mon Sep 17 00:00:00 2001 From: James Walker Date: Wed, 4 Dec 2024 18:49:56 -0500 Subject: [PATCH 2/3] fix: smaller RP image --- docker/resource-provider/Dockerfile | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/docker/resource-provider/Dockerfile b/docker/resource-provider/Dockerfile index 5c5f3b45..fe7872f0 100644 --- a/docker/resource-provider/Dockerfile +++ b/docker/resource-provider/Dockerfile @@ -1,6 +1,6 @@ ARG COMPUTE_MODE=gpu -FROM nvidia/cuda:12.0.1-cudnn8-devel-ubuntu22.04 AS base +FROM golang:1.22.4 AS base WORKDIR /usr/src/app ARG NETWORK=testnet ARG VERSION @@ -12,23 +12,13 @@ ENV BACALHAU_API_HOST="localhost" ENV WEB3_PRIVATE_KEY="" ENV DISABLE_TELEMETRY=false -# Install necessary dependencies -RUN apt update && apt install -y wget bash curl && apt clean - -# Install Bacalhau -RUN cd /tmp && \ - wget https://github.com/bacalhau-project/bacalhau/releases/download/v1.3.2/bacalhau_v1.3.2_linux_amd64.tar.gz && \ - tar xfv bacalhau_v1.3.2_linux_amd64.tar.gz && \ - mv bacalhau /usr/local/bin/bacalhau && \ - rm bacalhau_v1.3.2_linux_amd64.tar.gz - -# Build and install Lilypad -COPY --from=golang:1.22.4-alpine /usr/local/go/ /usr/local/go/ -ENV PATH="/usr/local/go/bin:${PATH}" - COPY . . -FROM base AS build-gpu +FROM nvidia/cuda:12.0.1-cudnn8-devel-ubuntu22.04 AS build-gpu +WORKDIR /usr/src/app +COPY --from=base /usr/src/app . +COPY --from=base /usr/local/go/ /usr/local/go/ +ENV PATH="/usr/local/go/bin:${PATH}" RUN nvcc --version && nvcc --ptx -o ./pkg/resourceprovider/cudaminer/keccak.ptx ./pkg/resourceprovider/cudaminer/keccak.cu RUN go build -v -tags cuda -ldflags="-X 'github.com/lilypad-tech/lilypad/pkg/system.Version=${VERSION}' -X 'github.com/lilypad-tech/lilypad/pkg/system.CommitSHA=${COMMIT_SHA}'" ENV DISABLE_POW=false @@ -40,6 +30,16 @@ ENV DISABLE_POW=true FROM build-$COMPUTE_MODE AS final RUN mv lilypad /usr/local/bin +# Install necessary dependencies +RUN apt update && apt install -y wget bash curl && apt clean + +# Install Bacalhau +RUN cd /tmp && \ + wget https://github.com/bacalhau-project/bacalhau/releases/download/v1.3.2/bacalhau_v1.3.2_linux_amd64.tar.gz && \ + tar xfv bacalhau_v1.3.2_linux_amd64.tar.gz && \ + mv bacalhau /usr/local/bin/bacalhau && \ + rm bacalhau_v1.3.2_linux_amd64.tar.gz + # Add both lilypad and bacalhau executables to PATH ENV PATH="/usr/local/bin:${PATH}" From 78dfeb6f621a36e4260b44e1f259474d9ac4a17f Mon Sep 17 00:00:00 2001 From: James Walker Date: Fri, 6 Dec 2024 17:20:51 -0500 Subject: [PATCH 3/3] fix: PR feedback --- docker/bacalhau/Dockerfile | 1 + docker/docker-compose.yml | 1 - docker/resource-provider/Dockerfile | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/bacalhau/Dockerfile b/docker/bacalhau/Dockerfile index bc4fc75a..e9d0869b 100644 --- a/docker/bacalhau/Dockerfile +++ b/docker/bacalhau/Dockerfile @@ -31,6 +31,7 @@ VOLUME /var/lib/docker RUN touch run RUN echo "#!/bin/bash" >> run +RUN echo "find /run /var/run -iname 'docker*.pid' -delete" >> run RUN echo "dind dockerd &" >> run RUN echo 'until pgrep "dockerd" >/dev/null; do sleep 0.5; done' >> run RUN echo 'exec "$@"' >> run diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index b89c33a7..0923e5c2 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -38,7 +38,6 @@ services: ] volumes: - bacalhau-data:/root/.bacalhau - - /var/run/docker.sock:/var/run/docker.sock resource-provider: image: ghcr.io/lilypad-tech/resource-provider:latest container_name: resource-provider diff --git a/docker/resource-provider/Dockerfile b/docker/resource-provider/Dockerfile index fe7872f0..ee04be1c 100644 --- a/docker/resource-provider/Dockerfile +++ b/docker/resource-provider/Dockerfile @@ -14,7 +14,7 @@ ENV DISABLE_TELEMETRY=false COPY . . -FROM nvidia/cuda:12.0.1-cudnn8-devel-ubuntu22.04 AS build-gpu +FROM nvidia/cuda:12.0.1-cudnn8-devel-ubuntu22.04 AS build-gpu WORKDIR /usr/src/app COPY --from=base /usr/src/app . COPY --from=base /usr/local/go/ /usr/local/go/