Skip to content

test

test #97

Workflow file for this run

# CI workflow: checks out the repository on a GPU runner, verifies that the
# NVIDIA devices are visible, and reports the shared-memory mount.
name: Rust GPU Tests

# Triggered by every push.
on:
  push:

# Only one run per group stays active; a newer run in the same group cancels
# the one in progress.
concurrency:
  # With the push trigger the pull_request / head_ref contexts are empty, so
  # the group key falls through to github.ref.
  group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}"
  cancel-in-progress: true

jobs:
  e2e:
    # Scheduled on runners that carry the `gpu` label.
    runs-on: gpu
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # nvidia-smi exits non-zero when it cannot run, which fails the job.
      - name: Validate presence of GPU devices
        run: nvidia-smi

      # Read-only check: prints the df entry matching "shm" and does not
      # resize anything. grep exits non-zero when no line matches, so the
      # step also fails if no such mount is listed.
      - name: Show shared memory size
        run: df -h | grep shm
#
# Disabled steps (commented out; they do not run).
#
#      - name: Install OpenSSL && pkg-config
#        run: sudo apt-get update && sudo apt-get install -y pkg-config libssl-dev
#
#      - name: Increase shared memory size
#        run: sudo sh -c 'echo "tmpfs /dev/shm tmpfs defaults,size=2G 0 0" >> /etc/fstab && sudo mount -o remount /dev/shm'
#
#      NOTE(review): no step with id `cache-cuda-nccl` is visible in this file,
#      so the `if:` below would presumably always evaluate to true -- confirm
#      before re-enabling.
#      - name: Install CUDA and NCCL dependencies
#        if: steps.cache-cuda-nccl.outputs.cache-hit != 'true'
#        env:
#          DEBIAN_FRONTEND: noninteractive
#        run: |
#          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb
#          sudo dpkg -i cuda-keyring_1.1-1_all.deb
#          sudo apt update
#          sudo apt install -y cuda-toolkit-12-2 libnccl2 libnccl-dev
#
#      - name: Find libs
#        run: find /usr -name "libnvrtc*" && find /usr -name libcuda.so
#
#      - name: Cache Rust build
#        uses: actions/cache@v3
#        id: cache-rust
#        with:
#          path: |
#            ~/.cargo/registry
#            ~/.cargo/git
#            target
#          key: rust-build-${{ runner.os }}-${{ hashFiles('**/Cargo.lock') }}
#          restore-keys: |
#            rust-build-${{ runner.os }}-
#
#      - name: Find libs
#        run: find /usr -name "libnvrtc*" && find /usr -name libcuda.so
#
#      - name: Install Rust nightly
#        uses: dtolnay/rust-toolchain@master
#        with:
#          toolchain: nightly
#
#      - name: E2E Tests
#        run: cargo test --release e2e
#        shell: bash
#        env:
#          NCCL_DEBUG: info
#          NCCL_P2P_LEVEL: LOC
#          NCCL_NET: Socket