Skip to content

Commit

Permalink
d
Browse files Browse the repository at this point in the history
  • Loading branch information
weizhoublue committed Aug 2, 2024
1 parent 0a67a72 commit 4b6d7cc
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 2 deletions.
2 changes: 1 addition & 1 deletion rdma-tools/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ git push --tags

```shell
helm repo add spiderchart https://spidernet-io.github.io/charts
helm repo update
helm repo update spiderchart
helm search repo rdma-tools

# run daemonset on worker1 and worker2
Expand Down
3 changes: 2 additions & 1 deletion rdma-tools/image/install-tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ InstallNccl(){
wget --no-check-certificate ${ENV_CUDA_DEB_SOURCE}
dpkg -i *.deb
apt-get update
apt install -y libnccl2 libnccl-dev
apt install --allow-change-held-packages -y libnccl2 libnccl-dev
rm * -rf || true

echo "ulimit -l 2000000" >> /etc/bash.bashrc
Expand Down Expand Up @@ -110,6 +110,7 @@ packages=(
jq
inxi
hwloc
libgomp1
)

export DEBIAN_FRONTEND=noninteractive
Expand Down
2 changes: 2 additions & 0 deletions rdma-tools/image/tools/testNcclTest
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ HOST_LIST=$( echo -n "${POD_IP_LIST}" | tr '\n' ',' )
CMD_NAME=${CMD_NAME:-"all_reduce_perf"}
CMD_OPTIONS=${CMD_OPTIONS:-"-b 512M -e 8G -f 2 -n 1 "}

# todo: check whether the network is Ethernet or InfiniBand.
# todo: for InfiniBand, test SHARP: -x NCCL_COLLNET_ENABLE=1 -x NCCL_ALGO=CollNet

echo ""
echo "***************************************************************************************************************"
Expand Down

0 comments on commit 4b6d7cc

Please sign in to comment.