Merge pull request #309 from mila-iqia/H100
H100
Delaunay authored Nov 21, 2024
2 parents 08eebc1 + 8498689 commit 4fdf736
Showing 4 changed files with 19 additions and 28 deletions.
6 changes: 3 additions & 3 deletions milabench/_version.py
@@ -1,5 +1,5 @@
 """This file is generated, do not modify"""

-__tag__ = "v0.1.0-129-ga60a3aa"
-__commit__ = "a60a3aae21e87e46bcce403620a3f56c12878554"
-__date__ = "2024-11-06 22:52:12 -0500"
+__tag__ = "v1.0.0_RC1-12-g3b87cb4"
+__commit__ = "3b87cb465e855be452953273c314ab01024e0925"
+__date__ = "2024-10-09 12:04:43 -0400"
3 changes: 3 additions & 0 deletions milabench/compare.py
@@ -27,6 +27,9 @@ def fetch_runs(folder, filter):
     runs = []
     ignored = 0
     for run in os.listdir(folder):
+        if run.startswith("install") or run.startswith("prepare"):
+            continue
+
         if filter is not None and (not fnmatch.fnmatch(run, filter)):
             ignored += 1
             continue
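The change above makes fetch_runs skip directories produced by the install and prepare phases before the optional glob filter is applied. A minimal self-contained sketch of the resulting behaviour; the function body outside the shown hunk is assumed, so the name fetch_runs_sketch and the return value are hypothetical:

import fnmatch
import os

def fetch_runs_sketch(folder, filter):
    # Hypothetical condensation of milabench.compare.fetch_runs after this commit.
    runs = []
    ignored = 0
    for run in os.listdir(folder):
        # New: install/prepare output directories are never counted as runs.
        if run.startswith("install") or run.startswith("prepare"):
            continue
        # Pre-existing: apply the optional glob filter, counting rejects.
        if filter is not None and (not fnmatch.fnmatch(run, filter)):
            ignored += 1
            continue
        runs.append(run)
    return runs, ignored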
1 change: 1 addition & 0 deletions milabench/system.py
@@ -406,6 +406,7 @@ def resolve_hostname(ip):
         if is_loopback(ip):
             return hostname, True

+        return socket.gethostname(), hostname.startswith(socket.gethostname())
         return hostname, hostname == socket.gethostname()

     except:
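The line added above returns before the pre-existing return, which becomes unreachable: the function now reports the local hostname together with a prefix match (so a resolved "node1.example.com" counts as local when socket.gethostname() is "node1") instead of an exact equality check. A minimal sketch of the resulting flow, assuming the address lookup uses socket.gethostbyaddr and substituting a stand-in for the module's is_loopback helper:

import ipaddress
import socket

def is_loopback(ip):
    # Stand-in for the helper in milabench.system (assumed semantics).
    return ipaddress.ip_address(ip).is_loopback

def resolve_hostname_sketch(ip):
    # Hypothetical condensation of milabench.system.resolve_hostname after this commit.
    try:
        hostname, _, _ = socket.gethostbyaddr(ip)  # assumed lookup
        if is_loopback(ip):
            return hostname, True
        # Added in this commit: prefix comparison against the local hostname.
        return socket.gethostname(), hostname.startswith(socket.gethostname())
        # Pre-existing exact comparison, now dead code:
        # return hostname, hostname == socket.gethostname()
    except OSError:
        # The original uses a bare except; the fallback value here is an assumption.
        return ip, False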
37 changes: 12 additions & 25 deletions scripts/article/run_cuda.sh
@@ -49,7 +49,7 @@ install_prepare() {
     # Install milabench's benchmarks in their venv
     #
     # pip install torch
-    milabench pin --variant cuda --from-scratch $ARGS
+    # milabench pin --variant cuda --from-scratch $ARGS
     milabench install --system $MILABENCH_WORDIR/system.yaml $ARGS

     which pip
@@ -70,9 +70,9 @@ install_prepare() {
     milabench prepare --system $MILABENCH_WORDIR/system.yaml $ARGS
 }

-module load cuda/12.3.2
+# module load cuda/12.3.2

-if [ ! -d "$MILABENCH_WORDIR/results" ]; then
+if [ ! -d "$MILABENCH_WORDIR/env" ]; then
     install_prepare
 else
     echo "Reusing previous install"
@@ -87,8 +87,9 @@ if [ "$MILABENCH_PREPARE" -eq 0 ]; then
     # pip install torch
     # milabench pin --variant cuda --from-scratch
     # rm -rf $MILABENCH_WORDIR/results/venv/
-    # rm -rf $MILABENCH_WORDIR/results/extra
-    # milabench install --system $MILABENCH_WORDIR/system.yaml
+    rm -rf $MILABENCH_WORDIR/results/extra
+
+    milabench install --system $MILABENCH_WORDIR/system.yaml
     milabench prepare --system $MILABENCH_WORDIR/system.yaml $ARGS

     (
@@ -98,28 +99,14 @@ if [ "$MILABENCH_PREPARE" -eq 0 ]; then
         # pip install torchao --no-input
     )

-    # pip install torch
-    # milabench pin --variant cuda --from-scratch
-    # milabench install --system $MILABENCH_WORDIR/system.yaml --force $ARGS
-    # milabench prepare --system $MILABENCH_WORDIR/system.yaml $ARGS
-
-    # ARGS="--select resnet50-noio,brax,lightning,dinov2-giant-single,dinov2-giant-gpus,llm-lora-ddp-gpus,llm-lora-ddp-nodes,llm-lora-mp-gpus,llm-full-mp-gpus,llm-full-mp-nodes,dqn,ppo,dimenet,llava-single,rlhf-single,rlhf-gpus,vjepa-single,vjepa-gpus"
-
-    # MEMORY_CAPACITY=("4Go" "8Go" "16Go" "32Go" "64Go" "80Go")
-    # # MEMORY_CAPACITY=("2048" "4096" "8192")
-
-    # # Run the benchmakrs
-    # for CAPACITY in "${MEMORY_CAPACITY[@]}"; do
-    #     export MILABENCH_SIZER_AUTO=1
-    #     export MILABENCH_SIZER_MULTIPLE=8
-    #     export MILABENCH_SIZER_CAPACITY=$CAPACITY
-    #     # export MILABENCH_SIZER_BATCH_SIZE=$CAPACITY
-    #     milabench run --run-name "c$CAPACITY.{time}" --system $MILABENCH_WORDIR/system.yaml $ARGS || true
-    # done
-
     milabench run --system $MILABENCH_WORDIR/system.yaml $ARGS

     #
     # Display report
     milabench report --runs $MILABENCH_WORDIR/results/runs
 fi
 fi


# rsync -av [email protected]:~/rocm/results/cache ~/cuda/results/cache
# rsync -av [email protected]:~/rocm/results/data ~/cuda/results/data
# rsync -av [email protected]:~/rocm/results/cache ~/cuda/results/cache
