Skip to content

Commit

Permalink
Dockerize TG Freq
Browse files Browse the repository at this point in the history
  • Loading branch information
afuller-TT committed Feb 25, 2025
1 parent cbe0e1a commit ec49992
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 14 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/pipeline-select-galaxy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,14 @@ jobs:
uses: ./.github/workflows/tg-unit-tests-impl.yaml
if: ${{ inputs.tg-unit }}
tg-frequent-tests:
  # Runs only when the caller enables the `tg-frequent` input. The condition
  # is declared exactly once: a repeated `if:` key in the same job mapping is
  # a duplicate key.
  if: ${{ inputs.tg-frequent }}
  needs: build-artifact
  secrets: inherit
  uses: ./.github/workflows/tg-frequent-tests-impl.yaml
  # Forward the build-artifact job's outputs to the reusable workflow.
  with:
    docker-image: ${{ needs.build-artifact.outputs.ci-build-docker-image }}
    wheel-artifact-name: ${{ needs.build-artifact.outputs.wheel-artifact-name }}
    build-artifact-name: ${{ needs.build-artifact.outputs.build-artifact-name }}
tg-model-perf-tests:
needs: build-artifact
secrets: inherit
Expand Down
62 changes: 49 additions & 13 deletions .github/workflows/tg-frequent-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@ name: "[internal] TG frequent tests"

on:
  workflow_call:
    # All three inputs are mandatory strings supplied by the calling workflow.
    inputs:
      # Image used for the job's `container.image`.
      docker-image:
        type: string
        required: true
      # Name of the artifact that holds the Python wheel to install.
      wheel-artifact-name:
        type: string
        required: true
      # Name of the artifact that holds the build output to download.
      build-artifact-name:
        type: string
        required: true

jobs:
tg-frequent-tests:
Expand All @@ -13,36 +23,62 @@ jobs:
{ name: "TG resnet50 frequent tests", arch: wormhole_b0, model: resnet50, timeout: 90, owner_id: U013121KDH9}, # Austin Ho
{ name: "TG unit/distributed frequent tests", arch: wormhole_b0, model: unit, timeout: 90, owner_id: XXXXX}, # Add owner
]
# Job-level environment variables.
# NOTE(review): ARCH_NAME, LOGURU_LEVEL and LD_LIBRARY_PATH are also set under
# `container.env` in this job (there LD_LIBRARY_PATH points at /work/build/lib);
# confirm which definition is meant to apply.
env:
ARCH_NAME: ${{ matrix.test-group.arch }}
LOGURU_LEVEL: INFO
LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
# Labels a self-hosted runner must carry to be selected for this job.
runs-on:
- arch-wormhole_b0
- config-tg
- in-service
- bare-metal
- pipeline-functional
# Execute the job's steps inside the image passed in by the caller.
container:
  image: ${{ inputs.docker-image }}
  # Expose the Tenstorrent device node to the container.
  options: "--device /dev/tenstorrent"
  volumes:
    # Subdir to workaround https://github.com/actions/runner/issues/691
    - ${{ github.workspace }}/docker-job:/work
    - /dev/hugepages-1G:/dev/hugepages-1G
    - /mnt/MLPerf:/mnt/MLPerf
  env:
    ARCH_NAME: ${{ matrix.test-group.arch }}
    LOGURU_LEVEL: INFO
    TT_METAL_HOME: /work
    PYTHONPATH: /work
    LD_LIBRARY_PATH: /work/build/lib
defaults:
  run:
    # https://github.com/actions/runner/issues/878
    working-directory: /work
    shell: bash
steps:
# Checkout via the shared action pinned to the main branch (submodules and LFS,
# judging by the action's name — verify against the action definition).
- uses: tenstorrent/tt-metal/.github/actions/checkout-with-submodule-lfs@main
# Persist the step's current directory as TT_METAL_HOME for later steps by
# appending it to the GITHUB_ENV file.
- name: Set up dynamic env vars for build
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
- name: ⬇️ Checkout
  uses: actions/checkout@v4
  with:
    # Here be dragons; keep it scoped to our desired volume, yet must be under github.workspace and be sure to clean up at the end
    path: docker-job
    submodules: recursive
# Download the build artifact named by the caller into /work.
# `name` is given once: a second `name:` key in the same `with:` mapping is a
# duplicate key, and the caller-supplied input is the value that must be used.
- uses: actions/download-artifact@v4
  with:
    name: ${{ inputs.build-artifact-name }}
    path: /work
# Unpack the downloaded archive in the step's working directory.
- name: Extract files
  run: |
    tar -xvf ttm_any.tar
# Repository-local action that installs the Python dependencies.
# NOTE(review): this job also installs a prebuilt wheel with pip3 inside the
# container — confirm this step is still required.
- uses: ./.github/actions/install-python-deps
# Fetch the wheel artifact named by the caller into /work.
- name: ⬇️ Download Wheel
  uses: actions/download-artifact@v4
  with:
    path: /work
    name: ${{ inputs.wheel-artifact-name }}
- name: Install Wheel
  run: |
    # Install the downloaded wheel straight from the shell glob instead of
    # capturing `ls` output in a variable and expanding it unquoted. The `./`
    # prefix keeps any unusual file name from being read as a pip option.
    pip3 install ./*.whl
# Runs run_tests.sh for the frequent_tg_device pipeline against the matrix
# entry's model; the step is bounded by that entry's `timeout` (minutes).
# NOTE(review): the script activates python_env under github.workspace —
# presumably created by the install-python-deps step; verify that path is
# still valid now that the job runs in a container with /work as its root.
- name: Run frequent regression tests
timeout-minutes: ${{ matrix.test-group.timeout }}
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_tg_device --dispatch-mode "" --model ${{ matrix.test-group.model }}
# Report to Slack, tagging the matrix entry's owner, only when an earlier step
# in the job has failed.
# NOTE(review): the Checkout step places the repository under docker-job/;
# confirm this workspace-relative action path still resolves in that layout.
- uses: ./.github/actions/slack-report
if: ${{ failure() }}
with:
slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
owner: ${{ matrix.test-group.owner_id }}
- name: Cleanup
  # Always runs, including after failures, so the host is never left dirty.
  if: always()
  run: |
    # We are forced to checkout the repo into a subdir of the host's workdir; this pollutes the host
    # with root-owned files. Be sure to clean up after ourselves in case we're on a non-ephemeral runner.
    # Use the runner-provided GITHUB_WORKSPACE rather than a hard-coded
    # /__w/<repo>/<repo> path so the step keeps working if the repository is
    # forked or renamed. `:?` aborts instead of expanding to "/docker-job"
    # should the variable ever be unset.
    rm -rf "${GITHUB_WORKSPACE:?}/docker-job"
6 changes: 6 additions & 0 deletions .github/workflows/tg-frequent-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@ jobs:
# Build job; its outputs feed the test job below.
build-artifact:
  secrets: inherit
  uses: ./.github/workflows/build-artifact.yaml
  with:
    # Request a wheel in addition to the regular build output.
    build-wheel: true
# Calls the reusable test workflow once build-artifact has finished, passing
# along that job's image and artifact names.
tg-frequent-tests:
  uses: ./.github/workflows/tg-frequent-tests-impl.yaml
  needs: build-artifact
  secrets: inherit
  with:
    build-artifact-name: ${{ needs.build-artifact.outputs.build-artifact-name }}
    wheel-artifact-name: ${{ needs.build-artifact.outputs.wheel-artifact-name }}
    docker-image: ${{ needs.build-artifact.outputs.ci-build-docker-image }}

0 comments on commit ec49992

Please sign in to comment.