# .github/workflows/perf_benchmark.yaml

name: Performance Benchmark

on:
  # Manual trigger. Each input below is forwarded to the perf-benchmark pytest
  # command line by the Perf-Benchmark-Run job.
  workflow_dispatch: # run on request (no need for PR)
    inputs:
      model-category:
        type: choice
        description: Model category to run benchmark
        options:
          - speed
          - balance
          - accuracy
          - default # speed, balance, accuracy models only
          - all # default + other models
        default: all
      data-group:
        type: choice
        description: Data group to run benchmark
        options:
          - small
          - medium
          - large
          - all
        default: all
      num-repeat:
        # Typed as a number, like the workflow_call input of the same name, so
        # the manual-run form rejects non-numeric text before the job starts.
        type: number
        description: Overrides default per-data-group number of repeat setting
        default: 0
      num-epoch:
        # Typed as a number for the same reason as num-repeat.
        type: number
        description: Overrides default per-model number of epoch setting
        default: 0
      eval-upto:
        type: choice
        description: The last operation to evaluate. 'optimize' means all.
        options:
          - train
          - export
          - optimize
        default: optimize
      pytest-args:
        # No default: when left blank nothing extra is added to the command line.
        type: string
        description: |
          Additional perf-benchmark pytest arguments.
          "-k detection" -> detection task only
          "--dry-run" -> print command w/o execution.
      data-root:
        type: string
        description: Root directory containing validation data in CI server.
        default: "/home/validation/data/v2/"
      otx-ref:
        type: string
        description: |
          Target OTX ref (tag / branch name / commit hash) on main repo to test. Defaults to the current branch.
          `pip install otx[full]@https://github.com/openvinotoolkit/training_extensions.git@{otx_ref}` will be executed before run,
          and reverted after run. Works only for v2.x assuming CLI compatibility.
        default: __CURRENT_BRANCH_COMMIT__
  # Reusable-workflow entry point. It accepts the same set of inputs as the
  # manual trigger; the values allowed for the string inputs are listed in
  # their descriptions.
  workflow_call:
    inputs:
      model-category:
        description: >-
          Model category to run benchmark
          [speed, balance, accuracy, default, all]
        type: string
        default: default
      data-group:
        description: Data group to run benchmark [small, medium, large, all]
        type: string
        default: all
      num-repeat:
        description: Overrides default per-data-group number of repeat setting
        type: number
        default: 0
      num-epoch:
        description: Overrides default per-model number of epoch setting
        type: number
        default: 0
      eval-upto:
        description: >-
          The last operation to evaluate. 'optimize' means all.
          [train, export, optimize]
        type: string
        default: optimize
      pytest-args:
        description: |
          Additional perf-benchmark pytest arguments.
          "-k detection" -> detection task only
          "--dry-run" -> print command w/o execution.
        type: string
      data-root:
        description: Root directory containing validation data in CI server.
        type: string
        default: "/home/validation/data/v2/"
      otx-ref:
        description: |
          Target OTX ref (tag / branch name / commit hash) on main repo to test. Defaults to the current branch.
          `pip install otx[full]@https://github.com/openvinotoolkit/training_extensions.git@{otx_ref}` will be executed before run,
          and reverted after run. Works only for v2.x assuming CLI compatibility.
        type: string
        default: __CURRENT_BRANCH_COMMIT__

# Declare default permissions as read only.
# This is the workflow-level default for the token handed to every job below;
# no job in this file overrides it.
permissions: read-all

jobs:
  # Runs tests/perf/test_<task>.py through the `perf-benchmark` tox environment,
  # one matrix job per task, and uploads the files written under .tox/ as a
  # per-task artifact.
  Perf-Benchmark-Run:
    strategy:
      # Keep the remaining task jobs running when one of them fails.
      fail-fast: false
      matrix:
        include:
          # task-short names the job and its artifact; task selects the test file.
          - task-short: "ano"
            task: "anomaly"
          - task-short: "cls"
            task: "classification"
          - task-short: "det"
            task: "detection"
          - task-short: "isg"
            task: "instance_segmentation"
          - task-short: "ssg"
            task: "semantic_segmentation"
          - task-short: "vsp"
            task: "visual_prompting"
    name: Perf-Benchmark-${{ matrix.task-short }}
    runs-on: [self-hosted, linux, x64, dmount-v2]
    # 8640 minutes = 6 days.
    # NOTE(review): GitHub documents a per-job execution limit for self-hosted
    # runners; confirm this value is not above that limit.
    timeout-minutes: 8640
    steps:
      - name: Checkout repository
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      - name: Install Python
        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
        with:
          python-version: "3.10"
      - name: Install tox
        run: python -m pip install --require-hashes --no-deps -r .ci/requirements/tox/requirements.txt
      - name: Run Performance Test
        env:
          BENCHMARK_RESULTS_CLEAR: ${{ vars.BENCHMARK_RESULTS_CLEAR }}
          GH_CTX_REF_NAME: ${{ github.ref_name }}
          GH_CTX_SHA: ${{ github.sha }}
          # Caller-controlled values are handed to the shell as environment
          # variables and expanded inside double quotes below. This keeps each
          # value a single argument and stops shell metacharacters in an input
          # from being interpreted as script text.
          PERF_MODEL_CATEGORY: ${{ inputs.model-category }}
          PERF_DATA_ROOT: ${{ inputs.data-root }}
          PERF_DATA_GROUP: ${{ inputs.data-group }}
          PERF_NUM_REPEAT: ${{ inputs.num-repeat }}
          PERF_NUM_EPOCH: ${{ inputs.num-epoch }}
          PERF_EVAL_UPTO: ${{ inputs.eval-upto }}
          PERF_MLFLOW_TRACKING_URI: ${{ vars.MLFLOW_TRACKING_SERVER_URI }}
          PERF_USER_NAME: ${{ github.triggering_actor }}
          PERF_OTX_REF: ${{ inputs.otx-ref }}
        # Folded scalar: the lines below are joined with spaces into one command.
        # pytest-args is intentionally still spliced in as shell text, because it
        # has to expand to several words and may carry its own quoting
        # (e.g. -k "a and b"). It is therefore NOT protected against injection;
        # only trusted users should be able to trigger this workflow.
        run: >
          tox -vv -e perf-benchmark -- tests/perf/test_${{ matrix.task }}.py ${{ inputs.pytest-args }}
          --model-category "$PERF_MODEL_CATEGORY"
          --data-root "$PERF_DATA_ROOT"
          --data-group "$PERF_DATA_GROUP"
          --num-repeat "$PERF_NUM_REPEAT"
          --num-epoch "$PERF_NUM_EPOCH"
          --eval-upto "$PERF_EVAL_UPTO"
          --summary-file .tox/perf-benchmark-summary.xlsx
          --mlflow-tracking-uri "$PERF_MLFLOW_TRACKING_URI"
          --user-name "$PERF_USER_NAME"
          --otx-ref "$PERF_OTX_REF"
      - name: Upload test results
        uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
        with:
          name: perf-benchmark-${{ matrix.task-short }}
          path: .tox/perf-benchmark-*.*
        # Use always() to always run this step to publish test results when there are test failures
        if: ${{ always() }}

  # Collects the artifacts uploaded by the Perf-Benchmark-Run matrix jobs,
  # aggregates them with tests/perf/history/summary.py, renders the summary
  # notebook to HTML and uploads the result as one artifact.
  Perf-Benchmark-Summary:
    # always(): still summarize whatever results exist when some benchmark jobs failed.
    if: ${{ always() }}
    needs: Perf-Benchmark-Run
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
      - name: Install Python
        # Same pinned release as the Perf-Benchmark-Run job, so both jobs use
        # one version of the action.
        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install --require-hashes --no-deps -r .ci/requirements/benchmark/requirements.txt
      - name: Download benchmark results
        uses: actions/download-artifact@87c55149d96e628cc2ef7e6fc2aab372015aec85 # v4.1.3
        with:
          # No artifact name is given, so the action is not restricted to a
          # single artifact; results land under this directory.
          path: tests/perf/history/latest
      - name: Summarize benchmark results
        run: |
          python tests/perf/history/summary.py tests/perf/history ./perf-benchmark-summary --pattern "*raw*.csv" --normalize
          jupyter nbconvert --execute --to html --no-input tests/perf/history/summary.ipynb --output-dir ./perf-benchmark-summary --output perf-benchmark-summary
      - name: Upload benchmark summary
        uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
        with:
          name: perf-benchmark-summary
          path: perf-benchmark-summary
        # Use always() to always run this step to publish test results when there are test failures
        if: ${{ always() }}