# cm-mlops/script/app-mlperf-inference/_cm.yaml

# Identity of this CM script and of the automation it belongs to
alias: app-mlperf-inference
uid: 'd775cac873ee4231'

automation_alias: script
automation_uid: '5b4e0237da074764'

# Category label and its sort key
category: 'Modular MLPerf benchmarks'
category_sort: 20000

# Markdown list of developers; the folded scalar yields a single line
developers: >-
  [Arjun Suresh](https://www.linkedin.com/in/arjunsuresh),
  [Thomas Zhu](https://www.linkedin.com/in/hanwen-zhu-483614189),
  [Grigori Fursin](https://cKnowledge.org/gfursin)

# Tags by which this CM script can be found
tags: [app, vision, language, mlcommons, mlperf, inference, generic]

# Default values for environment variables
default_env:
  CM_MLPERF_LOADGEN_MODE: 'accuracy'
  CM_MLPERF_LOADGEN_SCENARIO: 'Offline'
  CM_OUTPUT_FOLDER_NAME: 'test_results'
  CM_MLPERF_RUN_STYLE: 'test'
  CM_TEST_QUERY_COUNT: "10"
  # NOTE(review): unquoted `off` is a boolean under YAML 1.1 loaders, not the
  # string 'off' - left unquoted to keep the loaded value unchanged; confirm
  # that consumers expect this.
  CM_MLPERF_QUANTIZATION: off

# Map script inputs to environment variables
# Each key is an input name; each value is the environment variable it maps to.
input_mapping:
  # General run configuration
  count: CM_MLPERF_LOADGEN_QUERY_COUNT
  docker: CM_RUN_DOCKER_CONTAINER
  hw_name: CM_HW_NAME
  imagenet_path: IMAGENET_PATH
  max_batchsize: CM_MLPERF_LOADGEN_MAX_BATCHSIZE
  mode: CM_MLPERF_LOADGEN_MODE
  num_threads: CM_NUM_THREADS
  output_dir: OUTPUT_BASE_DIR
  # Power-related inputs
  power: CM_MLPERF_POWER
  power_server: CM_MLPERF_POWER_SERVER_ADDRESS
  ntp_server: CM_MLPERF_POWER_NTP_SERVER
  max_amps: CM_MLPERF_POWER_MAX_AMPS
  max_volts: CM_MLPERF_POWER_MAX_VOLTS
  # Re-run / regeneration controls and scenario selection
  regenerate_files: CM_REGENERATE_MEASURE_FILES
  rerun: CM_RERUN
  scenario: CM_MLPERF_LOADGEN_SCENARIO
  test_query_count: CM_TEST_QUERY_COUNT
  clean: CM_MLPERF_CLEAN_SUBMISSION_DIR
  # Loadgen targets: generic, then per-scenario
  target_qps: CM_MLPERF_LOADGEN_TARGET_QPS
  target_latency: CM_MLPERF_LOADGEN_TARGET_LATENCY
  offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS
  server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS
  singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY
  multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY
  # Reporting and debugging
  readme: CM_MLPERF_README
  debug: CM_DEBUG_SCRIPT_BENCHMARK_PROGRAM

# CM environment variables are duplicated under the names used by native apps
# (left: CM name or prefix, right: native name or prefix)
env_key_mappings:
  CM_HOST_: 'HOST_'
  CM_ML_: 'ML_'
  CM_MLPERF_TVM: 'MLPERF_TVM'

# Environment keys exposed to higher-level scripts
new_env_keys:
  - 'CM_MLPERF_*'


# Other CM scripts this script depends on
deps:

  # Host OS feature detection
  - tags: detect,os

  # System dependencies for the host
  - tags: get,sys-utils-cm

  # Python detection/installation; named so other entries in this file can
  # refer to it as "python" or "python3"
  - names:
      - python
      - python3
    tags: get,python

  # MLPerf inference dependencies: the MLPerf inference sources, named
  # "inference-src"
  - names:
      - inference-src
    tags: get,mlcommons,inference,src

# Order in which variation groups appear in the documentation
variation_groups_order:
  [implementation, backend, device, model,
   precision, execution-mode, reproducibility]

# Variations to customize dependencies
variations:
  # Implementation (cpp, reference/python, nvidia, tflite-cpp)
  # `cpp` member of the implementation group
  cpp:
    group: implementation
    env:
      CM_MLPERF_CPP: 'yes'
      CM_MLPERF_IMPLEMENTATION: cpp
      CM_IMAGENET_ACCURACY_DTYPE: float32
      CM_OPENIMAGES_ACCURACY_DTYPE: float32
    # Extra tag for the dependency named imagenet-accuracy-script.
    # NOTE(review): the tag is _int64 while CM_IMAGENET_ACCURACY_DTYPE above is
    # float32 - confirm this difference is intended.
    add_deps_recursive:
      imagenet-accuracy-script:
        tags: _int64
    # Posthook dependency on the script tagged app,mlperf,cpp,inference,
    # with a skip condition on CM_SKIP_RUN
    posthook_deps:
      - tags: app,mlperf,cpp,inference
        names:
          - cpp-mlperf-inference
          - mlperf-inference-implementation
        skip_if_env:
          CM_SKIP_RUN:
            - yes


  # `tflite-cpp` member of the implementation group; selects the tflite
  # backend and cpu device by default
  tflite-cpp:
    group: implementation
    default_variations:
      backend: tflite
      device: cpu
    env:
      CM_MLPERF_TFLITE_CPP: 'yes'
      CM_MLPERF_CPP: 'yes'
      CM_MLPERF_IMPLEMENTATION: tflite-cpp
      CM_IMAGENET_ACCURACY_DTYPE: float32
    # Extra tag for the dependency named imagenet-accuracy-script
    add_deps_recursive:
      imagenet-accuracy-script:
        tags: _float32
    # Posthook dependency on the script tagged app,mlperf,tflite-cpp,inference,
    # with a skip condition on CM_SKIP_RUN
    posthook_deps:
      - tags: app,mlperf,tflite-cpp,inference
        names:
          - tflite-cpp-mlperf-inference
          - mlperf-inference-implementation
        skip_if_env:
          CM_SKIP_RUN:
            - yes

  # `reference` is the default member of the implementation group
  reference:
    group: implementation
    default: true
    env:
      CM_MLPERF_PYTHON: 'yes'
      CM_MLPERF_IMPLEMENTATION: reference
      CM_SQUAD_ACCURACY_DTYPE: float32
      CM_IMAGENET_ACCURACY_DTYPE: float32
      CM_OPENIMAGES_ACCURACY_DTYPE: float32
      CM_LIBRISPEECH_ACCURACY_DTYPE: float32
    # Extra tags for the accuracy-script dependencies, addressed by name.
    # NOTE(review): librispeech-accuracy-script gets _int32 while
    # CM_LIBRISPEECH_ACCURACY_DTYPE above is float32 - confirm intended.
    add_deps_recursive:
      imagenet-accuracy-script:
        tags: _float32
      squad-accuracy-script:
        tags: _float32
      librispeech-accuracy-script:
        tags: _int32
    # Posthook dependency on the script tagged app,mlperf,reference,inference,
    # with a skip condition on CM_SKIP_RUN
    posthook_deps:
      - tags: app,mlperf,reference,inference
        names:
          - python-reference-mlperf-inference
          - mlperf-inference-implementation
        skip_if_env:
          CM_SKIP_RUN:
            - yes

  # `python` is an alias of the `reference` variation
  python: {alias: reference}
  
  # `nvidia` member of the implementation group; selects the tensorrt backend
  # and cuda device by default
  nvidia:
    group: implementation
    default_variations:
      backend: tensorrt
      device: cuda
    env:
      CM_MLPERF_IMPLEMENTATION: nvidia
      CM_SQUAD_ACCURACY_DTYPE: float16
      CM_IMAGENET_ACCURACY_DTYPE: int32
    # Adjustments to dependencies addressed by name
    add_deps_recursive:
      imagenet-accuracy-script:
        tags: _int32
      squad-accuracy-script:
        tags: _float16
      python:
        # Spelled with an underscore to match `adr.python.version_min` in
        # input_description; the previous hyphenated `version-min` matched no
        # other key spelling in this file. Quoted so it stays a string.
        version_min: '3.8.0'
    deps:
      # Nvidia common code
      - tags: get,mlperf,inference,nvidia,common-code
      - tags: get,mlperf,training,src
      # Python libraries
      - tags: get,generic-python-lib,_nvidia-pyindex
      - tags: get,generic-python-lib,_nvidia-tensorrt
      - tags: get,generic-python-lib,_numpy
      - tags: get,generic-python-lib,_pycuda
      - tags: get,generic-python-lib,_mlperf_logging
      - tags: get,generic-python-lib,_onnx
  
  # `nvidia-original` member of the implementation group; selects the tensorrt
  # backend and cuda device by default
  nvidia-original:
    group: implementation
    default_variations:
      backend: tensorrt
      device: cuda
    env:
      CM_MLPERF_IMPLEMENTATION: nvidia-original
      CM_SQUAD_ACCURACY_DTYPE: float16
      CM_IMAGENET_ACCURACY_DTYPE: int32
      CM_LIBRISPEECH_ACCURACY_DTYPE: int8
    # Extra tags for the accuracy-script dependencies, addressed by name
    add_deps_recursive:
      imagenet-accuracy-script:
        tags: _int32
      squad-accuracy-script:
        tags: _float16
      librispeech-accuracy-script:
        tags: _int8
    # Posthook dependency on the script tagged reproduce,mlperf,nvidia,inference,
    # with a skip condition on CM_SKIP_RUN
    posthook_deps:
      - tags: reproduce,mlperf,nvidia,inference
        names:
          - nvidia-original-mlperf-inference
          - nvidia-harness
          - mlperf-inference-implementation
        skip_if_env:
          CM_SKIP_RUN:
            - yes

  # `resnet50` is the default member of the model group
  resnet50:
    group: model
    default: true
    env:
      CM_MODEL: resnet50
    deps:
      - tags: get,dataset-aux,imagenet-aux
    # Forward the model tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _resnet50
    # ImageNet accuracy script, conditioned on the loadgen mode and on
    # CM_MLPERF_ACCURACY_RESULTS_DIR
    post_deps:
      - tags: run,accuracy,mlperf,_imagenet
        names:
          - mlperf-accuracy-script
          - imagenet-accuracy-script
        enable_if_env:
          CM_MLPERF_LOADGEN_MODE:
            - accuracy
            - all
          CM_MLPERF_ACCURACY_RESULTS_DIR:
            - 'on'

  # `retinanet` member of the model group
  retinanet:
    group: model
    env:
      CM_MODEL: retinanet
    # Forward the model tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _retinanet
    # OpenImages accuracy script, conditioned on the loadgen mode and on
    # CM_MLPERF_ACCURACY_RESULTS_DIR
    post_deps:
      - tags: run,accuracy,mlperf,_openimages
        names:
          - mlperf-accuracy-script
          - openimages-accuracy-script
        enable_if_env:
          CM_MLPERF_LOADGEN_MODE:
            - accuracy
            - all
          CM_MLPERF_ACCURACY_RESULTS_DIR:
            - 'on'

  # `3d-unet-99` member of the model group, built on the `3d-unet_` base
  3d-unet-99:
    group: model
    base: [3d-unet_]
    env:
      CM_MODEL: 3d-unet-99
    # Forward the model tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _3d-unet-99

  # `3d-unet-99.9` member of the model group, built on the `3d-unet_` base
  3d-unet-99.9:
    group: model
    base: [3d-unet_]
    env:
      CM_MODEL: 3d-unet-99.9
    # Forward the model tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _3d-unet-99.9

  # Base used by the 3d-unet-* variations: the KiTS19 accuracy script,
  # conditioned on the loadgen mode and on CM_MLPERF_ACCURACY_RESULTS_DIR
  3d-unet_:
    post_deps:
      - tags: run,accuracy,mlperf,_kits19,_int8
        names:
          - mlperf-accuracy-script
          - 3d-unet-accuracy-script
        enable_if_env:
          CM_MLPERF_LOADGEN_MODE:
            - accuracy
            - all
          CM_MLPERF_ACCURACY_RESULTS_DIR:
            - 'on'

  # `rnnt` member of the model group
  rnnt:
    group: model
    env:
      CM_MODEL: rnnt
    # Forward the model tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _rnnt
    # LibriSpeech accuracy script, conditioned on the loadgen mode and on
    # CM_MLPERF_ACCURACY_RESULTS_DIR
    post_deps:
      - tags: run,accuracy,mlperf,_librispeech
        names:
          - mlperf-accuracy-script
          - librispeech-accuracy-script
        enable_if_env:
          CM_MLPERF_LOADGEN_MODE:
            - accuracy
            - all
          CM_MLPERF_ACCURACY_RESULTS_DIR:
            - 'on'

  # `gptj` member of the model group
  gptj:
    group: model
    env:
      CM_MODEL: gptj
    # Forward the model tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _gptj
    # CNN/DailyMail accuracy script, conditioned on the loadgen mode and on
    # CM_MLPERF_ACCURACY_RESULTS_DIR
    post_deps:
      - tags: run,accuracy,mlperf,_cnndm
        names:
          - cnndm-accuracy-script
          - mlperf-accuracy-script
        enable_if_env:
          CM_MLPERF_LOADGEN_MODE:
            - accuracy
            - all
          CM_MLPERF_ACCURACY_RESULTS_DIR:
            - 'on'
  # Base used by the bert-* variations: SQuAD dataset and vocab dependencies,
  # the SQuAD accuracy script, and a tag for the inference sources.
  bert_:
    deps:
      # Both skip conditions use the list form found in every other
      # enable_if_env / skip_if_env entry of this file.
      - tags: get,dataset,squad,language-processing
        skip_if_env:
          CM_DATASET_SQUAD_VAL_PATH:
            - 'on'
      - tags: get,dataset-aux,squad-vocab
        skip_if_env:
          # The key used to end in a stray double quote, which made it part of
          # the key name so it could not match the real variable.
          CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH:
            - 'on'

    # SQuAD accuracy script, conditioned on the loadgen mode and on
    # CM_MLPERF_ACCURACY_RESULTS_DIR
    post_deps:
      - tags: run,accuracy,mlperf,_squad
        names:
          - squad-accuracy-script
          - mlperf-accuracy-script
        enable_if_env:
          CM_MLPERF_LOADGEN_MODE:
            - accuracy
            - all
          CM_MLPERF_ACCURACY_RESULTS_DIR:
            - 'on'
    # Extra tag for the dependency named inference-src
    add_deps_recursive:
      inference-src:
        tags: _deeplearningexamples

  # `bert-99` member of the model group, built on the `bert_` base
  bert-99:
    group: model
    base: [bert_]
    env:
      CM_MODEL: bert-99
    # Forward the model tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _bert-99

  # `bert-99.9` member of the model group, built on the `bert_` base
  bert-99.9:
    group: model
    base: [bert_]
    env:
      CM_MODEL: bert-99.9
    # Forward the model tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _bert-99.9

  # Base used by the dlrm-* variations: the terabyte accuracy script,
  # conditioned on the loadgen mode and on CM_MLPERF_ACCURACY_RESULTS_DIR
  dlrm_:
    post_deps:
      - tags: run,accuracy,mlperf,_terabyte,_float32
        names:
          - terabyte-accuracy-script
          - mlperf-accuracy-script
        enable_if_env:
          CM_MLPERF_LOADGEN_MODE:
            - accuracy
            - all
          CM_MLPERF_ACCURACY_RESULTS_DIR:
            - 'on'

  # `dlrm-99` member of the model group, built on the `dlrm_` base
  dlrm-99:
    group: model
    base: [dlrm_]
    env:
      CM_MODEL: dlrm-99
    # Forward the model tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _dlrm-99

  # `dlrm-99.9` member of the model group, built on the `dlrm_` base
  dlrm-99.9:
    group: model
    base: [dlrm_]
    env:
      CM_MODEL: dlrm-99.9
    # Forward the model tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _dlrm-99.9

  # `mobilenet` member of the model group
  mobilenet:
    group: model
    env:
      CM_MODEL: mobilenet
    deps:
      - tags: get,dataset-aux,imagenet-aux
    # Forward the model tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _mobilenet
    # ImageNet accuracy script, conditioned on the loadgen mode and on
    # CM_MLPERF_ACCURACY_RESULTS_DIR
    post_deps:
      - tags: run,accuracy,mlperf,_imagenet
        names:
          - mlperf-accuracy-script
          - imagenet-accuracy-script
        enable_if_env:
          CM_MLPERF_LOADGEN_MODE:
            - accuracy
            - all
          CM_MLPERF_ACCURACY_RESULTS_DIR:
            - 'on'

  # `efficientnet` member of the model group
  efficientnet:
    group: model
    env:
      CM_MODEL: efficientnet
    deps:
      - tags: get,dataset-aux,imagenet-aux
    # Forward the model tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _efficientnet
    # ImageNet accuracy script, conditioned on the loadgen mode and on
    # CM_MLPERF_ACCURACY_RESULTS_DIR
    post_deps:
      - tags: run,accuracy,mlperf,_imagenet
        names:
          - mlperf-accuracy-script
          - imagenet-accuracy-script
        enable_if_env:
          CM_MLPERF_LOADGEN_MODE:
            - accuracy
            - all
          CM_MLPERF_ACCURACY_RESULTS_DIR:
            - 'on'

  # `onnxruntime` is the default member of the backend group
  onnxruntime:
    default: true
    group: backend
    env:
      CM_MLPERF_BACKEND: onnxruntime
    # Forward the backend tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _onnxruntime

  # `tensorrt` member of the backend group
  tensorrt:
    group: backend
    env:
      CM_MLPERF_BACKEND: tensorrt
    # Forward the backend tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _tensorrt

  # `tf` member of the backend group
  tf:
    group: backend
    env:
      CM_MLPERF_BACKEND: tf
    # Forward the backend tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _tf

  # `pytorch` member of the backend group
  pytorch:
    group: backend
    env:
      CM_MLPERF_BACKEND: pytorch
    # Forward the backend tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _pytorch

  # `deepsparse` member of the backend group; selects int8 precision by default
  deepsparse:
    group: backend
    default_variations:
      precision: int8
    env:
      CM_MLPERF_BACKEND: deepsparse
    # Forward the backend tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _deepsparse

  # `tflite` member of the backend group
  tflite:
    group: backend
    env:
      CM_MLPERF_BACKEND: 'tflite'
    # Forward the backend tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: '_tflite'

  # `tvm-onnx` member of the backend group, built on `batch_size.32`
  tvm-onnx:
    group: backend
    base: [batch_size.32]
    env:
      CM_MLPERF_BACKEND: 'tvm-onnx'
    # Forward the backend tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: '_tvm-onnx'

  # `tvm-pytorch` member of the backend group
  tvm-pytorch:
    group: backend
    env:
      CM_MLPERF_BACKEND: 'tvm-pytorch'
    # Forward the backend tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: '_tvm-pytorch'

  # `tvm-tflite` member of the backend group, built on `batch_size.1`
  tvm-tflite:
    group: backend
    base: [batch_size.1]
    env:
      CM_MLPERF_BACKEND: 'tvm-tflite'
    # Forward the backend tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: '_tvm-tflite'

  # `cpu` is the default member of the device group
  cpu:
    group: device
    default: true
    env:
      CM_MLPERF_DEVICE: cpu
    # Forward the device tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _cpu
  # `cuda` member of the device group; note the device variable is set to
  # `gpu` while the forwarded tag is `_cuda`
  cuda:
    group: device
    env:
      CM_MLPERF_DEVICE: gpu
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _cuda

  # Execution modes
  # `fast`: sets CM_FAST_FACTOR and writes to the fast_results folder
  fast:
    group: execution-mode
    env:
      CM_FAST_FACTOR: "5"
      CM_OUTPUT_FOLDER_NAME: 'fast_results'
      CM_MLPERF_RUN_STYLE: 'fast'

  # `test` is the default execution mode; its values equal those in default_env
  test:
    default: true
    group: execution-mode
    env:
      CM_OUTPUT_FOLDER_NAME: 'test_results'
      CM_MLPERF_RUN_STYLE: 'test'

  # `valid` execution mode: writes to the valid_results folder
  valid:
    group: execution-mode
    env:
      CM_OUTPUT_FOLDER_NAME: 'valid_results'
      CM_MLPERF_RUN_STYLE: 'valid'

  # Model precision
  # `quantized` is an alias of the `int8` variation
  quantized: {alias: int8}

  # `fp32` is the default member of the precision group
  fp32:
    default: true
    group: precision
    env:
      # Unquoted `off` kept as-is so the loaded value is unchanged
      CM_MLPERF_QUANTIZATION: off
      CM_MLPERF_MODEL_PRECISION: 'float32'
    # Extra tag for the dependency named python-reference-mlperf-inference
    add_deps_recursive:
      python-reference-mlperf-inference:
        tags: '_fp32'

  # `float16` member of the precision group
  float16:
    group: precision
    env:
      CM_MLPERF_QUANTIZATION: off
      # Was `float32`, apparently carried over from the fp32 variation; the
      # value now agrees with the variation name and with the `_float16` tag
      # forwarded below.
      CM_MLPERF_MODEL_PRECISION: float16
    # Extra tag for the dependency named python-reference-mlperf-inference
    add_deps_recursive:
      python-reference-mlperf-inference:
        tags: _float16

  # `bfloat16` member of the precision group
  bfloat16:
    group: precision
    env:
      CM_MLPERF_QUANTIZATION: off
      # NOTE(review): the precision is reported as float32 although this
      # variation forwards the `_bfloat16` tag below - looks copied from fp32;
      # confirm which value consumers of this variable expect.
      CM_MLPERF_MODEL_PRECISION: float32
    # Extra tag for the dependency named python-reference-mlperf-inference
    add_deps_recursive:
      python-reference-mlperf-inference:
        tags: _bfloat16

  # `int8` member of the precision group; turns quantization on
  int8:
    group: precision
    env:
      # Unquoted `on` kept as-is so the loaded value is unchanged
      CM_MLPERF_QUANTIZATION: on
      CM_MLPERF_MODEL_PRECISION: 'int8'
    # Forward the precision tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: '_int8'

  # `uint8` member of the precision group; turns quantization on
  uint8:
    group: precision
    env:
      # Unquoted `on` kept as-is so the loaded value is unchanged
      CM_MLPERF_QUANTIZATION: on
      CM_MLPERF_MODEL_PRECISION: 'uint8'
    # Forward the precision tag to the implementation dependency
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: '_uint8'

  # `power` variation (no group): sets the power variables and adds the
  # `_mlperf-power` tag to the dependency named runner
  power:
    env:
      CM_MLPERF_POWER: 'yes'
      CM_SYSTEM_POWER: 'yes'
    add_deps_recursive:
      runner:
        tags: _mlperf-power

  # Parameterised variation: `#` appears in the key, in the max-batchsize
  # value and in the tag forwarded to the implementation dependency
  batch_size.#:
    env:
      CM_MLPERF_LOADGEN_MAX_BATCHSIZE: "#"
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _batch_size.#

  # Reproducibility (past submissions)
  # `r2.1_default`: pins dependency settings and run options for r2.1
  r2.1_default:
    group: reproducibility
    env:
      CM_RERUN: 'yes'
      CM_SKIP_SYS_UTILS: 'yes'
      CM_TEST_QUERY_COUNT: "100"
    # Settings for the dependencies named compiler, inference-src and loadgen
    add_deps_recursive:
      compiler:
        tags: llvm
      inference-src:
        tags: _octoml
      loadgen:
        version: r2.1

# Pairs of variations listed as invalid when combined
invalid_variation_combinations:
  - [resnet50, pytorch]
  - [retinanet, tf]
  - [nvidia-original, tf]
  - [nvidia-original, onnxruntime]
  - [nvidia-original, pytorch]
  - [nvidia, tf]
  - [nvidia, onnxruntime]
  - [nvidia, pytorch]
  - [gptj, tf]

gui_title: 'CM GUI for the MLPerf inference benchmark'

# Descriptions, choices and defaults for the script inputs
input_description:
  scenario:
    desc: 'MLPerf inference scenario'
    choices: [Offline, Server, SingleStream, MultiStream]
    default: Offline
  mode:
    desc: 'MLPerf inference mode'
    choices: [performance, accuracy]
    default: accuracy
  test_query_count:
    desc: 'Specifies the number of samples to be processed during a test run'
  target_qps:
    desc: 'Target QPS'
  target_latency:
    desc: 'Target Latency'
  max_batchsize:
    desc: 'Maximum batchsize to be used'
  num_threads:
    desc: 'Number of CPU threads to launch the application with'
  hw_name:
    desc: >-
      Valid value - any system description which has a config file
      (under same name) defined
      [here](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-configs-sut-mlperf-inference/configs)
  output_dir:
    desc: 'Location where the outputs are produced'
  rerun:
    desc: 'Redo the run even if previous run files exist'
    boolean: true
    default: true
  regenerate_files:
    desc: >-
      Regenerates measurement files including accuracy.txt files even if a
      previous run exists. This option is redundant if `--rerun` is used
    boolean: true
  # Settings addressed to named dependencies (adr.<name>.<key>)
  adr.python.name:
    desc: 'Python virtual environment name (optional)'
    default: mlperf
  adr.python.version_min:
    desc: 'Minimal Python version'
    default: '3.8'
  adr.python.version:
    desc: 'Force Python version (must have all system deps)'
  adr.compiler.tags:
    desc: 'Compiler for loadgen'
    default: gcc
  adr.inference-src-loadgen.env.CM_GIT_URL:
    desc: >-
      Git URL for MLPerf inference sources to build LoadGen
      (to enable non-reference implementations)
  adr.inference-src.env.CM_GIT_URL:
    desc: >-
      Git URL for MLPerf inference sources to run benchmarks
      (to enable non-reference implementations)
  quiet:
    desc: 'Quiet run (select default values for all questions)'
    boolean: true
    default: false
  readme:
    desc: 'Generate README with the reproducibility report'
  debug:
    desc: 'Debug MLPerf script'