# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: MIT
model_repository: model_repository
# Make sure the full path to output_model_repository_path can be mounted into a child container, i.e. the path must be identical inside and outside the container:
output_model_repository_path: output_model_repository
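# A minimal sketch (image tag and paths are illustrative, not taken from this
# repo) of starting Model Analyzer so that the path above is identical inside
# and outside the container: mount the working directory at the same absolute
# path, and expose the Docker socket so child Triton containers can be started:
#   docker run -it --gpus all \
#     -v /var/run/docker.sock:/var/run/docker.sock \
#     -v "$PWD":"$PWD" -w "$PWD" \
#     nvcr.io/nvidia/tritonserver:<xx.yy>-py3-sdk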
export_path: analyzer_export
checkpoint_directory: analyzer_export/checkpoints
triton_launch_mode: docker
# override_output_model_repository: True
run_config_search_disable: True
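# With the automatic search disabled, Model Analyzer only evaluates what is
# listed explicitly under model_config_parameters below (manual mode). Given
# the settings in this file, that should yield two generated configs plus the
# default:
#   hifigan_config_0       (instance_group count: 1)
#   hifigan_config_1       (instance_group count: 2)
#   hifigan_config_default (config.pbtxt as shipped in the model repository)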
# https://github.com/triton-inference-server/model_analyzer/blob/main/docs/metrics.md
inference_output_fields: [
'model_name', 'batch_size', 'concurrency', 'model_config_path',
'instance_group', 'dynamic_batch_sizes', 'satisfies_constraints',
'perf_throughput', 'perf_latency_p99',
    'perf_client_response_wait', 'perf_client_send_recv', 'perf_server_queue',
    'perf_server_compute_input', 'perf_server_compute_infer', 'perf_server_compute_output'
]
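# These field names become the columns of the exported results table
# (presumably <export_path>/results/metrics-model-inference.csv in recent
# Model Analyzer versions).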
# https://github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md#configuring-model-analyzer
profile_models:
hifigan:
    # These are the sets of measurement parameters;
    # they do not create new model configs.
parameters:
concurrency: [1, 16]
      batch_sizes: [1] # note: this uses a pre-packed batch, so it only makes sense for offline inference
perf_analyzer_flags:
shape:
- "spec:80,140"
measurement-request-count: 128
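    # Roughly the perf_analyzer call these flags expand to for a single
    # measurement (a sketch; Model Analyzer builds the actual command internally):
    #   perf_analyzer -m hifigan -b 1 --concurrency-range 16 \
    #     --shape spec:80,140 --measurement-request-count 128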
# these define the potential model configs
model_config_parameters:
version_policy:
specific:
versions: [[1]]
instance_group:
- kind: KIND_GPU
          # Try two instance counts; each value yields a separate model config
count: [1, 2]
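      # Each count value above ends up in the generated variant's config.pbtxt
      # roughly as (illustrative):
      #   instance_group [ { kind: KIND_GPU, count: 2 } ]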
# https://github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md#constraint
constraints:
perf_throughput:
min: 75
# https://github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md#objective
objectives:
- perf_throughput
- perf_latency_p99
# https://github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md#config-options-for-report
# Since two model configs are generated in addition to the default, include all of them in the final report.
report_model_configs:
- hifigan_config_0
- hifigan_config_1
- hifigan_config_default
num_configs_per_model: 4 # the default is 3, but we do not have many more configs than that
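# A minimal sketch of how this file is typically consumed (exact flags may
# vary across Model Analyzer versions):
#   model-analyzer profile -f profile_config_manual.yaml
#   model-analyzer report -f profile_config_manual.yaml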