# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: MIT
model_repository: model_repository
# Make sure the full path to output_model_repository_path can be mounted into a child container, i.e. the path must be identical inside and outside the container:
output_model_repository_path: output_model_repository
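# A minimal sketch (image tag and paths are illustrative, not taken from this
# repo) of starting Model Analyzer so that the path above is identical inside
# and outside the container: mount the working directory at the same absolute
# path, and expose the Docker socket so child Triton containers can be started:
#   docker run -it --gpus all \
#     -v /var/run/docker.sock:/var/run/docker.sock \
#     -v "$PWD":"$PWD" -w "$PWD" \
#     nvcr.io/nvidia/tritonserver:<xx.yy>-py3-sdk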
export_path: analyzer_export
checkpoint_directory: analyzer_export/checkpoints
triton_launch_mode: docker
# override_output_model_repository: True
run_config_search_disable: True
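# With the automatic search disabled, Model Analyzer only evaluates what is
# listed explicitly under model_config_parameters below (manual mode). Given
# the settings in this file, that should yield two generated configs plus the
# default:
#   hifigan_config_0       (instance_group count: 1)
#   hifigan_config_1       (instance_group count: 2)
#   hifigan_config_default (config.pbtxt as shipped in the model repository)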
# https://github.com/triton-inference-server/model_analyzer/blob/main/docs/metrics.md
inference_output_fields: [
'model_name', 'batch_size', 'concurrency', 'model_config_path',
'instance_group', 'dynamic_batch_sizes', 'satisfies_constraints',
'perf_throughput', 'perf_latency_p99',
    'perf_client_response_wait', 'perf_client_send_recv', 'perf_server_queue',
    'perf_server_compute_input', 'perf_server_compute_infer', 'perf_server_compute_output'
]
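# These field names become the columns of the exported results table
# (presumably <export_path>/results/metrics-model-inference.csv in recent
# Model Analyzer versions).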
# https://github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md#configuring-model-analyzer
profile_models:
hifigan:
    # These are the sets of measurement parameters;
    # they do not create new model configs.
parameters:
concurrency: [1, 16]
      batch_sizes: [1] # note: this uses a pre-packed batch, so it only makes sense for offline inference
perf_analyzer_flags:
shape:
- "spec:80,140"
measurement-request-count: 128
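    # Roughly the perf_analyzer call these flags expand to for a single
    # measurement (a sketch; Model Analyzer builds the actual command internally):
    #   perf_analyzer -m hifigan -b 1 --concurrency-range 16 \
    #     --shape spec:80,140 --measurement-request-count 128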
# these define the potential model configs
model_config_parameters:
version_policy:
specific:
versions: [[1]]
instance_group:
- kind: KIND_GPU
          # Try two instance counts; each value yields a separate model config
count: [1, 2]
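      # Each count value above ends up in the generated variant's config.pbtxt
      # roughly as (illustrative):
      #   instance_group [ { kind: KIND_GPU, count: 2 } ]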
# https://github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md#constraint
constraints:
perf_throughput:
min: 75
# https://github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md#objective
objectives:
- perf_throughput
- perf_latency_p99
# https://github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md#config-options-for-report
# Since two model configs are generated in addition to the default, include all of them in the final report.
report_model_configs:
- hifigan_config_0
- hifigan_config_1
- hifigan_config_default
num_configs_per_model: 4 # the default is 3, but we do not have many more configs than that
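# A minimal sketch of how this file is typically consumed (exact flags may
# vary across Model Analyzer versions):
#   model-analyzer profile -f profile_config_manual.yaml
#   model-analyzer report -f profile_config_manual.yaml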