diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml b/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml
index e62b8e66..19c64c6c 100644
--- a/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml
+++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml
@@ -36,7 +36,7 @@ models:
     timestamp: '2024-05-23T00:34:02.549319Z'
     torchscript_onnx_qnn:
       inference_time: 1917811
-      throughput: 0.5214
+      throughput: 533.94
       estimated_peak_memory_range:
         min: 20480
         max: 1078248176