diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml b/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml index 748615cc..1c032c61 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/perf.yaml @@ -8,18 +8,18 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-23T00:34:02.549319Z' + timestamp: '2024-07-01T19:11:33.087816Z' torchscript_onnx_qnn: - inference_time: 90268 - throughput: 11.07 + inference_time: 88438 + throughput: 11.307 estimated_peak_memory_range: - min: 66715648 - max: 4562679888 + min: 95744000 + max: 4468197056 layer_info: - layers_on_npu: 34842 + layers_on_npu: 33818 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 34842 + total_layers: 33818 precision: uint16 primary_compute_unit: NPU job_id: "null" @@ -31,18 +31,18 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-23T00:34:02.549319Z' + timestamp: '2024-07-01T19:09:26.083951Z' torchscript_onnx_qnn: - inference_time: 118139 - throughput: 8.46 + inference_time: 95960 + throughput: 10.421 estimated_peak_memory_range: - min: 68124672 - max: 68124672 + min: 68235264 + max: 68235264 layer_info: - layers_on_npu: 34842 + layers_on_npu: 33818 layers_on_gpu: 0 layers_on_cpu: 0 - total_layers: 34842 + total_layers: 33818 precision: uint16 primary_compute_unit: NPU job_id: "null" @@ -56,13 +56,13 @@ models: os_name: Android manufacturer: Samsung chipset: Snapdragon® 8 Gen 3 - timestamp: '2024-05-23T00:34:02.549319Z' + timestamp: '2024-07-01T20:53:21.204302Z' torchscript_onnx_qnn: - inference_time: 1917811 - throughput: 533.94 + inference_time: 1484949 + throughput: 689.5859 estimated_peak_memory_range: - min: 20480 - max: 1078248176 + min: 8421376 + max: 1809446256 layer_info: layers_on_npu: 31766 layers_on_gpu: 0 @@ -79,13 +79,13 @@ models: os_name: Windows manufacturer: Qualcomm chipset: Snapdragon® X Elite - timestamp: '2024-05-23T00:34:02.549319Z' + timestamp: '2024-07-02T00:17:42.777637Z' torchscript_onnx_qnn: - inference_time: 2302575 - throughput: 445.21 + inference_time: 1889092 + throughput: 542.059 estimated_peak_memory_range: - min: 10788864 - max: 10788864 + min: 10784768 + max: 10784768 layer_info: layers_on_npu: 31766 layers_on_gpu: 0 @@ -124,4 +124,4 @@ aggregated: precision: uint16 primary_compute_unit: NPU job_id: "" - job_status: Passed + job_status: Passed \ No newline at end of file diff --git a/qai_hub_models/models/llama_v2_7b_chat_quantized/requirements.txt b/qai_hub_models/models/llama_v2_7b_chat_quantized/requirements.txt index 621a93f9..ffb2e641 100644 --- a/qai_hub_models/models/llama_v2_7b_chat_quantized/requirements.txt +++ b/qai_hub_models/models/llama_v2_7b_chat_quantized/requirements.txt @@ -1,2 +1,3 @@ transformers==4.41.1 sentencepiece==0.2.0 +psutil