From 6c3e9508ed6ea45ce611bb011b977a2ddc85ef48 Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Tue, 7 Jan 2025 15:24:06 +0000
Subject: [PATCH] Remove eos_token_id

---
 tests/python_tests/test_kv_cache_eviction.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/python_tests/test_kv_cache_eviction.py b/tests/python_tests/test_kv_cache_eviction.py
index 72d5f8d09f..a647d1bda6 100644
--- a/tests/python_tests/test_kv_cache_eviction.py
+++ b/tests/python_tests/test_kv_cache_eviction.py
@@ -203,9 +203,9 @@ class LongBenchTestData:
 
 @pytest.mark.precommit
 @pytest.mark.parametrize("test_struct", [
-    LongBenchTestData("samsum", 36.78, 14, 9.596),
+    LongBenchTestData("samsum", 37.84, 11.8, 7.68),
     LongBenchTestData("trec", 28.12, 11.8, 7.721),
-    LongBenchTestData("qasper", 21.68, 18.4, 12.706),
+    LongBenchTestData("qasper", 15.88, 11.8, 6.483),
 ])
 def test_optimized_generation_longbench(qwen2_converted_model, test_struct):
     seqs_per_request = 32
@@ -215,12 +215,10 @@ def test_optimized_generation_longbench(qwen2_converted_model, test_struct):
     model_name = "/".join(models_path.parts[-2:])
     subset = test_struct.subset
     max_new_tokens = dataset2maxlen[subset]
-    tokenizer = qwen2_converted_model.tokenizer
     generation_config = GenerationConfig() # expecting default greedy sampling
     generation_config.num_return_sequences = 1
     generation_config.max_new_tokens = max_new_tokens
-    generation_config.eos_token_id = tokenizer.eos_token_id
     scheduler_config.use_cache_eviction = True
     if scheduler_config.use_cache_eviction:
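
A minimal sketch of the generation setup the test now relies on, assuming the
openvino_genai pipeline resolves the EOS token from the model's tokenizer on its
own, so no explicit eos_token_id assignment is needed. The model path, device,
prompt, and token budget below are hypothetical placeholders; the real test
takes them from the qwen2_converted_model fixture and dataset2maxlen[subset].

import openvino_genai

# Hypothetical converted-model directory; the test gets this from a fixture.
pipe = openvino_genai.LLMPipeline("models/Qwen2-0.5B-Instruct", "CPU")

config = openvino_genai.GenerationConfig()  # defaults to greedy sampling
config.num_return_sequences = 1
config.max_new_tokens = 128  # the test derives this from dataset2maxlen[subset]
# No config.eos_token_id here: assumed to be picked up from the tokenizer.

print(pipe.generate("Summarize the dialogue: ...", config))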