From fb2c3321056677b3d4528aed4436ea0fc65d57e4 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Sat, 8 Feb 2025 00:11:52 +0400 Subject: [PATCH 01/14] Generation config separation --- tests/python_tests/__init__.py | 2 + tests/python_tests/common.py | 121 +---------------- .../python_tests/test_continuous_batching.py | 8 +- .../python_tests/test_llm_pipeline_static.py | 2 +- tests/python_tests/test_sampling.py | 2 +- tests/python_tests/utils/__init__.py | 2 + tests/python_tests/utils/generation_config.py | 125 ++++++++++++++++++ 7 files changed, 138 insertions(+), 124 deletions(-) create mode 100644 tests/python_tests/__init__.py create mode 100644 tests/python_tests/utils/__init__.py create mode 100644 tests/python_tests/utils/generation_config.py diff --git a/tests/python_tests/__init__.py b/tests/python_tests/__init__.py new file mode 100644 index 0000000000..6e922cea12 --- /dev/null +++ b/tests/python_tests/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2018-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 \ No newline at end of file diff --git a/tests/python_tests/common.py b/tests/python_tests/common.py index 64482e6fc0..ca26923e48 100644 --- a/tests/python_tests/common.py +++ b/tests/python_tests/common.py @@ -13,126 +13,9 @@ from transformers import GenerationConfig as HFGenerationConfig from typing import List, Tuple, Callable -TESTS_ROOT = Path(__file__).parent +from utils.generation_config import get_greedy, get_beam_search -def get_greedy() -> GenerationConfig: - generation_config = GenerationConfig() - generation_config.num_return_sequences = 1 - generation_config.max_new_tokens = 30 - return generation_config - -def get_greedy_with_penalties() -> GenerationConfig: - generation_config = GenerationConfig() - generation_config.num_return_sequences = 1 - generation_config.presence_penalty = 2.0 - generation_config.frequency_penalty = 0.2 - generation_config.max_new_tokens = 30 - return generation_config - -def get_beam_search() -> GenerationConfig: - generation_config = GenerationConfig() - generation_config.num_beam_groups = 3 - generation_config.num_beams = 6 - generation_config.diversity_penalty = 1 - generation_config.max_new_tokens = 30 - generation_config.num_return_sequences = 3 - generation_config.num_return_sequences = generation_config.num_beams - return generation_config - -def get_multinomial_temperature() -> GenerationConfig: - generation_config = GenerationConfig() - generation_config.do_sample = True - generation_config.temperature = 0.8 - generation_config.num_return_sequences = 1 - generation_config.max_new_tokens = 30 - return generation_config - -def get_multinomial_temperature_and_num_return_sequence() -> GenerationConfig: - generation_config = GenerationConfig() - generation_config.do_sample = True - generation_config.temperature = 0.7 - generation_config.num_return_sequences = 3 - generation_config.max_new_tokens = 30 - return generation_config - -def get_multinomial_temperature_and_top_p() -> GenerationConfig: - generation_config = GenerationConfig() - generation_config.num_return_sequences = 1 - generation_config.do_sample = True - generation_config.temperature = 0.8 - generation_config.top_p = 0.9 - generation_config.max_new_tokens = 30 - return generation_config - -def get_multinomial_temperature_and_top_k() -> GenerationConfig: - generation_config = GenerationConfig() - generation_config.do_sample = True - generation_config.num_return_sequences = 1 - generation_config.temperature = 0.8 - generation_config.top_k = 2 - 
generation_config.max_new_tokens = 30 - return generation_config - -def get_multinomial_temperature_top_p_and_top_k() -> GenerationConfig: - generation_config = GenerationConfig() - generation_config.do_sample = True - generation_config.temperature = 0.8 - generation_config.top_p = 0.9 - generation_config.num_return_sequences = 1 - generation_config.top_k = 2 - generation_config.max_new_tokens = 30 - return generation_config - -def get_multinomial_temperature_and_repetition_penalty() -> GenerationConfig: - generation_config = GenerationConfig() - generation_config.do_sample = True - generation_config.num_return_sequences = 1 - generation_config.temperature = 0.8 - generation_config.repetition_penalty = 2.0 - generation_config.max_new_tokens = 30 - return generation_config - -def get_multinomial_all_parameters() -> GenerationConfig: - generation_config = GenerationConfig() - generation_config.do_sample = True - generation_config.num_return_sequences = 4 - generation_config.temperature = 0.9 - generation_config.top_p = 0.8 - generation_config.top_k = 20 - generation_config.repetition_penalty = 2.0 - generation_config.max_new_tokens = 30 - return generation_config - -def get_multinomial_temperature_and_frequence_penalty() -> GenerationConfig: - generation_config = GenerationConfig() - generation_config.do_sample = True - generation_config.temperature = 0.8 - generation_config.frequency_penalty = 0.5 - generation_config.num_return_sequences = 1 - generation_config.max_new_tokens = 30 - return generation_config - -def get_multinomial_temperature_and_presence_penalty() -> GenerationConfig: - generation_config = GenerationConfig() - generation_config.do_sample = True - generation_config.temperature = 0.8 - generation_config.presence_penalty = 0.1 - generation_config.num_return_sequences = 1 - generation_config.max_new_tokens = 30 - return generation_config - -def get_multinomial_max_and_min_token() -> GenerationConfig: - multinomial = GenerationConfig() - multinomial.do_sample = True - multinomial.temperature = 0.9 - multinomial.top_p = 0.9 - multinomial.top_k = 20 - multinomial.num_return_sequences = 3 - multinomial.presence_penalty = 0.01 - multinomial.frequency_penalty = 0.1 - multinomial.min_new_tokens = 15 - multinomial.max_new_tokens = 30 - return multinomial +TESTS_ROOT = Path(__file__).parent def get_test_dataset() -> Tuple[List[str], List[GenerationConfig]]: prompts = [ diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py index 8afcc8061c..808b8682ae 100644 --- a/tests/python_tests/test_continuous_batching.py +++ b/tests/python_tests/test_continuous_batching.py @@ -11,9 +11,7 @@ from openvino_genai import ContinuousBatchingPipeline, LLMPipeline, GenerationConfig, SchedulerConfig, Tokenizer, draft_model from common import get_default_properties, get_hugging_face_models, convert_models, generate_and_compare_with_reference_text, \ - get_scheduler_config, get_greedy, run_cb_pipeline_with_ref, get_beam_search, get_greedy, \ - get_multinomial_all_parameters, get_multinomial_temperature_and_num_return_sequence, \ - get_multinomial_temperature_and_top_k, get_multinomial_temperature, get_multinomial_temperature_and_top_p + get_scheduler_config, run_cb_pipeline_with_ref from test_sampling import RandomSamplingTestStruct, get_current_platform_ref_texts from ov_genai_test_utils import ( @@ -35,6 +33,10 @@ def read_models_list(file_name: str): from shutil import rmtree +from utils.generation_config import get_greedy, get_beam_search, \ + 
get_multinomial_all_parameters, get_multinomial_temperature_and_num_return_sequence, \ + get_multinomial_temperature_and_top_k, get_multinomial_temperature, get_multinomial_temperature_and_top_p + # # e2e tests on random and real models # diff --git a/tests/python_tests/test_llm_pipeline_static.py b/tests/python_tests/test_llm_pipeline_static.py index ae5c475fd9..dd329eb131 100644 --- a/tests/python_tests/test_llm_pipeline_static.py +++ b/tests/python_tests/test_llm_pipeline_static.py @@ -14,7 +14,7 @@ ) from common import get_default_properties -from common import \ +from utils.generation_config import \ get_greedy, \ get_greedy_with_penalties, \ get_multinomial_all_parameters, \ diff --git a/tests/python_tests/test_sampling.py b/tests/python_tests/test_sampling.py index fa445e96f1..edc5f1a29a 100644 --- a/tests/python_tests/test_sampling.py +++ b/tests/python_tests/test_sampling.py @@ -164,7 +164,7 @@ class RandomSamplingTestStruct: prompts: List[str] ref_texts: List[List[str]] -from common import get_multinomial_temperature, get_greedy_with_penalties, \ +from utils.generation_config import get_multinomial_temperature, get_greedy_with_penalties, \ get_multinomial_temperature_and_top_k, get_multinomial_temperature_and_top_p, \ get_multinomial_temperature_top_p_and_top_k, get_multinomial_all_parameters, \ get_multinomial_temperature_and_num_return_sequence, get_multinomial_max_and_min_token, \ diff --git a/tests/python_tests/utils/__init__.py b/tests/python_tests/utils/__init__.py new file mode 100644 index 0000000000..6e922cea12 --- /dev/null +++ b/tests/python_tests/utils/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2018-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 \ No newline at end of file diff --git a/tests/python_tests/utils/generation_config.py b/tests/python_tests/utils/generation_config.py new file mode 100644 index 0000000000..1a78eeaedd --- /dev/null +++ b/tests/python_tests/utils/generation_config.py @@ -0,0 +1,125 @@ +# Copyright (C) 2018-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# todo: CVS-162108: remove this file to handle generation config directly in tests + +from openvino_genai import GenerationConfig + +def get_greedy() -> GenerationConfig: + generation_config = GenerationConfig() + generation_config.num_return_sequences = 1 + generation_config.max_new_tokens = 30 + return generation_config + +def get_greedy_with_penalties() -> GenerationConfig: + generation_config = GenerationConfig() + generation_config.num_return_sequences = 1 + generation_config.presence_penalty = 2.0 + generation_config.frequency_penalty = 0.2 + generation_config.max_new_tokens = 30 + return generation_config + +def get_beam_search() -> GenerationConfig: + generation_config = GenerationConfig() + generation_config.num_beam_groups = 3 + generation_config.num_beams = 6 + generation_config.diversity_penalty = 1 + generation_config.max_new_tokens = 30 + generation_config.num_return_sequences = 3 + generation_config.num_return_sequences = generation_config.num_beams + return generation_config + +def get_multinomial_temperature() -> GenerationConfig: + generation_config = GenerationConfig() + generation_config.do_sample = True + generation_config.temperature = 0.8 + generation_config.num_return_sequences = 1 + generation_config.max_new_tokens = 30 + return generation_config + +def get_multinomial_temperature_and_num_return_sequence() -> GenerationConfig: + generation_config = GenerationConfig() + generation_config.do_sample = True + generation_config.temperature = 
0.7 + generation_config.num_return_sequences = 3 + generation_config.max_new_tokens = 30 + return generation_config + +def get_multinomial_temperature_and_top_p() -> GenerationConfig: + generation_config = GenerationConfig() + generation_config.num_return_sequences = 1 + generation_config.do_sample = True + generation_config.temperature = 0.8 + generation_config.top_p = 0.9 + generation_config.max_new_tokens = 30 + return generation_config + +def get_multinomial_temperature_and_top_k() -> GenerationConfig: + generation_config = GenerationConfig() + generation_config.do_sample = True + generation_config.num_return_sequences = 1 + generation_config.temperature = 0.8 + generation_config.top_k = 2 + generation_config.max_new_tokens = 30 + return generation_config + +def get_multinomial_temperature_top_p_and_top_k() -> GenerationConfig: + generation_config = GenerationConfig() + generation_config.do_sample = True + generation_config.temperature = 0.8 + generation_config.top_p = 0.9 + generation_config.num_return_sequences = 1 + generation_config.top_k = 2 + generation_config.max_new_tokens = 30 + return generation_config + +def get_multinomial_temperature_and_repetition_penalty() -> GenerationConfig: + generation_config = GenerationConfig() + generation_config.do_sample = True + generation_config.num_return_sequences = 1 + generation_config.temperature = 0.8 + generation_config.repetition_penalty = 2.0 + generation_config.max_new_tokens = 30 + return generation_config + +def get_multinomial_all_parameters() -> GenerationConfig: + generation_config = GenerationConfig() + generation_config.do_sample = True + generation_config.num_return_sequences = 4 + generation_config.temperature = 0.9 + generation_config.top_p = 0.8 + generation_config.top_k = 20 + generation_config.repetition_penalty = 2.0 + generation_config.max_new_tokens = 30 + return generation_config + +def get_multinomial_temperature_and_frequence_penalty() -> GenerationConfig: + generation_config = GenerationConfig() + generation_config.do_sample = True + generation_config.temperature = 0.8 + generation_config.frequency_penalty = 0.5 + generation_config.num_return_sequences = 1 + generation_config.max_new_tokens = 30 + return generation_config + +def get_multinomial_temperature_and_presence_penalty() -> GenerationConfig: + generation_config = GenerationConfig() + generation_config.do_sample = True + generation_config.temperature = 0.8 + generation_config.presence_penalty = 0.1 + generation_config.num_return_sequences = 1 + generation_config.max_new_tokens = 30 + return generation_config + +def get_multinomial_max_and_min_token() -> GenerationConfig: + multinomial = GenerationConfig() + multinomial.do_sample = True + multinomial.temperature = 0.9 + multinomial.top_p = 0.9 + multinomial.top_k = 20 + multinomial.num_return_sequences = 3 + multinomial.presence_penalty = 0.01 + multinomial.frequency_penalty = 0.1 + multinomial.min_new_tokens = 15 + multinomial.max_new_tokens = 30 + return multinomial From f1b0237a1716cf1ef2d4c551abf16e268c169090 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Sat, 8 Feb 2025 00:49:08 +0400 Subject: [PATCH 02/14] Hugging face --- tests/python_tests/common.py | 182 +----------------- tests/python_tests/ov_genai_test_utils.py | 14 +- .../python_tests/test_continuous_batching.py | 5 +- tests/python_tests/test_kv_cache_eviction.py | 14 +- tests/python_tests/test_llm_pipeline.py | 7 +- .../python_tests/test_llm_pipeline_static.py | 9 +- tests/python_tests/test_vlm_pipeline.py | 7 +- 
tests/python_tests/utils/constants.py | 10 + tests/python_tests/utils/hugging_face.py | 180 +++++++++++++++++ 9 files changed, 226 insertions(+), 202 deletions(-) create mode 100644 tests/python_tests/utils/constants.py create mode 100644 tests/python_tests/utils/hugging_face.py diff --git a/tests/python_tests/common.py b/tests/python_tests/common.py index ca26923e48..3e79ee15e2 100644 --- a/tests/python_tests/common.py +++ b/tests/python_tests/common.py @@ -14,6 +14,8 @@ from typing import List, Tuple, Callable from utils.generation_config import get_greedy, get_beam_search +from utils.constants import default_ov_config +from utils.hugging_face import convert_models, get_hugging_face_models, run_hugging_face TESTS_ROOT = Path(__file__).parent @@ -55,144 +57,6 @@ def get_scheduler_config(scheduler_params: dict = None) -> SchedulerConfig: return scheduler_config -def convert_to_hf( - default_generation_config : HFGenerationConfig, - generation_config : GenerationConfig -) -> HFGenerationConfig: - if generation_config is None: - return - - kwargs = {} - kwargs['return_dict_in_generate'] = True - - # generic parameters - kwargs['max_length'] = generation_config.max_length - # has higher priority than 'max_length' - kwargs['max_new_tokens'] = generation_config.max_new_tokens - kwargs['min_new_tokens'] = generation_config.min_new_tokens - if generation_config.stop_strings: - kwargs['stop_strings'] = generation_config.stop_strings - - # copy default parameters - kwargs['bos_token_id'] = default_generation_config.bos_token_id - kwargs['pad_token_id'] = default_generation_config.pad_token_id - - if len(generation_config.stop_token_ids) > 0: - kwargs['eos_token_id'] = list(generation_config.stop_token_ids) - elif generation_config.eos_token_id != -1: - kwargs['eos_token_id'] = generation_config.eos_token_id - else: - kwargs['eos_token_id'] = default_generation_config.eos_token_id - - # copy penalties - kwargs['repetition_penalty'] = generation_config.repetition_penalty - - if generation_config.is_beam_search(): - # beam search case - kwargs['num_beam_groups'] = generation_config.num_beam_groups - kwargs['num_beams'] = generation_config.num_beams - kwargs['length_penalty'] = generation_config.length_penalty - kwargs['no_repeat_ngram_size'] = generation_config.no_repeat_ngram_size - kwargs['num_return_sequences'] = generation_config.num_return_sequences - kwargs['output_scores'] = True - - if generation_config.num_beam_groups > 1: - kwargs['diversity_penalty'] = generation_config.diversity_penalty - - # in OpenVINO GenAI this parameter is called stop_criteria, - # while in HF it's called early_stopping. 
- # HF values True, False and "never" correspond to OV GenAI values "EARLY", "HEURISTIC" and "NEVER" - STOP_CRITERIA_MAP = { - StopCriteria.NEVER: "never", - StopCriteria.EARLY: True, - StopCriteria.HEURISTIC: False - } - - kwargs['early_stopping'] = STOP_CRITERIA_MAP[generation_config.stop_criteria] - elif generation_config.is_multinomial(): - # mulitinomial - kwargs['temperature'] = generation_config.temperature - kwargs['top_k'] = generation_config.top_k - kwargs['top_p'] = generation_config.top_p - kwargs['do_sample'] = generation_config.do_sample - else: - # greedy - pass - - hf_generation_config = HFGenerationConfig(**kwargs) - return hf_generation_config - - -def run_hugging_face( - opt_model, - hf_tokenizer, - prompts: List[str], - generation_configs: List[GenerationConfig] | GenerationConfig, -) -> List[GenerationResult]: - generation_results = [] - - if type(generation_configs) is list: - # process prompt by promp as we have multiple generation configs - for prompt, generation_config in zip(prompts, generation_configs): - hf_generation_config = convert_to_hf(opt_model.generation_config, generation_config) - inputs = {} - if hf_tokenizer.chat_template and generation_config.apply_chat_template: - prompt = hf_tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) - inputs = hf_tokenizer(prompt, return_tensors="pt", add_special_tokens=False) - else: - inputs = hf_tokenizer(prompt, return_tensors="pt") - input_ids, attention_mask = inputs['input_ids'], inputs['attention_mask'] - prompt_len = 0 if generation_config.echo else input_ids.numel() - - generate_outputs = opt_model.generate(input_ids=input_ids, attention_mask=attention_mask, generation_config=hf_generation_config, tokenizer=hf_tokenizer) - all_text_batch = hf_tokenizer.batch_decode([generated_ids[prompt_len:] for generated_ids in generate_outputs.sequences], skip_special_tokens=True) - - generation_result = GenerationResult() - generation_result.m_generation_ids = all_text_batch - # sequences_scores are available only for beam search case - if generation_config.is_beam_search(): - generation_result.m_scores = [score for score in generate_outputs.sequences_scores] - generation_results.append(generation_result) - else: - inputs = {} - if hf_tokenizer.chat_template and generation_configs.apply_chat_template: - processed_prompts = [] - for prompt in prompts: - processed_prompts.append(hf_tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True)) - # process all prompts as a single batch as we have a single generation config for all prompts - inputs = hf_tokenizer(processed_prompts, return_tensors='pt', padding=True, truncation=True, add_special_tokens=False, padding_side='left') - else: - inputs = hf_tokenizer(prompts, return_tensors='pt', padding=True, truncation=True, padding_side='left') - input_ids, attention_mask = inputs['input_ids'], inputs['attention_mask'] - hf_generation_config = convert_to_hf(opt_model.generation_config, generation_configs) - hf_encoded_outputs = opt_model.generate(input_ids, attention_mask=attention_mask, generation_config=hf_generation_config, tokenizer=hf_tokenizer) - - generation_ids = [] - scores = [] - - for idx, hf_encoded_out in enumerate(hf_encoded_outputs.sequences): - prompt_idx = idx // hf_generation_config.num_return_sequences - prompt_len = 0 if generation_configs.echo else input_ids[prompt_idx].numel() - decoded_text = hf_tokenizer.decode(hf_encoded_out[prompt_len:], 
skip_special_tokens=True) - generation_ids.append(decoded_text) - if generation_configs.is_beam_search(): - scores.append(hf_encoded_outputs.sequences_scores[idx]) - - # if we need to move to next generation result - if (idx + 1) // hf_generation_config.num_return_sequences != prompt_idx: - generation_result = GenerationResult() - generation_result.m_generation_ids = generation_ids - generation_result.m_scores = scores - generation_results.append(generation_result) - generation_ids = [] - scores = [] - - del hf_tokenizer - del opt_model - - return generation_results - - def run_continuous_batching( models_path : Path, scheduler_config : SchedulerConfig, @@ -202,7 +66,7 @@ def run_continuous_batching( if type(generation_configs) is not list: generation_configs = [generation_configs] * len(prompts) - cb_pipe = ContinuousBatchingPipeline(models_path, scheduler_config=scheduler_config, device='CPU', tokenizer_properties={}, properties=get_default_properties()) + cb_pipe = ContinuousBatchingPipeline(models_path, scheduler_config=scheduler_config, device='CPU', tokenizer_properties={}, properties=default_ov_config) output = cb_pipe.generate(prompts, generation_configs) del cb_pipe @@ -211,15 +75,6 @@ def run_continuous_batching( return output -def get_default_properties(): - import openvino.properties.hint as hints - import openvino as ov - - return { - hints.inference_precision : ov.Type.f32, - hints.kv_cache_precision : ov.Type.f16, - } - def get_models_list_from_path(file_name: str): models = [] with open(file_name) as f: @@ -259,7 +114,7 @@ def run_llm_pipeline( use_cb : bool = False, streamer: StreamerWithResults | Callable | StreamerBase = None ) -> List[GenerationResult]: - properties = get_default_properties() + properties = default_ov_config if use_cb: properties['scheduler_config'] = SchedulerConfig() ov_pipe = LLMPipeline(models_path, device='CPU', **properties) @@ -328,35 +183,6 @@ def compare_generation_results(prompts: List[str], hf_results: List[GenerationRe print(f"Prompt = {prompt}\nReference result = {ref_result}\nOpenVINO result = {ov_result.m_generation_ids}") compare_generation_result(ref_result, ov_result, generation_config) - -def get_hugging_face_models(model_id: str): - hf_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) - opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False, load_in_8bit=False, trust_remote_code=True, ov_config=get_default_properties()) - return opt_model, hf_tokenizer - - -def convert_models(opt_model : OVModelForCausalLM, hf_tokenizer : AutoTokenizer, models_path: Path): - opt_model.save_pretrained(models_path) - - # to store tokenizer config jsons with special tokens - hf_tokenizer.save_pretrained(models_path) - - # save generation config - opt_model.generation_config.save_pretrained(models_path) - - # convert tokenizers as well - convert_and_save_tokenizer(hf_tokenizer, models_path) - - -def convert_and_save_tokenizer(hf_tokenizer : AutoTokenizer, models_path: Path): - from openvino_tokenizers import convert_tokenizer - from openvino import save_model - - tokenizer, detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True) - save_model(tokenizer, models_path / "openvino_tokenizer.xml") - save_model(detokenizer, models_path / "openvino_detokenizer.xml") - - def run_llm_pipeline_with_ref(model_id: str, prompts: List[str], generation_config: GenerationConfig | dict, diff --git a/tests/python_tests/ov_genai_test_utils.py b/tests/python_tests/ov_genai_test_utils.py index 
19628b2f70..5c9be11942 100644 --- a/tests/python_tests/ov_genai_test_utils.py +++ b/tests/python_tests/ov_genai_test_utils.py @@ -14,7 +14,9 @@ import json import openvino_genai as ov_genai -from common import get_default_properties, delete_rt_info +from common import delete_rt_info + +from utils.constants import default_ov_config def get_models_list(): precommit_models = [ @@ -92,7 +94,7 @@ def read_model(params, **tokenizer_kwargs): if (models_path / "openvino_model.xml").exists(): opt_model = OVModelForCausalLM.from_pretrained(models_path, trust_remote_code=True, - compile=False, device='CPU', ov_config=get_default_properties()) + compile=False, device='CPU', ov_config=default_ov_config) else: ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(hf_tokenizer, with_detokenizer=True, @@ -104,7 +106,7 @@ def read_model(params, **tokenizer_kwargs): hf_tokenizer.save_pretrained(models_path) opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, - compile=False, device='CPU', load_in_8bit=False, ov_config=get_default_properties()) + compile=False, device='CPU', load_in_8bit=False, ov_config=default_ov_config) opt_model.generation_config.save_pretrained(models_path) opt_model.config.save_pretrained(models_path) opt_model.save_pretrained(models_path) @@ -114,7 +116,7 @@ def read_model(params, **tokenizer_kwargs): models_path, hf_tokenizer, opt_model, - ov_genai.LLMPipeline(models_path, 'CPU', ENABLE_MMAP=False, **get_default_properties()), + ov_genai.LLMPipeline(models_path, 'CPU', ENABLE_MMAP=False, **default_ov_config), ) @@ -179,7 +181,7 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path): with (temp_path / config_name).open('w') as f: json.dump(config_json, f) - ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU', **get_default_properties()) + ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU', **default_ov_config) for _, config_name in configs: os.remove(temp_path / config_name) @@ -189,4 +191,4 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path): @functools.lru_cache(1) def get_continuous_batching(path): - return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig(), **get_default_properties()) + return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig(), **default_ov_config) diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py index 808b8682ae..187628b191 100644 --- a/tests/python_tests/test_continuous_batching.py +++ b/tests/python_tests/test_continuous_batching.py @@ -10,7 +10,7 @@ from pathlib import Path from openvino_genai import ContinuousBatchingPipeline, LLMPipeline, GenerationConfig, SchedulerConfig, Tokenizer, draft_model -from common import get_default_properties, get_hugging_face_models, convert_models, generate_and_compare_with_reference_text, \ +from common import get_hugging_face_models, convert_models, generate_and_compare_with_reference_text, \ get_scheduler_config, run_cb_pipeline_with_ref from test_sampling import RandomSamplingTestStruct, get_current_platform_ref_texts @@ -36,6 +36,7 @@ def read_models_list(file_name: str): from utils.generation_config import get_greedy, get_beam_search, \ get_multinomial_all_parameters, get_multinomial_temperature_and_num_return_sequence, \ get_multinomial_temperature_and_top_k, get_multinomial_temperature, get_multinomial_temperature_and_top_p +from utils.constants import default_ov_config # # e2e tests on random and real models @@ -160,7 +161,7 @@ 
def test_post_oom_health(tmp_path, sampling_config): models_path : Path = tmp_path / model_id convert_models(opt_model, hf_tokenizer, models_path) - cb_pipe = ContinuousBatchingPipeline(models_path, Tokenizer(models_path), scheduler_config, "CPU", **get_default_properties()) + cb_pipe = ContinuousBatchingPipeline(models_path, Tokenizer(models_path), scheduler_config, "CPU", **default_ov_config) # First run should return incomplete response output = cb_pipe.generate(["What is OpenVINO?"], [generation_config]) diff --git a/tests/python_tests/test_kv_cache_eviction.py b/tests/python_tests/test_kv_cache_eviction.py index 6dd6c57511..ae01e94a75 100644 --- a/tests/python_tests/test_kv_cache_eviction.py +++ b/tests/python_tests/test_kv_cache_eviction.py @@ -17,9 +17,11 @@ from openvino import serialize from transformers import AutoTokenizer -from common import TESTS_ROOT, run_cb_pipeline_with_ref, get_default_properties +from common import TESTS_ROOT, run_cb_pipeline_with_ref from utils_longbench import dataset2maxlen, evaluate, preprocess_prompt, post_process_pred +from utils.constants import default_ov_config + def load_prompts_dataset(file_name : str) -> Dict[str, List[str]]: file_path = TESTS_ROOT / 'data' / file_name @@ -45,7 +47,7 @@ class ConvertedModel: @pytest.fixture(scope='module') def converted_model(tmp_path_factory): model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" - model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, load_in_8bit=False, compile=False, ov_config=get_default_properties()) + model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, load_in_8bit=False, compile=False, ov_config=default_ov_config) tokenizer = AutoTokenizer.from_pretrained(model_id) models_path = tmp_path_factory.mktemp("cacheopt_test_models") / model_id model.save_pretrained(models_path) @@ -124,8 +126,8 @@ def test_cache_optimized_generation_is_similar_to_unoptimized(converted_model, t scheduler_config_opt.enable_prefix_caching = enable_prefix_caching models_path = converted_model.models_path - model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, "CPU", {}, get_default_properties()) - model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, "CPU", {}, get_default_properties()) + model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, "CPU", {}, default_ov_config) + model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, "CPU", {}, default_ov_config) tokenizer = converted_model.tokenizer @@ -237,8 +239,8 @@ def test_optimized_generation_longbench(qwen2_converted_model, device, test_stru if scheduler_config_opt.use_cache_eviction: scheduler_config_opt.cache_eviction_config = LONGBENCH_CACHE_EVICTION_CONFIG - model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, device, {}, get_default_properties()) - model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, device, {}, get_default_properties()) + model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, device, {}, default_ov_config) + model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, device, {}, default_ov_config) model_name = "/".join(models_path.parts[-2:]) subset = test_struct.subset diff --git a/tests/python_tests/test_llm_pipeline.py b/tests/python_tests/test_llm_pipeline.py index 276aff7251..5fc5453200 100644 --- a/tests/python_tests/test_llm_pipeline.py +++ b/tests/python_tests/test_llm_pipeline.py @@ -11,7 +11,7 @@ 
from pathlib import Path import torch -from common import run_llm_pipeline_with_ref, convert_to_hf +from common import run_llm_pipeline_with_ref from ov_genai_test_utils import ( get_models_list, read_model, @@ -19,6 +19,7 @@ get_chat_models_list, model_tmp_path, ) +from utils.hugging_face import generation_config_to_hf # # e2e work @@ -50,7 +51,7 @@ def test_encoded_inputs(model_descr, inputs): model_id, path, hf_tokenizer, opt_model, ov_pipe = read_model(model_descr) ov_generation_config = GenerationConfig(max_new_tokens=20) - hf_generation_config = convert_to_hf(opt_model.generation_config, ov_generation_config) + hf_generation_config = generation_config_to_hf(opt_model.generation_config, ov_generation_config) input_ids, attention_mask = inputs prompt_len = input_ids.shape[1] @@ -132,7 +133,7 @@ def test_chat_scenario(model_descr, generation_config_kwargs: Dict): model_id, path, tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1])) ov_generation_config = GenerationConfig(**generation_config_kwargs) - hf_generation_config = convert_to_hf(opt_model.generation_config, ov_generation_config) + hf_generation_config = generation_config_to_hf(opt_model.generation_config, ov_generation_config) ov_pipe.start_chat() for prompt in questions: diff --git a/tests/python_tests/test_llm_pipeline_static.py b/tests/python_tests/test_llm_pipeline_static.py index dd329eb131..d8b24d825e 100644 --- a/tests/python_tests/test_llm_pipeline_static.py +++ b/tests/python_tests/test_llm_pipeline_static.py @@ -12,8 +12,7 @@ get_chat_models_list, read_model ) -from common import get_default_properties - +from utils.constants import default_ov_config from utils.generation_config import \ get_greedy, \ get_greedy_with_penalties, \ @@ -33,7 +32,7 @@ 'NPUW_ONLINE_PIPELINE': 'NONE', 'PREFILL_CONFIG': { }, 'GENERATE_CONFIG': { } - } | get_default_properties() + } | default_ov_config def generate_chat_history(model_path, device, pipeline_config, questions): @@ -55,7 +54,7 @@ def test_generation_compare_with_stateful(generation_config): prompt = 'What is OpenVINO?' 
model_path = read_model(get_models_list()[0])[1] - stateful_pipe = ov_genai.LLMPipeline(model_path, "CPU", **get_default_properties()) + stateful_pipe = ov_genai.LLMPipeline(model_path, "CPU", **default_ov_config) ref_out = stateful_pipe.generate(prompt, generation_config) static_pipe = ov_genai.LLMPipeline(model_path, "NPU", **common_config) @@ -221,7 +220,7 @@ def test_chat_generation(): model_path = read_model(get_chat_models_list()[0])[1] - chat_history_stateful = generate_chat_history(model_path, "CPU", get_default_properties(), questions) + chat_history_stateful = generate_chat_history(model_path, "CPU", default_ov_config, questions) chat_history_static = generate_chat_history(model_path, "NPU", common_config, questions) print('npu chat: \n{chat_history_static}\n') diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index 3d1b0dccdc..3fe4272fcf 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -7,7 +7,10 @@ import transformers from optimum.intel.openvino import OVModelForVisualCausalLM from openvino_genai import VLMPipeline, GenerationConfig -from common import get_image_by_link, get_beam_search, get_multinomial_all_parameters, get_default_properties +from common import get_image_by_link + +from utils.generation_config import get_beam_search, get_multinomial_all_parameters +from utils.constants import default_ov_config def get_ov_model(model_id, cache): model_dir = cache.mkdir(model_id.split('/')[-1]) @@ -18,7 +21,7 @@ def get_ov_model(model_id, cache): ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(processor.tokenizer, with_detokenizer=True) openvino.save_model(ov_tokenizer, model_dir / "openvino_tokenizer.xml") openvino.save_model(ov_detokenizer, model_dir / "openvino_detokenizer.xml") - model = OVModelForVisualCausalLM.from_pretrained(model_id, compile=False, device="CPU", export=True, load_in_8bit=False, trust_remote_code=True, ov_config=get_default_properties()) + model = OVModelForVisualCausalLM.from_pretrained(model_id, compile=False, device="CPU", export=True, load_in_8bit=False, trust_remote_code=True, ov_config=default_ov_config) processor.chat_template = processor.tokenizer.chat_template # It seems that tiny-random-phi3-vision is saved incorrectly. That line works this around. 
processor.save_pretrained(model_dir) model.save_pretrained(model_dir) diff --git a/tests/python_tests/utils/constants.py b/tests/python_tests/utils/constants.py new file mode 100644 index 0000000000..b67ccca20f --- /dev/null +++ b/tests/python_tests/utils/constants.py @@ -0,0 +1,10 @@ +# Copyright (C) 2018-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import openvino.properties.hint as hints +import openvino as ov + +default_ov_config = { + hints.inference_precision : ov.Type.f32, + hints.kv_cache_precision : ov.Type.f16, +} \ No newline at end of file diff --git a/tests/python_tests/utils/hugging_face.py b/tests/python_tests/utils/hugging_face.py new file mode 100644 index 0000000000..8f8987d647 --- /dev/null +++ b/tests/python_tests/utils/hugging_face.py @@ -0,0 +1,180 @@ +# Copyright (C) 2018-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path +from typing import List + +from transformers import AutoTokenizer +from transformers import GenerationConfig as HFGenerationConfig + +from optimum.intel import OVModelForCausalLM +from openvino import save_model +from openvino_genai import GenerationResult, GenerationConfig, StopCriteria +from openvino_tokenizers import convert_tokenizer + +from utils.constants import default_ov_config + +def generation_config_to_hf( + default_generation_config : HFGenerationConfig, + generation_config : GenerationConfig +) -> HFGenerationConfig: + if generation_config is None: + return + + kwargs = {} + kwargs['return_dict_in_generate'] = True + + # generic parameters + kwargs['max_length'] = generation_config.max_length + # has higher priority than 'max_length' + kwargs['max_new_tokens'] = generation_config.max_new_tokens + kwargs['min_new_tokens'] = generation_config.min_new_tokens + if generation_config.stop_strings: + kwargs['stop_strings'] = generation_config.stop_strings + + # copy default parameters + kwargs['bos_token_id'] = default_generation_config.bos_token_id + kwargs['pad_token_id'] = default_generation_config.pad_token_id + + if len(generation_config.stop_token_ids) > 0: + kwargs['eos_token_id'] = list(generation_config.stop_token_ids) + elif generation_config.eos_token_id != -1: + kwargs['eos_token_id'] = generation_config.eos_token_id + else: + kwargs['eos_token_id'] = default_generation_config.eos_token_id + + # copy penalties + kwargs['repetition_penalty'] = generation_config.repetition_penalty + + if generation_config.is_beam_search(): + # beam search case + kwargs['num_beam_groups'] = generation_config.num_beam_groups + kwargs['num_beams'] = generation_config.num_beams + kwargs['length_penalty'] = generation_config.length_penalty + kwargs['no_repeat_ngram_size'] = generation_config.no_repeat_ngram_size + kwargs['num_return_sequences'] = generation_config.num_return_sequences + kwargs['output_scores'] = True + + if generation_config.num_beam_groups > 1: + kwargs['diversity_penalty'] = generation_config.diversity_penalty + + # in OpenVINO GenAI this parameter is called stop_criteria, + # while in HF it's called early_stopping. 
+ # HF values True, False and "never" correspond to OV GenAI values "EARLY", "HEURISTIC" and "NEVER" + STOP_CRITERIA_MAP = { + StopCriteria.NEVER: "never", + StopCriteria.EARLY: True, + StopCriteria.HEURISTIC: False + } + + kwargs['early_stopping'] = STOP_CRITERIA_MAP[generation_config.stop_criteria] + elif generation_config.is_multinomial(): + # multinomial + kwargs['temperature'] = generation_config.temperature + kwargs['top_k'] = generation_config.top_k + kwargs['top_p'] = generation_config.top_p + kwargs['do_sample'] = generation_config.do_sample + else: + # greedy + pass + + hf_generation_config = HFGenerationConfig(**kwargs) + return hf_generation_config + +def run_hugging_face( + opt_model, + hf_tokenizer, + prompts: List[str], + generation_configs: List[GenerationConfig] | GenerationConfig, +) -> List[GenerationResult]: + generation_results = [] + + if type(generation_configs) is list: + # process prompt by prompt as we have multiple generation configs + for prompt, generation_config in zip(prompts, generation_configs): + hf_generation_config = generation_config_to_hf(opt_model.generation_config, generation_config) + inputs = {} + if hf_tokenizer.chat_template and generation_config.apply_chat_template: + prompt = hf_tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) + inputs = hf_tokenizer(prompt, return_tensors="pt", add_special_tokens=False) + else: + inputs = hf_tokenizer(prompt, return_tensors="pt") + input_ids, attention_mask = inputs['input_ids'], inputs['attention_mask'] + prompt_len = 0 if generation_config.echo else input_ids.numel() + + generate_outputs = opt_model.generate(input_ids=input_ids, attention_mask=attention_mask, generation_config=hf_generation_config, tokenizer=hf_tokenizer) + all_text_batch = hf_tokenizer.batch_decode([generated_ids[prompt_len:] for generated_ids in generate_outputs.sequences], skip_special_tokens=True) + + generation_result = GenerationResult() + generation_result.m_generation_ids = all_text_batch + # sequences_scores are available only for beam search case + if generation_config.is_beam_search(): + generation_result.m_scores = [score for score in generate_outputs.sequences_scores] + generation_results.append(generation_result) + else: + inputs = {} + if hf_tokenizer.chat_template and generation_configs.apply_chat_template: + processed_prompts = [] + for prompt in prompts: + processed_prompts.append(hf_tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True)) + # process all prompts as a single batch as we have a single generation config for all prompts + inputs = hf_tokenizer(processed_prompts, return_tensors='pt', padding=True, truncation=True, add_special_tokens=False, padding_side='left') + else: + inputs = hf_tokenizer(prompts, return_tensors='pt', padding=True, truncation=True, padding_side='left') + input_ids, attention_mask = inputs['input_ids'], inputs['attention_mask'] + hf_generation_config = generation_config_to_hf(opt_model.generation_config, generation_configs) + hf_encoded_outputs = opt_model.generate(input_ids, attention_mask=attention_mask, generation_config=hf_generation_config, tokenizer=hf_tokenizer) + + generation_ids = [] + scores = [] + + for idx, hf_encoded_out in enumerate(hf_encoded_outputs.sequences): + prompt_idx = idx // hf_generation_config.num_return_sequences + prompt_len = 0 if generation_configs.echo else input_ids[prompt_idx].numel() + decoded_text = 
hf_tokenizer.decode(hf_encoded_out[prompt_len:], skip_special_tokens=True) + generation_ids.append(decoded_text) + if generation_configs.is_beam_search(): + scores.append(hf_encoded_outputs.sequences_scores[idx]) + + # if we need to move to next generation result + if (idx + 1) // hf_generation_config.num_return_sequences != prompt_idx: + generation_result = GenerationResult() + generation_result.m_generation_ids = generation_ids + generation_result.m_scores = scores + generation_results.append(generation_result) + generation_ids = [] + scores = [] + + del hf_tokenizer + del opt_model + + return generation_results + + +def get_hugging_face_models(model_id: str): + hf_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) + opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False, load_in_8bit=False, trust_remote_code=True, ov_config=default_ov_config) + return opt_model, hf_tokenizer + + +def convert_and_save_tokenizer(hf_tokenizer : AutoTokenizer, + models_path: Path): + + tokenizer, detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True) + save_model(tokenizer, models_path / "openvino_tokenizer.xml") + save_model(detokenizer, models_path / "openvino_detokenizer.xml") + + +def convert_models(opt_model : OVModelForCausalLM, + hf_tokenizer : AutoTokenizer, + models_path: Path): + opt_model.save_pretrained(models_path) + + # to store tokenizer config jsons with special tokens + hf_tokenizer.save_pretrained(models_path) + + # save generation config + opt_model.generation_config.save_pretrained(models_path) + + # convert tokenizers as well + convert_and_save_tokenizer(hf_tokenizer, models_path) From 3a42c1cec7307da1a4450629d6ec8d2888bf9e6e Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 10 Feb 2025 15:53:20 +0400 Subject: [PATCH 03/14] Test --- tests/python_tests/test_continuous_batching.py | 3 ++- tests/python_tests/test_sampling.py | 3 ++- tests/python_tests/test_tokenizer.py | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py index 187628b191..68660ef993 100644 --- a/tests/python_tests/test_continuous_batching.py +++ b/tests/python_tests/test_continuous_batching.py @@ -10,7 +10,7 @@ from pathlib import Path from openvino_genai import ContinuousBatchingPipeline, LLMPipeline, GenerationConfig, SchedulerConfig, Tokenizer, draft_model -from common import get_hugging_face_models, convert_models, generate_and_compare_with_reference_text, \ +from common import generate_and_compare_with_reference_text, \ get_scheduler_config, run_cb_pipeline_with_ref from test_sampling import RandomSamplingTestStruct, get_current_platform_ref_texts @@ -37,6 +37,7 @@ def read_models_list(file_name: str): get_multinomial_all_parameters, get_multinomial_temperature_and_num_return_sequence, \ get_multinomial_temperature_and_top_k, get_multinomial_temperature, get_multinomial_temperature_and_top_p from utils.constants import default_ov_config +from utils.hugging_face import get_hugging_face_models, convert_models # # e2e tests on random and real models diff --git a/tests/python_tests/test_sampling.py b/tests/python_tests/test_sampling.py index edc5f1a29a..86a7635f4b 100644 --- a/tests/python_tests/test_sampling.py +++ b/tests/python_tests/test_sampling.py @@ -9,8 +9,9 @@ from openvino_genai import GenerationConfig, StopCriteria from typing import List, TypedDict -from common import get_hugging_face_models, convert_models, 
run_llm_pipeline_with_ref, run_llm_pipeline, compare_generation_results, StreamerWithResults +from common import run_llm_pipeline_with_ref, run_llm_pipeline +from utils.hugging_face import get_hugging_face_models, convert_models @pytest.mark.precommit @pytest.mark.parametrize("generation_config,prompt", diff --git a/tests/python_tests/test_tokenizer.py b/tests/python_tests/test_tokenizer.py index c1122fab7f..e866a8c9c1 100644 --- a/tests/python_tests/test_tokenizer.py +++ b/tests/python_tests/test_tokenizer.py @@ -9,7 +9,7 @@ from typing import Dict, Tuple, List import openvino_genai import json -from common import delete_rt_info, convert_and_save_tokenizer +from common import delete_rt_info from ov_genai_test_utils import ( get_models_list, get_chat_models_list, @@ -17,6 +17,8 @@ model_tmp_path, ) +from utils.hugging_face import convert_and_save_tokenizer + def load_genai_tokenizer_with_configs(configs: List[Tuple], temp_path): delete_rt_info(configs, temp_path) From d2fc50c97a793c564d25665bd4129d2888edb32c Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 10 Feb 2025 16:06:23 +0400 Subject: [PATCH 04/14] comparation --- tests/python_tests/common.py | 29 +------------------------ tests/python_tests/utils/comparation.py | 0 2 files changed, 1 insertion(+), 28 deletions(-) create mode 100644 tests/python_tests/utils/comparation.py diff --git a/tests/python_tests/common.py b/tests/python_tests/common.py index 3e79ee15e2..f50400073a 100644 --- a/tests/python_tests/common.py +++ b/tests/python_tests/common.py @@ -16,6 +16,7 @@ from utils.generation_config import get_greedy, get_beam_search from utils.constants import default_ov_config from utils.hugging_face import convert_models, get_hugging_face_models, run_hugging_face +from utils.comparation import compare_generation_results TESTS_ROOT = Path(__file__).parent @@ -155,34 +156,6 @@ def run_llm_pipeline( return generation_results -def compare_generation_result(hf_result: GenerationResult, ov_result: GenerationResult, generation_config: GenerationConfig): - if generation_config.is_beam_search(): - assert len(hf_result.m_scores) == len(ov_result.m_scores) - for hf_score, ov_score in zip(hf_result.m_scores, ov_result.m_scores): - # Note, that for fp32 / fp16 models scores are different less than 0.001 - assert abs(hf_score - ov_score) < 0.02 - - if not generation_config.include_stop_str_in_output and len(generation_config.stop_strings) > 0: - assert len(hf_result.m_generation_ids) >= len(ov_result.m_generation_ids) - for hf_text, ov_text in zip(hf_result.m_generation_ids, ov_result.m_generation_ids): - assert ov_text in hf_text - else: - assert len(hf_result.m_generation_ids) == len(ov_result.m_generation_ids) - for hf_text, ov_text in zip(hf_result.m_generation_ids, ov_result.m_generation_ids): - assert hf_text == ov_text - - -def compare_generation_results(prompts: List[str], hf_results: List[GenerationResult], ov_results: List[GenerationResult], generation_configs: List[GenerationConfig] | GenerationConfig): - if type(generation_configs) is not list: - generation_configs = [generation_configs] - - assert len(prompts) == len(hf_results) - assert len(prompts) == len(ov_results) - - for prompt, ref_result, ov_result, generation_config in zip(prompts, hf_results, ov_results, generation_configs): - print(f"Prompt = {prompt}\nReference result = {ref_result}\nOpenVINO result = {ov_result.m_generation_ids}") - compare_generation_result(ref_result, ov_result, generation_config) - def run_llm_pipeline_with_ref(model_id: str, prompts: List[str], 
generation_config: GenerationConfig | dict, diff --git a/tests/python_tests/utils/comparation.py b/tests/python_tests/utils/comparation.py new file mode 100644 index 0000000000..e69de29bb2 From 4e03adc9c83a3d6eebe95359981825b7b0c032d4 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 10 Feb 2025 16:07:34 +0400 Subject: [PATCH 05/14] move get_image from utils --- tests/python_tests/test_vlm_pipeline.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index 6bab5e706f..babf4a1c93 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -7,7 +7,6 @@ import transformers from optimum.intel.openvino import OVModelForVisualCausalLM from openvino_genai import VLMPipeline, GenerationConfig -from common import get_image_by_link from utils.generation_config import get_beam_search, get_multinomial_all_parameters from utils.constants import default_ov_config @@ -54,6 +53,20 @@ def get_ov_model(model_id, cache): "katuni4ka/tiny-random-qwen2vl", ] + +def get_image_by_link(link): + from PIL import Image + import requests + from openvino import Tensor + import numpy as np + + image = Image.open(requests.get(link, stream=True).raw) + if image.mode != 'RGB': + image = image.convert('RGB') + image_data = np.array((np.array(image.getdata()) - 128).astype(np.byte)).reshape(1, image.size[1], image.size[0], 3) + return Tensor(image_data) + + @pytest.mark.precommit @pytest.mark.nightly @pytest.mark.parametrize("model_id", model_ids) From 2ade2dcd888a9a131e29964c674420f58d8466be Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 10 Feb 2025 16:13:14 +0400 Subject: [PATCH 06/14] Tokenizer config --- tests/python_tests/data/__init__.py | 2 ++ tests/python_tests/{ => data}/tokenizer_configs.py | 2 ++ tests/python_tests/test_tokenizer.py | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 tests/python_tests/data/__init__.py rename tests/python_tests/{ => data}/tokenizer_configs.py (99%) diff --git a/tests/python_tests/data/__init__.py b/tests/python_tests/data/__init__.py new file mode 100644 index 0000000000..6e922cea12 --- /dev/null +++ b/tests/python_tests/data/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2018-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 \ No newline at end of file diff --git a/tests/python_tests/tokenizer_configs.py b/tests/python_tests/data/tokenizer_configs.py similarity index 99% rename from tests/python_tests/tokenizer_configs.py rename to tests/python_tests/data/tokenizer_configs.py index 2b51dc2b0d..a0bfd7be15 100644 --- a/tests/python_tests/tokenizer_configs.py +++ b/tests/python_tests/data/tokenizer_configs.py @@ -1,3 +1,5 @@ +# Copyright (C) 2018-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 def get_tokenizer_configs(): return { diff --git a/tests/python_tests/test_tokenizer.py b/tests/python_tests/test_tokenizer.py index 6e7f53c79d..726a3163ce 100644 --- a/tests/python_tests/test_tokenizer.py +++ b/tests/python_tests/test_tokenizer.py @@ -94,7 +94,7 @@ def get_chat_templates(): "BramVanroy/Llama-2-13b-chat-dutch" } - from tokenizer_configs import get_tokenizer_configs + from data.tokenizer_configs import get_tokenizer_configs return [(k, v) for k, v in get_tokenizer_configs().items() if k not in skipped_models] From ef8283f80242893708b56b726f56b55001cf6b95 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 10 Feb 2025 16:17:30 +0400 Subject: [PATCH 07/14] longbench 
--- tests/python_tests/common.py | 14 -------------- tests/python_tests/test_kv_cache_eviction.py | 2 +- .../{utils_longbench.py => utils/longbench.py} | 0 3 files changed, 1 insertion(+), 15 deletions(-) rename tests/python_tests/{utils_longbench.py => utils/longbench.py} (100%) diff --git a/tests/python_tests/common.py b/tests/python_tests/common.py index f50400073a..f6a1dbdb32 100644 --- a/tests/python_tests/common.py +++ b/tests/python_tests/common.py @@ -211,20 +211,6 @@ def generate_and_compare_with_reference_text(models_path: Path, prompts: List[st for ref_text, ov_text in zip(ref_texts_for_this_prompt, ov_result.m_generation_ids): assert ref_text == ov_text - -def get_image_by_link(link): - from PIL import Image - import requests - from openvino import Tensor - import numpy as np - - image = Image.open(requests.get(link, stream=True).raw) - if image.mode != 'RGB': - image = image.convert('RGB') - image_data = np.array((np.array(image.getdata()) - 128).astype(np.byte)).reshape(1, image.size[1], image.size[0], 3) - return Tensor(image_data) - - """rt_info has the highest priority. Delete it to respect configs.""" def delete_rt_info(configs: List[Tuple], temp_path): core = openvino.Core() diff --git a/tests/python_tests/test_kv_cache_eviction.py b/tests/python_tests/test_kv_cache_eviction.py index ae01e94a75..81ae04bc3f 100644 --- a/tests/python_tests/test_kv_cache_eviction.py +++ b/tests/python_tests/test_kv_cache_eviction.py @@ -18,7 +18,7 @@ from transformers import AutoTokenizer from common import TESTS_ROOT, run_cb_pipeline_with_ref -from utils_longbench import dataset2maxlen, evaluate, preprocess_prompt, post_process_pred +from utils.longbench import dataset2maxlen, evaluate, preprocess_prompt, post_process_pred from utils.constants import default_ov_config diff --git a/tests/python_tests/utils_longbench.py b/tests/python_tests/utils/longbench.py similarity index 100% rename from tests/python_tests/utils_longbench.py rename to tests/python_tests/utils/longbench.py From 940ac3ec6dedd37a15e2a7c501b8a8e0e33b42de Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 10 Feb 2025 16:19:17 +0400 Subject: [PATCH 08/14] comp --- tests/python_tests/utils/comparation.py | 38 +++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/python_tests/utils/comparation.py b/tests/python_tests/utils/comparation.py index e69de29bb2..6293d30397 100644 --- a/tests/python_tests/utils/comparation.py +++ b/tests/python_tests/utils/comparation.py @@ -0,0 +1,38 @@ +# Copyright (C) 2018-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino_genai import GenerationResult, GenerationConfig +from typing import List + +def compare_generation_result(hf_result: GenerationResult, + ov_result: GenerationResult, + generation_config: GenerationConfig): + if generation_config.is_beam_search(): + assert len(hf_result.m_scores) == len(ov_result.m_scores) + for hf_score, ov_score in zip(hf_result.m_scores, ov_result.m_scores): + # Note, that for fp32 / fp16 models scores are different less than 0.001 + assert abs(hf_score - ov_score) < 0.02 + + if not generation_config.include_stop_str_in_output and len(generation_config.stop_strings) > 0: + assert len(hf_result.m_generation_ids) >= len(ov_result.m_generation_ids) + for hf_text, ov_text in zip(hf_result.m_generation_ids, ov_result.m_generation_ids): + assert ov_text in hf_text + else: + assert len(hf_result.m_generation_ids) == len(ov_result.m_generation_ids) + for hf_text, ov_text in zip(hf_result.m_generation_ids, 
ov_result.m_generation_ids): + assert hf_text == ov_text + + +def compare_generation_results(prompts: List[str], + hf_results: List[GenerationResult], + ov_results: List[GenerationResult], + generation_configs: List[GenerationConfig] | GenerationConfig): + if type(generation_configs) is not list: + generation_configs = [generation_configs] + + assert len(prompts) == len(hf_results) + assert len(prompts) == len(ov_results) + + for prompt, ref_result, ov_result, generation_config in zip(prompts, hf_results, ov_results, generation_configs): + print(f"Prompt = {prompt}\nReference result = {ref_result}\nOpenVINO result = {ov_result.m_generation_ids}") + compare_generation_result(ref_result, ov_result, generation_config) \ No newline at end of file From 333845f9631c61cfcc4725ab4c7f0637cd3b32af Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 10 Feb 2025 16:36:30 +0400 Subject: [PATCH 09/14] remove extra init --- tests/python_tests/__init__.py | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 tests/python_tests/__init__.py diff --git a/tests/python_tests/__init__.py b/tests/python_tests/__init__.py deleted file mode 100644 index 6e922cea12..0000000000 --- a/tests/python_tests/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2018-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 \ No newline at end of file From 34b7303608b6188d5452a053a052f02ee64a82a5 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 10 Feb 2025 16:38:06 +0400 Subject: [PATCH 10/14] remove extra --- tests/python_tests/__init__.py | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 tests/python_tests/__init__.py diff --git a/tests/python_tests/__init__.py b/tests/python_tests/__init__.py deleted file mode 100644 index 6e922cea12..0000000000 --- a/tests/python_tests/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (C) 2018-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 \ No newline at end of file From 82ebb74e7a13ab1b4c811068bffb82e5383cccad Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 10 Feb 2025 19:29:00 +0400 Subject: [PATCH 11/14] upper case --- tests/python_tests/ov_genai_test_utils.py | 12 ++++++------ tests/python_tests/test_continuous_batching.py | 4 ++-- tests/python_tests/test_kv_cache_eviction.py | 12 ++++++------ tests/python_tests/test_llm_pipeline.py | 3 +-- tests/python_tests/test_llm_pipeline_static.py | 8 ++++---- tests/python_tests/test_vlm_pipeline.py | 4 ++-- tests/python_tests/utils/constants.py | 2 +- tests/python_tests/utils/hugging_face.py | 4 ++-- 8 files changed, 24 insertions(+), 25 deletions(-) diff --git a/tests/python_tests/ov_genai_test_utils.py b/tests/python_tests/ov_genai_test_utils.py index e971ab14eb..b7aa0a5212 100644 --- a/tests/python_tests/ov_genai_test_utils.py +++ b/tests/python_tests/ov_genai_test_utils.py @@ -16,7 +16,7 @@ import openvino_genai as ov_genai from common import delete_rt_info -from utils.constants import default_ov_config +from utils.constants import DEFAULT_OV_CONFIG def get_models_list(): precommit_models = [ @@ -96,7 +96,7 @@ def read_model(params, **tokenizer_kwargs): if (models_path / "openvino_model.xml").exists(): opt_model = OVModelForCausalLM.from_pretrained(models_path, trust_remote_code=True, - compile=False, device='CPU', ov_config=default_ov_config) + compile=False, device='CPU', ov_config=DEFAULT_OV_CONFIG) else: ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(hf_tokenizer, with_detokenizer=True, @@ -108,7 +108,7 @@ def read_model(params, **tokenizer_kwargs): 
hf_tokenizer.save_pretrained(models_path) opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, - compile=False, device='CPU', load_in_8bit=False, ov_config=default_ov_config) + compile=False, device='CPU', load_in_8bit=False, ov_config=DEFAULT_OV_CONFIG) opt_model.generation_config.save_pretrained(models_path) opt_model.config.save_pretrained(models_path) opt_model.save_pretrained(models_path) @@ -118,7 +118,7 @@ def read_model(params, **tokenizer_kwargs): models_path, hf_tokenizer, opt_model, - ov_genai.LLMPipeline(models_path, 'CPU', ENABLE_MMAP=False, **default_ov_config), + ov_genai.LLMPipeline(models_path, 'CPU', ENABLE_MMAP=False, **DEFAULT_OV_CONFIG), ) @@ -183,7 +183,7 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path): with (temp_path / config_name).open('w') as f: json.dump(config_json, f) - ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU', **default_ov_config) + ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU', **DEFAULT_OV_CONFIG) for _, config_name in configs: os.remove(temp_path / config_name) @@ -193,4 +193,4 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path): @functools.lru_cache(1) def get_continuous_batching(path): - return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig(), **default_ov_config) + return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig(), **DEFAULT_OV_CONFIG) diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py index 6272dec6df..d6b4bacce1 100644 --- a/tests/python_tests/test_continuous_batching.py +++ b/tests/python_tests/test_continuous_batching.py @@ -36,7 +36,7 @@ def read_models_list(file_name: str): from utils.generation_config import get_greedy, get_beam_search, \ get_multinomial_all_parameters, get_multinomial_temperature_and_num_return_sequence, \ get_multinomial_temperature_and_top_k, get_multinomial_temperature, get_multinomial_temperature_and_top_p -from utils.constants import default_ov_config +from utils.constants import DEFAULT_OV_CONFIG from utils.hugging_face import get_hugging_face_models, convert_models # @@ -162,7 +162,7 @@ def test_post_oom_health(tmp_path, sampling_config): models_path : Path = tmp_path / model_id convert_models(opt_model, hf_tokenizer, models_path) - cb_pipe = ContinuousBatchingPipeline(models_path, Tokenizer(models_path), scheduler_config, "CPU", **default_ov_config) + cb_pipe = ContinuousBatchingPipeline(models_path, Tokenizer(models_path), scheduler_config, "CPU", **DEFAULT_OV_CONFIG) # First run should return incomplete response output = cb_pipe.generate(["What is OpenVINO?"], [generation_config]) diff --git a/tests/python_tests/test_kv_cache_eviction.py b/tests/python_tests/test_kv_cache_eviction.py index ae01e94a75..9fbed7fc77 100644 --- a/tests/python_tests/test_kv_cache_eviction.py +++ b/tests/python_tests/test_kv_cache_eviction.py @@ -20,7 +20,7 @@ from common import TESTS_ROOT, run_cb_pipeline_with_ref from utils_longbench import dataset2maxlen, evaluate, preprocess_prompt, post_process_pred -from utils.constants import default_ov_config +from utils.constants import DEFAULT_OV_CONFIG def load_prompts_dataset(file_name : str) -> Dict[str, List[str]]: @@ -47,7 +47,7 @@ class ConvertedModel: @pytest.fixture(scope='module') def converted_model(tmp_path_factory): model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" - model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, load_in_8bit=False, 
compile=False, ov_config=default_ov_config) + model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, load_in_8bit=False, compile=False, ov_config=DEFAULT_OV_CONFIG) tokenizer = AutoTokenizer.from_pretrained(model_id) models_path = tmp_path_factory.mktemp("cacheopt_test_models") / model_id model.save_pretrained(models_path) @@ -126,8 +126,8 @@ def test_cache_optimized_generation_is_similar_to_unoptimized(converted_model, t scheduler_config_opt.enable_prefix_caching = enable_prefix_caching models_path = converted_model.models_path - model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, "CPU", {}, default_ov_config) - model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, "CPU", {}, default_ov_config) + model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, "CPU", {}, DEFAULT_OV_CONFIG) + model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, "CPU", {}, DEFAULT_OV_CONFIG) tokenizer = converted_model.tokenizer @@ -239,8 +239,8 @@ def test_optimized_generation_longbench(qwen2_converted_model, device, test_stru if scheduler_config_opt.use_cache_eviction: scheduler_config_opt.cache_eviction_config = LONGBENCH_CACHE_EVICTION_CONFIG - model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, device, {}, default_ov_config) - model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, device, {}, default_ov_config) + model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, device, {}, DEFAULT_OV_CONFIG) + model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, device, {}, DEFAULT_OV_CONFIG) model_name = "/".join(models_path.parts[-2:]) subset = test_struct.subset diff --git a/tests/python_tests/test_llm_pipeline.py b/tests/python_tests/test_llm_pipeline.py index 6e7790db13..52885dc280 100644 --- a/tests/python_tests/test_llm_pipeline.py +++ b/tests/python_tests/test_llm_pipeline.py @@ -20,7 +20,6 @@ model_tmp_path, ) from utils.hugging_face import generation_config_to_hf - # # e2e work # @@ -288,7 +287,7 @@ def test_chat_scenario_callback_cancel(model_descr): model_id, path, tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat')) ov_generation_config = GenerationConfig(**generation_config_kwargs) - hf_generation_config = convert_to_hf(opt_model.generation_config, ov_generation_config) + hf_generation_config = generation_config_to_hf(opt_model.generation_config, ov_generation_config) current_iter = 0 num_iters = 3 diff --git a/tests/python_tests/test_llm_pipeline_static.py b/tests/python_tests/test_llm_pipeline_static.py index d8b24d825e..0d2db33598 100644 --- a/tests/python_tests/test_llm_pipeline_static.py +++ b/tests/python_tests/test_llm_pipeline_static.py @@ -12,7 +12,7 @@ get_chat_models_list, read_model ) -from utils.constants import default_ov_config +from utils.constants import DEFAULT_OV_CONFIG from utils.generation_config import \ get_greedy, \ get_greedy_with_penalties, \ @@ -32,7 +32,7 @@ 'NPUW_ONLINE_PIPELINE': 'NONE', 'PREFILL_CONFIG': { }, 'GENERATE_CONFIG': { } - } | default_ov_config + } | DEFAULT_OV_CONFIG def generate_chat_history(model_path, device, pipeline_config, questions): @@ -54,7 +54,7 @@ def test_generation_compare_with_stateful(generation_config): prompt = 'What is OpenVINO?' 
model_path = read_model(get_models_list()[0])[1] - stateful_pipe = ov_genai.LLMPipeline(model_path, "CPU", **default_ov_config) + stateful_pipe = ov_genai.LLMPipeline(model_path, "CPU", **DEFAULT_OV_CONFIG) ref_out = stateful_pipe.generate(prompt, generation_config) static_pipe = ov_genai.LLMPipeline(model_path, "NPU", **common_config) @@ -220,7 +220,7 @@ def test_chat_generation(): model_path = read_model(get_chat_models_list()[0])[1] - chat_history_stateful = generate_chat_history(model_path, "CPU", default_ov_config, questions) + chat_history_stateful = generate_chat_history(model_path, "CPU", DEFAULT_OV_CONFIG, questions) chat_history_static = generate_chat_history(model_path, "NPU", common_config, questions) print('npu chat: \n{chat_history_static}\n') diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index 6bab5e706f..ee20f9133e 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -10,7 +10,7 @@ from common import get_image_by_link from utils.generation_config import get_beam_search, get_multinomial_all_parameters -from utils.constants import default_ov_config +from utils.constants import DEFAULT_OV_CONFIG def get_ov_model(model_id, cache): model_dir = cache.mkdir(model_id.split('/')[-1]) @@ -21,7 +21,7 @@ def get_ov_model(model_id, cache): ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(processor.tokenizer, with_detokenizer=True) openvino.save_model(ov_tokenizer, model_dir / "openvino_tokenizer.xml") openvino.save_model(ov_detokenizer, model_dir / "openvino_detokenizer.xml") - model = OVModelForVisualCausalLM.from_pretrained(model_id, compile=False, device="CPU", export=True, load_in_8bit=False, trust_remote_code=True, ov_config=default_ov_config) + model = OVModelForVisualCausalLM.from_pretrained(model_id, compile=False, device="CPU", export=True, load_in_8bit=False, trust_remote_code=True, ov_config=DEFAULT_OV_CONFIG) if processor.tokenizer.chat_template is not None: processor.chat_template = processor.tokenizer.chat_template # It seems that tiny-random-phi3-vision is saved incorrectly. That line works this around. 
processor.save_pretrained(model_dir) diff --git a/tests/python_tests/utils/constants.py b/tests/python_tests/utils/constants.py index b67ccca20f..a7f7e9db52 100644 --- a/tests/python_tests/utils/constants.py +++ b/tests/python_tests/utils/constants.py @@ -4,7 +4,7 @@ import openvino.properties.hint as hints import openvino as ov -default_ov_config = { +DEFAULT_OV_CONFIG = { hints.inference_precision : ov.Type.f32, hints.kv_cache_precision : ov.Type.f16, } \ No newline at end of file diff --git a/tests/python_tests/utils/hugging_face.py b/tests/python_tests/utils/hugging_face.py index e7087c0818..41636e701a 100644 --- a/tests/python_tests/utils/hugging_face.py +++ b/tests/python_tests/utils/hugging_face.py @@ -12,7 +12,7 @@ from openvino_genai import GenerationResult, GenerationConfig, StopCriteria from openvino_tokenizers import convert_tokenizer -from utils.constants import default_ov_config +from utils.constants import DEFAULT_OV_CONFIG def generation_config_to_hf( default_generation_config : HFGenerationConfig, @@ -156,7 +156,7 @@ def run_hugging_face( def get_hugging_face_models(model_id: str): hf_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) - opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False, load_in_8bit=False, trust_remote_code=True, ov_config=default_ov_config) + opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False, load_in_8bit=False, trust_remote_code=True, ov_config=DEFAULT_OV_CONFIG) return opt_model, hf_tokenizer From dba2a870c3fe2bd8dff0115fa1797d7103627bfc Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 10 Feb 2025 20:05:51 +0400 Subject: [PATCH 12/14] fix tests --- tests/python_tests/common.py | 6 +++--- tests/python_tests/ov_genai_test_utils.py | 12 ++++++------ tests/python_tests/test_continuous_batching.py | 4 ++-- tests/python_tests/test_kv_cache_eviction.py | 12 ++++++------ tests/python_tests/test_llm_pipeline_static.py | 8 ++++---- tests/python_tests/test_vlm_pipeline.py | 4 ++-- tests/python_tests/utils/constants.py | 9 +++++---- tests/python_tests/utils/hugging_face.py | 4 ++-- 8 files changed, 30 insertions(+), 29 deletions(-) diff --git a/tests/python_tests/common.py b/tests/python_tests/common.py index 3e79ee15e2..db6e21fea3 100644 --- a/tests/python_tests/common.py +++ b/tests/python_tests/common.py @@ -14,7 +14,7 @@ from typing import List, Tuple, Callable from utils.generation_config import get_greedy, get_beam_search -from utils.constants import default_ov_config +from utils.constants import get_default_llm_propeties from utils.hugging_face import convert_models, get_hugging_face_models, run_hugging_face TESTS_ROOT = Path(__file__).parent @@ -66,7 +66,7 @@ def run_continuous_batching( if type(generation_configs) is not list: generation_configs = [generation_configs] * len(prompts) - cb_pipe = ContinuousBatchingPipeline(models_path, scheduler_config=scheduler_config, device='CPU', tokenizer_properties={}, properties=default_ov_config) + cb_pipe = ContinuousBatchingPipeline(models_path, scheduler_config=scheduler_config, device='CPU', tokenizer_properties={}, properties=get_default_llm_propeties()) output = cb_pipe.generate(prompts, generation_configs) del cb_pipe @@ -114,7 +114,7 @@ def run_llm_pipeline( use_cb : bool = False, streamer: StreamerWithResults | Callable | StreamerBase = None ) -> List[GenerationResult]: - properties = default_ov_config + properties = get_default_llm_propeties() if use_cb: properties['scheduler_config'] = SchedulerConfig() 
ov_pipe = LLMPipeline(models_path, device='CPU', **properties) diff --git a/tests/python_tests/ov_genai_test_utils.py b/tests/python_tests/ov_genai_test_utils.py index b7aa0a5212..67bf51a9e4 100644 --- a/tests/python_tests/ov_genai_test_utils.py +++ b/tests/python_tests/ov_genai_test_utils.py @@ -16,7 +16,7 @@ import openvino_genai as ov_genai from common import delete_rt_info -from utils.constants import DEFAULT_OV_CONFIG +from utils.constants import get_default_llm_propeties def get_models_list(): precommit_models = [ @@ -96,7 +96,7 @@ def read_model(params, **tokenizer_kwargs): if (models_path / "openvino_model.xml").exists(): opt_model = OVModelForCausalLM.from_pretrained(models_path, trust_remote_code=True, - compile=False, device='CPU', ov_config=DEFAULT_OV_CONFIG) + compile=False, device='CPU', ov_config=get_default_llm_propeties()) else: ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(hf_tokenizer, with_detokenizer=True, @@ -108,7 +108,7 @@ def read_model(params, **tokenizer_kwargs): hf_tokenizer.save_pretrained(models_path) opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, - compile=False, device='CPU', load_in_8bit=False, ov_config=DEFAULT_OV_CONFIG) + compile=False, device='CPU', load_in_8bit=False, ov_config=get_default_llm_propeties()) opt_model.generation_config.save_pretrained(models_path) opt_model.config.save_pretrained(models_path) opt_model.save_pretrained(models_path) @@ -118,7 +118,7 @@ def read_model(params, **tokenizer_kwargs): models_path, hf_tokenizer, opt_model, - ov_genai.LLMPipeline(models_path, 'CPU', ENABLE_MMAP=False, **DEFAULT_OV_CONFIG), + ov_genai.LLMPipeline(models_path, 'CPU', ENABLE_MMAP=False, **get_default_llm_propeties()), ) @@ -183,7 +183,7 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path): with (temp_path / config_name).open('w') as f: json.dump(config_json, f) - ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU', **DEFAULT_OV_CONFIG) + ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU', **get_default_llm_propeties()) for _, config_name in configs: os.remove(temp_path / config_name) @@ -193,4 +193,4 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path): @functools.lru_cache(1) def get_continuous_batching(path): - return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig(), **DEFAULT_OV_CONFIG) + return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig(), **get_default_llm_propeties()) diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py index d6b4bacce1..516f1234f9 100644 --- a/tests/python_tests/test_continuous_batching.py +++ b/tests/python_tests/test_continuous_batching.py @@ -36,7 +36,7 @@ def read_models_list(file_name: str): from utils.generation_config import get_greedy, get_beam_search, \ get_multinomial_all_parameters, get_multinomial_temperature_and_num_return_sequence, \ get_multinomial_temperature_and_top_k, get_multinomial_temperature, get_multinomial_temperature_and_top_p -from utils.constants import DEFAULT_OV_CONFIG +from utils.constants import get_default_llm_propeties from utils.hugging_face import get_hugging_face_models, convert_models # @@ -162,7 +162,7 @@ def test_post_oom_health(tmp_path, sampling_config): models_path : Path = tmp_path / model_id convert_models(opt_model, hf_tokenizer, models_path) - cb_pipe = ContinuousBatchingPipeline(models_path, Tokenizer(models_path), scheduler_config, "CPU", **DEFAULT_OV_CONFIG) + cb_pipe = 
ContinuousBatchingPipeline(models_path, Tokenizer(models_path), scheduler_config, "CPU", **get_default_llm_propeties()) # First run should return incomplete response output = cb_pipe.generate(["What is OpenVINO?"], [generation_config]) diff --git a/tests/python_tests/test_kv_cache_eviction.py b/tests/python_tests/test_kv_cache_eviction.py index 9fbed7fc77..9e0631266d 100644 --- a/tests/python_tests/test_kv_cache_eviction.py +++ b/tests/python_tests/test_kv_cache_eviction.py @@ -20,7 +20,7 @@ from common import TESTS_ROOT, run_cb_pipeline_with_ref from utils_longbench import dataset2maxlen, evaluate, preprocess_prompt, post_process_pred -from utils.constants import DEFAULT_OV_CONFIG +from utils.constants import get_default_llm_propeties def load_prompts_dataset(file_name : str) -> Dict[str, List[str]]: @@ -47,7 +47,7 @@ class ConvertedModel: @pytest.fixture(scope='module') def converted_model(tmp_path_factory): model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" - model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, load_in_8bit=False, compile=False, ov_config=DEFAULT_OV_CONFIG) + model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, load_in_8bit=False, compile=False, ov_config=get_default_llm_propeties()) tokenizer = AutoTokenizer.from_pretrained(model_id) models_path = tmp_path_factory.mktemp("cacheopt_test_models") / model_id model.save_pretrained(models_path) @@ -126,8 +126,8 @@ def test_cache_optimized_generation_is_similar_to_unoptimized(converted_model, t scheduler_config_opt.enable_prefix_caching = enable_prefix_caching models_path = converted_model.models_path - model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, "CPU", {}, DEFAULT_OV_CONFIG) - model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, "CPU", {}, DEFAULT_OV_CONFIG) + model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, "CPU", {}, get_default_llm_propeties()) + model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, "CPU", {}, get_default_llm_propeties()) tokenizer = converted_model.tokenizer @@ -239,8 +239,8 @@ def test_optimized_generation_longbench(qwen2_converted_model, device, test_stru if scheduler_config_opt.use_cache_eviction: scheduler_config_opt.cache_eviction_config = LONGBENCH_CACHE_EVICTION_CONFIG - model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, device, {}, DEFAULT_OV_CONFIG) - model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, device, {}, DEFAULT_OV_CONFIG) + model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, device, {}, get_default_llm_propeties()) + model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, device, {}, get_default_llm_propeties()) model_name = "/".join(models_path.parts[-2:]) subset = test_struct.subset diff --git a/tests/python_tests/test_llm_pipeline_static.py b/tests/python_tests/test_llm_pipeline_static.py index 0d2db33598..15f5324331 100644 --- a/tests/python_tests/test_llm_pipeline_static.py +++ b/tests/python_tests/test_llm_pipeline_static.py @@ -12,7 +12,7 @@ get_chat_models_list, read_model ) -from utils.constants import DEFAULT_OV_CONFIG +from utils.constants import get_default_llm_propeties from utils.generation_config import \ get_greedy, \ get_greedy_with_penalties, \ @@ -32,7 +32,7 @@ 'NPUW_ONLINE_PIPELINE': 'NONE', 'PREFILL_CONFIG': { }, 'GENERATE_CONFIG': { } - } | DEFAULT_OV_CONFIG + } | get_default_llm_propeties() def 
generate_chat_history(model_path, device, pipeline_config, questions): @@ -54,7 +54,7 @@ def test_generation_compare_with_stateful(generation_config): prompt = 'What is OpenVINO?' model_path = read_model(get_models_list()[0])[1] - stateful_pipe = ov_genai.LLMPipeline(model_path, "CPU", **DEFAULT_OV_CONFIG) + stateful_pipe = ov_genai.LLMPipeline(model_path, "CPU", **get_default_llm_propeties()) ref_out = stateful_pipe.generate(prompt, generation_config) static_pipe = ov_genai.LLMPipeline(model_path, "NPU", **common_config) @@ -220,7 +220,7 @@ def test_chat_generation(): model_path = read_model(get_chat_models_list()[0])[1] - chat_history_stateful = generate_chat_history(model_path, "CPU", DEFAULT_OV_CONFIG, questions) + chat_history_stateful = generate_chat_history(model_path, "CPU", get_default_llm_propeties(), questions) chat_history_static = generate_chat_history(model_path, "NPU", common_config, questions) print('npu chat: \n{chat_history_static}\n') diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index ee20f9133e..ea0fd36a90 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -10,7 +10,7 @@ from common import get_image_by_link from utils.generation_config import get_beam_search, get_multinomial_all_parameters -from utils.constants import DEFAULT_OV_CONFIG +from utils.constants import get_default_llm_propeties def get_ov_model(model_id, cache): model_dir = cache.mkdir(model_id.split('/')[-1]) @@ -21,7 +21,7 @@ def get_ov_model(model_id, cache): ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(processor.tokenizer, with_detokenizer=True) openvino.save_model(ov_tokenizer, model_dir / "openvino_tokenizer.xml") openvino.save_model(ov_detokenizer, model_dir / "openvino_detokenizer.xml") - model = OVModelForVisualCausalLM.from_pretrained(model_id, compile=False, device="CPU", export=True, load_in_8bit=False, trust_remote_code=True, ov_config=DEFAULT_OV_CONFIG) + model = OVModelForVisualCausalLM.from_pretrained(model_id, compile=False, device="CPU", export=True, load_in_8bit=False, trust_remote_code=True, ov_config=get_default_llm_propeties()) if processor.tokenizer.chat_template is not None: processor.chat_template = processor.tokenizer.chat_template # It seems that tiny-random-phi3-vision is saved incorrectly. That line works this around. 
processor.save_pretrained(model_dir) diff --git a/tests/python_tests/utils/constants.py b/tests/python_tests/utils/constants.py index a7f7e9db52..ef4c4b32e2 100644 --- a/tests/python_tests/utils/constants.py +++ b/tests/python_tests/utils/constants.py @@ -4,7 +4,8 @@ import openvino.properties.hint as hints import openvino as ov -DEFAULT_OV_CONFIG = { - hints.inference_precision : ov.Type.f32, - hints.kv_cache_precision : ov.Type.f16, -} \ No newline at end of file +def get_default_llm_propeties(): + return { + hints.inference_precision : ov.Type.f32, + hints.kv_cache_precision : ov.Type.f16, + } \ No newline at end of file diff --git a/tests/python_tests/utils/hugging_face.py b/tests/python_tests/utils/hugging_face.py index 41636e701a..d7f4f7e060 100644 --- a/tests/python_tests/utils/hugging_face.py +++ b/tests/python_tests/utils/hugging_face.py @@ -12,7 +12,7 @@ from openvino_genai import GenerationResult, GenerationConfig, StopCriteria from openvino_tokenizers import convert_tokenizer -from utils.constants import DEFAULT_OV_CONFIG +from utils.constants import get_default_llm_propeties def generation_config_to_hf( default_generation_config : HFGenerationConfig, @@ -156,7 +156,7 @@ def run_hugging_face( def get_hugging_face_models(model_id: str): hf_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) - opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False, load_in_8bit=False, trust_remote_code=True, ov_config=DEFAULT_OV_CONFIG) + opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False, load_in_8bit=False, trust_remote_code=True, ov_config=get_default_llm_propeties()) return opt_model, hf_tokenizer From 5029c671ea4da739e66a80f819435855c9e72166 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Wed, 12 Feb 2025 11:58:56 +0400 Subject: [PATCH 13/14] tmp --- tests/python_tests/common.py | 6 +++--- tests/python_tests/ov_genai_test_utils.py | 12 ++++++------ tests/python_tests/test_continuous_batching.py | 4 ++-- tests/python_tests/test_kv_cache_eviction.py | 12 ++++++------ tests/python_tests/test_llm_pipeline_static.py | 8 ++++---- tests/python_tests/test_vlm_pipeline.py | 4 ++-- tests/python_tests/utils/constants.py | 2 +- tests/python_tests/utils/hugging_face.py | 4 ++-- 8 files changed, 26 insertions(+), 26 deletions(-) diff --git a/tests/python_tests/common.py b/tests/python_tests/common.py index 7bc51a25ec..2cad0fa432 100644 --- a/tests/python_tests/common.py +++ b/tests/python_tests/common.py @@ -14,7 +14,7 @@ from typing import List, Tuple, Callable from utils.generation_config import get_greedy, get_beam_search -from utils.constants import get_default_llm_propeties +from utils.constants import get_default_llm_properties from utils.hugging_face import convert_models, get_hugging_face_models, run_hugging_face from utils.comparation import compare_generation_results @@ -67,7 +67,7 @@ def run_continuous_batching( if type(generation_configs) is not list: generation_configs = [generation_configs] * len(prompts) - cb_pipe = ContinuousBatchingPipeline(models_path, scheduler_config=scheduler_config, device='CPU', tokenizer_properties={}, properties=get_default_llm_propeties()) + cb_pipe = ContinuousBatchingPipeline(models_path, scheduler_config=scheduler_config, device='CPU', tokenizer_properties={}, properties=get_default_llm_properties()) output = cb_pipe.generate(prompts, generation_configs) del cb_pipe @@ -115,7 +115,7 @@ def run_llm_pipeline( use_cb : bool = False, streamer: StreamerWithResults | Callable | 
StreamerBase = None ) -> List[GenerationResult]: - properties = get_default_llm_propeties() + properties = get_default_llm_properties() if use_cb: properties['scheduler_config'] = SchedulerConfig() ov_pipe = LLMPipeline(models_path, device='CPU', **properties) diff --git a/tests/python_tests/ov_genai_test_utils.py b/tests/python_tests/ov_genai_test_utils.py index 67bf51a9e4..03e95daf19 100644 --- a/tests/python_tests/ov_genai_test_utils.py +++ b/tests/python_tests/ov_genai_test_utils.py @@ -16,7 +16,7 @@ import openvino_genai as ov_genai from common import delete_rt_info -from utils.constants import get_default_llm_propeties +from utils.constants import get_default_llm_properties def get_models_list(): precommit_models = [ @@ -96,7 +96,7 @@ def read_model(params, **tokenizer_kwargs): if (models_path / "openvino_model.xml").exists(): opt_model = OVModelForCausalLM.from_pretrained(models_path, trust_remote_code=True, - compile=False, device='CPU', ov_config=get_default_llm_propeties()) + compile=False, device='CPU', ov_config=get_default_llm_properties()) else: ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(hf_tokenizer, with_detokenizer=True, @@ -108,7 +108,7 @@ def read_model(params, **tokenizer_kwargs): hf_tokenizer.save_pretrained(models_path) opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, - compile=False, device='CPU', load_in_8bit=False, ov_config=get_default_llm_propeties()) + compile=False, device='CPU', load_in_8bit=False, ov_config=get_default_llm_properties()) opt_model.generation_config.save_pretrained(models_path) opt_model.config.save_pretrained(models_path) opt_model.save_pretrained(models_path) @@ -118,7 +118,7 @@ def read_model(params, **tokenizer_kwargs): models_path, hf_tokenizer, opt_model, - ov_genai.LLMPipeline(models_path, 'CPU', ENABLE_MMAP=False, **get_default_llm_propeties()), + ov_genai.LLMPipeline(models_path, 'CPU', ENABLE_MMAP=False, **get_default_llm_properties()), ) @@ -183,7 +183,7 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path): with (temp_path / config_name).open('w') as f: json.dump(config_json, f) - ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU', **get_default_llm_propeties()) + ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU', **get_default_llm_properties()) for _, config_name in configs: os.remove(temp_path / config_name) @@ -193,4 +193,4 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path): @functools.lru_cache(1) def get_continuous_batching(path): - return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig(), **get_default_llm_propeties()) + return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig(), **get_default_llm_properties()) diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py index b260e8ec09..ba3817c071 100644 --- a/tests/python_tests/test_continuous_batching.py +++ b/tests/python_tests/test_continuous_batching.py @@ -38,7 +38,7 @@ def read_models_list(file_name: str): from utils.generation_config import get_greedy, get_beam_search, \ get_multinomial_all_parameters, get_multinomial_temperature_and_num_return_sequence, \ get_multinomial_temperature_and_top_k, get_multinomial_temperature, get_multinomial_temperature_and_top_p -from utils.constants import get_default_llm_propeties +from utils.constants import get_default_llm_properties from utils.hugging_face import get_hugging_face_models, convert_models # @@ -164,7 +164,7 @@ def 
test_post_oom_health(tmp_path, sampling_config): models_path : Path = tmp_path / model_id convert_models(opt_model, hf_tokenizer, models_path) - cb_pipe = ContinuousBatchingPipeline(models_path, Tokenizer(models_path), scheduler_config, "CPU", **get_default_llm_propeties()) + cb_pipe = ContinuousBatchingPipeline(models_path, Tokenizer(models_path), scheduler_config, "CPU", **get_default_llm_properties()) # First run should return incomplete response output = cb_pipe.generate(["What is OpenVINO?"], [generation_config]) diff --git a/tests/python_tests/test_kv_cache_eviction.py b/tests/python_tests/test_kv_cache_eviction.py index b4312fe579..dd9717b22e 100644 --- a/tests/python_tests/test_kv_cache_eviction.py +++ b/tests/python_tests/test_kv_cache_eviction.py @@ -20,7 +20,7 @@ from common import TESTS_ROOT, run_cb_pipeline_with_ref from utils.longbench import dataset2maxlen, evaluate, preprocess_prompt, post_process_pred -from utils.constants import get_default_llm_propeties +from utils.constants import get_default_llm_properties def load_prompts_dataset(file_name : str) -> Dict[str, List[str]]: @@ -47,7 +47,7 @@ class ConvertedModel: @pytest.fixture(scope='module') def converted_model(tmp_path_factory): model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" - model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, load_in_8bit=False, compile=False, ov_config=get_default_llm_propeties()) + model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, load_in_8bit=False, compile=False, ov_config=get_default_llm_properties()) tokenizer = AutoTokenizer.from_pretrained(model_id) models_path = tmp_path_factory.mktemp("cacheopt_test_models") / model_id model.save_pretrained(models_path) @@ -126,8 +126,8 @@ def test_cache_optimized_generation_is_similar_to_unoptimized(converted_model, t scheduler_config_opt.enable_prefix_caching = enable_prefix_caching models_path = converted_model.models_path - model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, "CPU", {}, get_default_llm_propeties()) - model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, "CPU", {}, get_default_llm_propeties()) + model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, "CPU", {}, get_default_llm_properties()) + model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, "CPU", {}, get_default_llm_properties()) tokenizer = converted_model.tokenizer @@ -239,8 +239,8 @@ def test_optimized_generation_longbench(qwen2_converted_model, device, test_stru if scheduler_config_opt.use_cache_eviction: scheduler_config_opt.cache_eviction_config = LONGBENCH_CACHE_EVICTION_CONFIG - model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, device, {}, get_default_llm_propeties()) - model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, device, {}, get_default_llm_propeties()) + model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, device, {}, get_default_llm_properties()) + model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, device, {}, get_default_llm_properties()) model_name = "/".join(models_path.parts[-2:]) subset = test_struct.subset diff --git a/tests/python_tests/test_llm_pipeline_static.py b/tests/python_tests/test_llm_pipeline_static.py index 15f5324331..431d9d88c4 100644 --- a/tests/python_tests/test_llm_pipeline_static.py +++ b/tests/python_tests/test_llm_pipeline_static.py @@ -12,7 +12,7 @@ get_chat_models_list, 
read_model ) -from utils.constants import get_default_llm_propeties +from utils.constants import get_default_llm_properties from utils.generation_config import \ get_greedy, \ get_greedy_with_penalties, \ @@ -32,7 +32,7 @@ 'NPUW_ONLINE_PIPELINE': 'NONE', 'PREFILL_CONFIG': { }, 'GENERATE_CONFIG': { } - } | get_default_llm_propeties() + } | get_default_llm_properties() def generate_chat_history(model_path, device, pipeline_config, questions): @@ -54,7 +54,7 @@ def test_generation_compare_with_stateful(generation_config): prompt = 'What is OpenVINO?' model_path = read_model(get_models_list()[0])[1] - stateful_pipe = ov_genai.LLMPipeline(model_path, "CPU", **get_default_llm_propeties()) + stateful_pipe = ov_genai.LLMPipeline(model_path, "CPU", **get_default_llm_properties()) ref_out = stateful_pipe.generate(prompt, generation_config) static_pipe = ov_genai.LLMPipeline(model_path, "NPU", **common_config) @@ -220,7 +220,7 @@ def test_chat_generation(): model_path = read_model(get_chat_models_list()[0])[1] - chat_history_stateful = generate_chat_history(model_path, "CPU", get_default_llm_propeties(), questions) + chat_history_stateful = generate_chat_history(model_path, "CPU", get_default_llm_properties(), questions) chat_history_static = generate_chat_history(model_path, "NPU", common_config, questions) print('npu chat: \n{chat_history_static}\n') diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py index 0271218903..a8f5f86360 100644 --- a/tests/python_tests/test_vlm_pipeline.py +++ b/tests/python_tests/test_vlm_pipeline.py @@ -9,7 +9,7 @@ from openvino_genai import VLMPipeline, GenerationConfig from utils.generation_config import get_beam_search, get_multinomial_all_parameters -from utils.constants import get_default_llm_propeties +from utils.constants import get_default_llm_properties def get_ov_model(model_id, cache): model_dir = cache.mkdir(model_id.split('/')[-1]) @@ -20,7 +20,7 @@ def get_ov_model(model_id, cache): ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(processor.tokenizer, with_detokenizer=True) openvino.save_model(ov_tokenizer, model_dir / "openvino_tokenizer.xml") openvino.save_model(ov_detokenizer, model_dir / "openvino_detokenizer.xml") - model = OVModelForVisualCausalLM.from_pretrained(model_id, compile=False, device="CPU", export=True, load_in_8bit=False, trust_remote_code=True, ov_config=get_default_llm_propeties()) + model = OVModelForVisualCausalLM.from_pretrained(model_id, compile=False, device="CPU", export=True, load_in_8bit=False, trust_remote_code=True, ov_config=get_default_llm_properties()) if processor.tokenizer.chat_template is not None: processor.chat_template = processor.tokenizer.chat_template # It seems that tiny-random-phi3-vision is saved incorrectly. That line works this around. 
processor.save_pretrained(model_dir) diff --git a/tests/python_tests/utils/constants.py b/tests/python_tests/utils/constants.py index ef4c4b32e2..d33b6b7bf1 100644 --- a/tests/python_tests/utils/constants.py +++ b/tests/python_tests/utils/constants.py @@ -4,7 +4,7 @@ import openvino.properties.hint as hints import openvino as ov -def get_default_llm_propeties(): +def get_default_llm_properties(): return { hints.inference_precision : ov.Type.f32, hints.kv_cache_precision : ov.Type.f16, diff --git a/tests/python_tests/utils/hugging_face.py b/tests/python_tests/utils/hugging_face.py index d7f4f7e060..eddb43412c 100644 --- a/tests/python_tests/utils/hugging_face.py +++ b/tests/python_tests/utils/hugging_face.py @@ -12,7 +12,7 @@ from openvino_genai import GenerationResult, GenerationConfig, StopCriteria from openvino_tokenizers import convert_tokenizer -from utils.constants import get_default_llm_propeties +from utils.constants import get_default_llm_properties def generation_config_to_hf( default_generation_config : HFGenerationConfig, @@ -156,7 +156,7 @@ def run_hugging_face( def get_hugging_face_models(model_id: str): hf_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) - opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False, load_in_8bit=False, trust_remote_code=True, ov_config=get_default_llm_propeties()) + opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False, load_in_8bit=False, trust_remote_code=True, ov_config=get_default_llm_properties()) return opt_model, hf_tokenizer From 56ef645f25298fe8f8d83eaf5d3ca013427bdb2e Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Wed, 12 Feb 2025 13:01:03 +0400 Subject: [PATCH 14/14] Fix llm test --- tests/python_tests/test_llm_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_tests/test_llm_pipeline.py b/tests/python_tests/test_llm_pipeline.py index 721d2900b3..7dd4d98708 100644 --- a/tests/python_tests/test_llm_pipeline.py +++ b/tests/python_tests/test_llm_pipeline.py @@ -172,7 +172,7 @@ def test_chat_scenario_several_chats_in_series(): generation_config_kwargs, _ = chat_intpus[0] ov_generation_config = GenerationConfig(**generation_config_kwargs) - hf_generation_config = convert_to_hf(opt_model.generation_config, ov_generation_config) + hf_generation_config = generation_config_to_hf(opt_model.generation_config, ov_generation_config) for i in range(2): chat_history_hf = []
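
Taken together, the patches above finish moving the shared test helpers into tests/python_tests/utils (constants, generation_config, hugging_face, longbench, comparation). A minimal sketch of how these pieces are meant to compose after the refactor follows; it is illustrative only — model_id and models_path are placeholders, and the run_hugging_face signature is assumed from its use elsewhere in common.py rather than taken verbatim from these diffs.

# Illustrative sketch, not part of the patch series: exercises the relocated
# helpers end to end for a single greedy-decoding prompt.
from pathlib import Path

from openvino_genai import ContinuousBatchingPipeline, SchedulerConfig

from utils.constants import get_default_llm_properties
from utils.generation_config import get_greedy
from utils.hugging_face import get_hugging_face_models, convert_models, run_hugging_face
from utils.comparation import compare_generation_results

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"     # placeholder model
models_path = Path("/tmp/ov_models") / model_id     # placeholder output dir

prompts = ["What is OpenVINO?"]
generation_config = get_greedy()

# Export the HF model and convert tokenizer + IR into models_path.
opt_model, hf_tokenizer = get_hugging_face_models(model_id)
convert_models(opt_model, hf_tokenizer, models_path)

# Reference generation via HF/optimum; the exact signature is an assumption here.
hf_results = run_hugging_face(opt_model, hf_tokenizer, prompts, generation_config)

# OpenVINO GenAI generation with the shared defaults (f32 inference, f16 KV cache).
cb_pipe = ContinuousBatchingPipeline(models_path,
                                     scheduler_config=SchedulerConfig(),
                                     device='CPU',
                                     tokenizer_properties={},
                                     properties=get_default_llm_properties())
ov_results = cb_pipe.generate(prompts, [generation_config] * len(prompts))

# Text (and, for beam search, score) comparison now lives in utils/comparation.py.
compare_generation_results(prompts, hf_results, ov_results, generation_config)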