
[llm_bench] update optimum bench hook for transformer-based imagegen #1525

Merged

This PR updates the optimum benchmarking hooks so that transformer-based image-generation pipelines (which expose `pipe.transformer` instead of `pipe.unet`) are timed and labeled correctly, and adds prompt-token counting for the GenAI image-generation path.
7 changes: 5 additions & 2 deletions tools/llm_bench/llm_bench_utils/hook_forward.py
```diff
@@ -9,6 +9,7 @@ def __init__(self):
         self.text_encoder_step_count = 0
         self.unet_step_count = 0
         self.vae_decoder_step_count = 0
+        self.main_model_name = "unet"
 
     def get_text_encoder_latency(self):
         return (self.text_encoder_time / self.text_encoder_step_count) * 1000 if self.text_encoder_step_count > 0 else 0
@@ -56,7 +57,9 @@ def my_text_encoder(inputs, share_inputs=True, **kwargs):
         pipe.text_encoder.request = my_text_encoder
 
     def new_unet(self, pipe):
-        old_unet = pipe.unet.request
+        main_model = pipe.unet if pipe.unet is not None else pipe.transformer
+        self.main_model_name = "unet" if pipe.unet is not None else "transformer"
+        old_unet = main_model.request
 
         def my_unet(inputs, share_inputs=True, **kwargs):
             t1 = time.time()
@@ -66,7 +69,7 @@ def my_unet(inputs, share_inputs=True, **kwargs):
             self.unet_time_list.append(unet_time)
             self.unet_step_count += 1
             return r
-        pipe.unet.request = my_unet
+        main_model.request = my_unet
 
     def new_vae_decoder(self, pipe):
         old_vae_decoder = pipe.vae_decoder.request
```
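The hook times each denoising step by swapping the compiled model's `request` callable for a timing wrapper; with this change the wrapper attaches to `pipe.transformer` whenever the pipeline has no `pipe.unet` (as in SD3- or FLUX-style transformer pipelines). A minimal sketch of that monkey-patching pattern, using a stand-in model object rather than a real optimum pipeline:

```python
import time

class DummyModel:
    """Stand-in for a compiled model wrapper exposing a callable `request`."""
    def __init__(self):
        self.request = lambda inputs, share_inputs=True, **kwargs: "latents"

def attach_step_timer(main_model, time_list):
    # Keep a reference to the original request and substitute a timing
    # wrapper, mirroring how new_unet() patches main_model.request above.
    old_request = main_model.request

    def timed_request(inputs, share_inputs=True, **kwargs):
        t1 = time.time()
        result = old_request(inputs, share_inputs=share_inputs, **kwargs)
        time_list.append(time.time() - t1)
        return result

    main_model.request = timed_request

step_times = []
model = DummyModel()
attach_step_timer(model, step_times)
model.request({"sample": None})  # each call is now timed
print(f"{len(step_times)} timed step(s), last took {step_times[-1] * 1000:.3f} ms")
```

Because the original `request` is captured in the closure, the model still executes exactly as before; only the timing bookkeeping is added around it.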
8 changes: 4 additions & 4 deletions tools/llm_bench/llm_bench_utils/metrics_print.py
```diff
@@ -97,17 +97,17 @@ def print_stable_diffusion_infer_latency(iter_str, iter_data, stable_diffusion,
         iter_data['first_token_infer_latency'] = iter_data['first_token_latency']
         iter_data['other_tokens_infer_avg_latency'] = iter_data['other_tokens_avg_latency']
     prefix = f'[{iter_str}][P{prompt_idx}]'
-    log.info(f"{prefix} First step of unet latency: {iter_data['first_token_latency']:.2f} ms/step, "
-             f"other steps of unet latency: {iter_data['other_tokens_avg_latency']:.2f} ms/step",)
+    log.info(f"{prefix} First step of {stable_diffusion.main_model_name} latency: {iter_data['first_token_latency']:.2f} ms/step, "
+             f"other steps of {stable_diffusion.main_model_name} latency: {iter_data['other_tokens_avg_latency']:.2f} ms/step",)
     has_text_encoder_time = stable_diffusion.get_text_encoder_step_count() != -1
     log_str = (
         f"{prefix} Text encoder latency: {stable_diffusion.get_text_encoder_latency():.2f}" if has_text_encoder_time else f"{prefix} Text encoder latency: N/A "
-        f"unet latency: {stable_diffusion.get_unet_latency():.2f} ms/step, "
+        f"{stable_diffusion.main_model_name} latency: {stable_diffusion.get_unet_latency():.2f} ms/step, "
         f"vae decoder latency: {stable_diffusion.get_vae_decoder_latency():.2f} ms/step, ")
     if has_text_encoder_time:
         log_str += f"text encoder step count: {stable_diffusion.get_text_encoder_step_count()}, "
     log_str += (
-        f"unet step count: {stable_diffusion.get_unet_step_count()}, "
+        f"{stable_diffusion.main_model_name} step count: {stable_diffusion.get_unet_step_count()}, "
         f"vae decoder step count: {stable_diffusion.get_vae_decoder_step_count()}")
     log.info(log_str)
 
```
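Note the design choice here: the accessor names stay `get_unet_latency` and `get_unet_step_count` for both architectures, and only the human-readable label follows `main_model_name`, so callers of the hook API are untouched. A tiny illustration with made-up values (the class and numbers below are hypothetical, not from the PR):

```python
# Hypothetical sketch: the getter keeps its historical get_unet_* name,
# while the printed label tracks main_model_name.
class FakeHook:
    main_model_name = "transformer"

    def get_unet_latency(self):
        return 42.5  # ms/step, made-up value

hook = FakeHook()
print(f"{hook.main_model_name} latency: {hook.get_unet_latency():.2f} ms/step")
# prints: transformer latency: 42.50 ms/step
```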
10 changes: 7 additions & 3 deletions tools/llm_bench/llm_bench_utils/ov_utils.py
```diff
@@ -363,10 +363,11 @@ def create_genai_image_gen_model(model_path, device, ov_config, **kwargs):
     import openvino_genai
 
     class PerfCollector:
-        def __init__(self) -> types.NoneType:
+        def __init__(self, main_model_name="unet") -> types.NoneType:
             self.iteration_time = []
             self.start_time = time.perf_counter()
             self.duration = -1
+            self.main_model_name = main_model_name
 
         def __call__(self, step, num_steps, latents):
             self.iteration_time.append(time.perf_counter() - self.start_time)
@@ -405,8 +406,6 @@ def get_unet_step_count(self):
         def get_vae_decoder_step_count(self):
             return 1
 
-    callback = PerfCollector()
-
     adapter_config = get_lora_config(kwargs.get("lora", None), kwargs.get("lora_alphas", []))
     if adapter_config:
         ov_config['adapters'] = adapter_config
@@ -416,6 +415,11 @@ def get_vae_decoder_step_count(self):
         data = json.load(f)
 
     model_class_name = data.get("_class_name", "")
+    main_model_name = "unet" if "unet" in data else "transformer"
+    callback = PerfCollector(main_model_name)
+
+    orig_tokenizer = AutoTokenizer.from_pretrained(model_path, subfolder="tokenizer")
+    callback.orig_tokenizer = orig_tokenizer
 
     start = time.perf_counter()
```
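The GenAI path decides which label to use by inspecting the pipeline's component index: in a Diffusers-style export the components appear as top-level keys of the index JSON, so a `"unet"` key marks a UNet pipeline and its absence a transformer one. A small self-contained sketch of that check (assuming the file being read here is `model_index.json`, the same file that carries the `_class_name` field the surrounding code inspects; the example paths in the comments are illustrative):

```python
import json
from pathlib import Path

def detect_main_model_name(model_path: str) -> str:
    # Component names (unet or transformer, vae, text_encoder, ...) appear
    # as top-level keys of a Diffusers-style model_index.json.
    with open(Path(model_path) / "model_index.json") as f:
        data = json.load(f)
    return "unet" if "unet" in data else "transformer"

# e.g. detect_main_model_name("/models/stable-diffusion-v1-5")  ->  "unet"
# e.g. detect_main_model_name("/models/FLUX.1-dev")             ->  "transformer"
```

Constructing the `PerfCollector` after this check (rather than unconditionally, as before) lets the callback carry the right `main_model_name` from the start, and stashing `orig_tokenizer` on it makes the tokenizer available to the benchmark loop below.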
5 changes: 3 additions & 2 deletions tools/llm_bench/task/image_generation.py
```diff
@@ -123,6 +123,7 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
 def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data_list, proc_id, mem_consumption, callback=None):
     set_seed(args['seed'])
     input_text = image_param['prompt']
+    input_token_size = callback.orig_tokenizer(input_text, return_tensors="pt").input_ids.numel()
     input_args = collects_input_args(image_param, args['model_type'], args['model_name'], args["num_steps"], args.get("height"), args.get("width"), callback)
     out_str = f"Input params: Batch_size={args['batch_size']}, " \
               f"steps={input_args['num_inference_steps']}, width={input_args['width']}, height={input_args['height']}"
@@ -157,6 +158,7 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
     generation_time = end - start
     iter_data = gen_output_data.gen_iterate_data(
         iter_idx=num,
+        in_size=input_token_size * args['batch_size'],
         infer_count=input_args["num_inference_steps"],
         gen_time=generation_time,
         res_md5=result_md5_list,
@@ -230,8 +232,7 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
             prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
             log.info(f"{prefix}[P{p_idx}] start: {iter_timestamp[num][p_idx]['start']}, end: {iter_timestamp[num][p_idx]['end']}")
 
-    if not use_genai:
-        metrics_print.print_average(iter_data_list, prompt_idx_list, args['batch_size'], False)
+    metrics_print.print_average(iter_data_list, prompt_idx_list, args['batch_size'], False)
     return iter_data_list, pretrain_time, iter_timestamp
```
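The GenAI runner previously reported no input size; it now tokenizes the prompt with the original Hugging Face tokenizer stashed on the callback and records `input_token_size * batch_size` as `in_size`. Dropping the `if not use_genai` guard also means `print_average` now runs for GenAI runs, not just the optimum path. A sketch of the token accounting (the model path below is a placeholder, not a real export):

```python
from transformers import AutoTokenizer

# Placeholder path; llm_bench loads the tokenizer from the exported model's
# "tokenizer" subfolder, as create_genai_image_gen_model now does.
tokenizer = AutoTokenizer.from_pretrained("/path/to/exported/model", subfolder="tokenizer")

prompt = "an astronaut riding a horse on the moon"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
input_token_size = input_ids.numel()     # token count for one prompt

batch_size = 2
in_size = input_token_size * batch_size  # value passed to gen_iterate_data(in_size=...)
```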