From 7c302081253667070beea57ac2542829a1323f88 Mon Sep 17 00:00:00 2001 From: Vincent Huang Date: Thu, 13 Jan 2022 08:22:15 +0000 Subject: [PATCH] Fix demobert script error and benchmark failure Signed-off-by: Rajeev Rao --- demo/BERT/notebooks/Q-and-A.ipynb | 10 ++++--- demo/BERT/notebooks/benchmark.ipynb | 44 ++++++++++++++--------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/demo/BERT/notebooks/Q-and-A.ipynb b/demo/BERT/notebooks/Q-and-A.ipynb index ab65f6b7..c262a9cb 100755 --- a/demo/BERT/notebooks/Q-and-A.ipynb +++ b/demo/BERT/notebooks/Q-and-A.ipynb @@ -373,12 +373,13 @@ " elif device.value == 'CPU - Framework (PyTorch)':\n", " output.clear_output()\n", " for _ in range(N_RUN):\n", - " answer, eval_time_elapsed = nlp({\n", + " inference_time = time.time()\n", + " answer = nlp({\n", " 'question': question_text.value,\n", " 'context': paragraph_text.value\n", " })\n", " progress_bar.value += 1 \n", - " inference_time_arr.append(eval_time_elapsed)\n", + " inference_time_arr.append(time.time() - inference_time)\n", " \n", " print(\"Answer: '{}'\".format(answer['answer']))\n", " print(\"With probability: {:.2f}%\".format(answer['score']*100))\n", @@ -386,12 +387,13 @@ " elif device.value == 'GPU - Framework (PyTorch)': \n", " output.clear_output()\n", " for _ in range(N_RUN):\n", - " answer, eval_time_elapsed = nlp_gpu({\n", + " inference_time = time.time()\n", + " answer = nlp_gpu({\n", " 'question': question_text.value,\n", " 'context': paragraph_text.value\n", " })\n", " progress_bar.value += 1 \n", - " inference_time_arr.append(eval_time_elapsed)\n", + " inference_time_arr.append(time.time() - inference_time)\n", " \n", " print(\"Answer: '{}'\".format(answer['answer']))\n", " print(\"With probability: {:.2f}%\".format(answer['score']*100))\n", diff --git a/demo/BERT/notebooks/benchmark.ipynb b/demo/BERT/notebooks/benchmark.ipynb index f2697b32..69666732 100755 --- a/demo/BERT/notebooks/benchmark.ipynb +++ b/demo/BERT/notebooks/benchmark.ipynb @@ -145,20 +145,19 @@ " bench_times = {}\n", "\n", " for idx, batch_size in enumerate(sorted(args.batch_size)):\n", - " context.active_optimization_profile = 0\n", + " num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles\n", + " for idx in range(engine.num_optimization_profiles):\n", + " profile_shape = engine.get_profile_shape(profile_index = idx, binding = idx * num_binding_per_profile)\n", + " if profile_shape[0][0] <= batch_size and profile_shape[2][0] >= batch_size:\n", + " context.active_optimization_profile = idx\n", + " binding_idx_offset = idx * num_binding_per_profile\n", + " break\n", "\n", " # Each profile has unique bindings\n", - " bindings = [buf.binding() for buf in buffers]\n", - "\n", - " shapes = {\n", - " \"input_ids\": (args.sequence_length * batch_size, ),\n", - " \"segment_ids\": (args.sequence_length * batch_size, ),\n", - " \"cu_seqlens\": (batch_size + 1, ),\n", - " \"max_seqlen\": (args.sequence_length, ),\n", - " }\n", - "\n", - " for binding, shape in shapes.items():\n", - " context.set_binding_shape(engine[binding], shape)\n", + " bindings = [0] * binding_idx_offset + [buf.binding() for buf in buffers]\n", + " input_shape = (batch_size, args.sequence_length)\n", + " for binding in range(3):\n", + " context.set_binding_shape(binding_idx_offset + binding, input_shape)\n", " assert context.all_binding_shapes_specified\n", "\n", " # Inference\n", @@ -233,20 +232,19 @@ " bench_times = {}\n", "\n", " for idx, batch_size in enumerate(sorted(args.batch_size)):\n", - " context.active_optimization_profile = idx\n", + " num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles\n", + " for idx in range(engine.num_optimization_profiles):\n", + " profile_shape = engine.get_profile_shape(profile_index = idx, binding = idx * num_binding_per_profile)\n", + " if profile_shape[0][0] <= batch_size and profile_shape[2][0] >= batch_size:\n", + " context.active_optimization_profile = idx\n", + " binding_idx_offset = idx * num_binding_per_profile\n", + " break\n", "\n", " # Each profile has unique bindings\n", - " binding_idx_offset = idx * num_binding_per_profile\n", " bindings = [0] * binding_idx_offset + [buf.binding() for buf in buffers]\n", - "\n", - " shapes = {\n", - " \"input_ids\": (batch_size, args.sequence_length),\n", - " \"segment_ids\": (batch_size, args.sequence_length),\n", - " \"input_mask\": (batch_size, args.sequence_length),\n", - " }\n", - "\n", - " for binding, shape in shapes.items():\n", - " context.set_binding_shape(engine[binding] + binding_idx_offset, shape)\n", + " input_shape = (batch_size, args.sequence_length)\n", + " for binding in range(3):\n", + " context.set_binding_shape(binding_idx_offset + binding, input_shape)\n", " assert context.all_binding_shapes_specified\n", "\n", " # Inference\n",