
Commit 7c30208
Fix demobert script error and benchmark failure
Signed-off-by: Rajeev Rao <[email protected]>
ttyio authored and rajeevsrao committed Feb 4, 2022
1 parent 498dcb0 commit 7c30208
Showing 2 changed files with 27 additions and 27 deletions.
10 changes: 6 additions & 4 deletions demo/BERT/notebooks/Q-and-A.ipynb
@@ -373,25 +373,27 @@
" elif device.value == 'CPU - Framework (PyTorch)':\n",
" output.clear_output()\n",
" for _ in range(N_RUN):\n",
" answer, eval_time_elapsed = nlp({\n",
" inference_time = time.time()\n",
" answer = nlp({\n",
" 'question': question_text.value,\n",
" 'context': paragraph_text.value\n",
" })\n",
" progress_bar.value += 1 \n",
" inference_time_arr.append(eval_time_elapsed)\n",
" inference_time_arr.append(time.time() - inference_time)\n",
" \n",
" print(\"Answer: '{}'\".format(answer['answer']))\n",
" print(\"With probability: {:.2f}%\".format(answer['score']*100))\n",
" print(\"Average inference time (over {} runs): {:.2f} ms\".format(N_RUN, 1000*np.mean(inference_time_arr))) \n",
" elif device.value == 'GPU - Framework (PyTorch)': \n",
" output.clear_output()\n",
" for _ in range(N_RUN):\n",
" answer, eval_time_elapsed = nlp_gpu({\n",
" inference_time = time.time()\n",
" answer = nlp_gpu({\n",
" 'question': question_text.value,\n",
" 'context': paragraph_text.value\n",
" })\n",
" progress_bar.value += 1 \n",
" inference_time_arr.append(eval_time_elapsed)\n",
" inference_time_arr.append(time.time() - inference_time)\n",
" \n",
" print(\"Answer: '{}'\".format(answer['answer']))\n",
" print(\"With probability: {:.2f}%\".format(answer['score']*100))\n",
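
For context on the Q-and-A.ipynb change above: newer versions of the transformers question-answering pipeline return only the answer dict, so the notebook now measures wall-clock time around each call instead of unpacking a second eval_time_elapsed value. Below is a minimal standalone sketch of that timing pattern; the model name and the question/context strings are placeholders, not part of the commit.

# Minimal sketch (not part of the commit): time a transformers QA pipeline call
# with time.time(), since the pipeline returns only the answer dict rather than
# an (answer, eval_time_elapsed) pair.
import time

import numpy as np
from transformers import pipeline

# Placeholder model; the notebook builds its own PyTorch pipelines (nlp / nlp_gpu).
nlp = pipeline("question-answering",
               model="bert-large-uncased-whole-word-masking-finetuned-squad")

N_RUN = 10
inference_time_arr = []
for _ in range(N_RUN):
    start = time.time()                                  # start the wall-clock timer
    answer = nlp({
        "question": "What does the demo benchmark?",
        "context": "The TensorRT BERT demo benchmarks question answering inference.",
    })
    inference_time_arr.append(time.time() - start)       # elapsed seconds for this run

print("Answer: '{}'".format(answer["answer"]))
print("With probability: {:.2f}%".format(answer["score"] * 100))
print("Average inference time (over {} runs): {:.2f} ms".format(
    N_RUN, 1000 * np.mean(inference_time_arr)))
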
44 changes: 21 additions & 23 deletions demo/BERT/notebooks/benchmark.ipynb
@@ -145,20 +145,19 @@
" bench_times = {}\n",
"\n",
" for idx, batch_size in enumerate(sorted(args.batch_size)):\n",
" context.active_optimization_profile = 0\n",
" num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles\n",
" for idx in range(engine.num_optimization_profiles):\n",
" profile_shape = engine.get_profile_shape(profile_index = idx, binding = idx * num_binding_per_profile)\n",
" if profile_shape[0][0] <= batch_size and profile_shape[2][0] >= batch_size:\n",
" context.active_optimization_profile = idx\n",
" binding_idx_offset = idx * num_binding_per_profile\n",
" break\n",
"\n",
" # Each profile has unique bindings\n",
" bindings = [buf.binding() for buf in buffers]\n",
"\n",
" shapes = {\n",
" \"input_ids\": (args.sequence_length * batch_size, ),\n",
" \"segment_ids\": (args.sequence_length * batch_size, ),\n",
" \"cu_seqlens\": (batch_size + 1, ),\n",
" \"max_seqlen\": (args.sequence_length, ),\n",
" }\n",
"\n",
" for binding, shape in shapes.items():\n",
" context.set_binding_shape(engine[binding], shape)\n",
" bindings = [0] * binding_idx_offset + [buf.binding() for buf in buffers]\n",
" input_shape = (batch_size, args.sequence_length)\n",
" for binding in range(3):\n",
" context.set_binding_shape(binding_idx_offset + binding, input_shape)\n",
" assert context.all_binding_shapes_specified\n",
"\n",
" # Inference\n",
@@ -233,20 +232,19 @@
" bench_times = {}\n",
"\n",
" for idx, batch_size in enumerate(sorted(args.batch_size)):\n",
" context.active_optimization_profile = idx\n",
" num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles\n",
" for idx in range(engine.num_optimization_profiles):\n",
" profile_shape = engine.get_profile_shape(profile_index = idx, binding = idx * num_binding_per_profile)\n",
" if profile_shape[0][0] <= batch_size and profile_shape[2][0] >= batch_size:\n",
" context.active_optimization_profile = idx\n",
" binding_idx_offset = idx * num_binding_per_profile\n",
" break\n",
"\n",
" # Each profile has unique bindings\n",
" binding_idx_offset = idx * num_binding_per_profile\n",
" bindings = [0] * binding_idx_offset + [buf.binding() for buf in buffers]\n",
"\n",
" shapes = {\n",
" \"input_ids\": (batch_size, args.sequence_length),\n",
" \"segment_ids\": (batch_size, args.sequence_length),\n",
" \"input_mask\": (batch_size, args.sequence_length),\n",
" }\n",
"\n",
" for binding, shape in shapes.items():\n",
" context.set_binding_shape(engine[binding] + binding_idx_offset, shape)\n",
" input_shape = (batch_size, args.sequence_length)\n",
" for binding in range(3):\n",
" context.set_binding_shape(binding_idx_offset + binding, input_shape)\n",
" assert context.all_binding_shapes_specified\n",
"\n",
" # Inference\n",
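
For context on the benchmark.ipynb changes above: instead of hard-coding active_optimization_profile (0, or the batch-size loop index), each benchmarked batch size now searches for the optimization profile whose min/max batch range covers it, then offsets the binding indices into that profile before setting the three input shapes. The sketch below restates that flow as plain Python; engine, context, and buffers are assumed to come from the notebook's earlier setup cells, and select_profile / setup_bindings are illustrative helper names, not functions from the commit.

# Sketch (under the assumptions stated above) of the profile selection and
# binding-shape setup performed by the fixed benchmark cells, using the
# binding-based TensorRT Python API the notebook already relies on.

def select_profile(engine, context, batch_size):
    """Activate the optimization profile whose batch range covers batch_size.

    engine.get_profile_shape() returns the (min, opt, max) shapes of a binding
    for one profile; element [0] of each shape is the batch dimension here.
    Returns the binding index offset of the selected profile.
    """
    num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles
    for profile_idx in range(engine.num_optimization_profiles):
        min_shape, _opt_shape, max_shape = engine.get_profile_shape(
            profile_index=profile_idx, binding=profile_idx * num_binding_per_profile)
        if min_shape[0] <= batch_size <= max_shape[0]:
            context.active_optimization_profile = profile_idx
            return profile_idx * num_binding_per_profile
    raise RuntimeError("no optimization profile covers batch size {}".format(batch_size))


def setup_bindings(engine, context, buffers, batch_size, sequence_length):
    """Set input shapes for the active profile and build the bindings list."""
    binding_idx_offset = select_profile(engine, context, batch_size)

    # Bindings that belong to inactive profiles may stay 0; only the active
    # profile's bindings need real device buffers.
    bindings = [0] * binding_idx_offset + [buf.binding() for buf in buffers]

    # The profile's first three bindings are the BERT inputs (e.g. input_ids,
    # segment_ids, input_mask); they share a single (batch, sequence) shape.
    input_shape = (batch_size, sequence_length)
    for binding in range(3):
        context.set_binding_shape(binding_idx_offset + binding, input_shape)
    assert context.all_binding_shapes_specified
    return bindings

In the notebook these steps run inside the for idx, batch_size in enumerate(sorted(args.batch_size)): loop, immediately before the inference and timing code.
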
