From 7c302081253667070beea57ac2542829a1323f88 Mon Sep 17 00:00:00 2001
From: Vincent Huang <vincenth@nvidia.com>
Date: Thu, 13 Jan 2022 08:22:15 +0000
Subject: [PATCH] Fix demoBERT notebook script error and benchmark failure

Signed-off-by: Rajeev Rao <rajeevrao@nvidia.com>
---
 demo/BERT/notebooks/Q-and-A.ipynb   | 10 ++++---
 demo/BERT/notebooks/benchmark.ipynb | 44 ++++++++++++++---------------
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/demo/BERT/notebooks/Q-and-A.ipynb b/demo/BERT/notebooks/Q-and-A.ipynb
index ab65f6b7..c262a9cb 100755
--- a/demo/BERT/notebooks/Q-and-A.ipynb
+++ b/demo/BERT/notebooks/Q-and-A.ipynb
@@ -373,12 +373,13 @@
     "        elif device.value == 'CPU - Framework (PyTorch)':\n",
     "            output.clear_output()\n",
     "            for _ in range(N_RUN):\n",
-    "                answer, eval_time_elapsed = nlp({\n",
+    "                inference_time = time.time()\n",
+    "                answer = nlp({\n",
     "                        'question': question_text.value,\n",
     "                        'context': paragraph_text.value\n",
     "                        })\n",
     "                progress_bar.value += 1                \n",
-    "                inference_time_arr.append(eval_time_elapsed)\n",
+    "                inference_time_arr.append(time.time() - inference_time)\n",
     "                \n",
     "            print(\"Answer: '{}'\".format(answer['answer']))\n",
     "            print(\"With probability: {:.2f}%\".format(answer['score']*100))\n",
@@ -386,12 +387,13 @@
     "        elif  device.value == 'GPU - Framework (PyTorch)':  \n",
     "            output.clear_output()\n",
     "            for _ in range(N_RUN):\n",
-    "                answer, eval_time_elapsed = nlp_gpu({\n",
+    "                inference_time = time.time()\n",
+    "                answer = nlp_gpu({\n",
     "                        'question': question_text.value,\n",
     "                        'context': paragraph_text.value\n",
     "                        })\n",
     "                progress_bar.value += 1                \n",
-    "                inference_time_arr.append(eval_time_elapsed)\n",
+    "                inference_time_arr.append(time.time() - inference_time)\n",
     "                \n",
     "            print(\"Answer: '{}'\".format(answer['answer']))\n",
     "            print(\"With probability: {:.2f}%\".format(answer['score']*100))\n",
diff --git a/demo/BERT/notebooks/benchmark.ipynb b/demo/BERT/notebooks/benchmark.ipynb
index f2697b32..69666732 100755
--- a/demo/BERT/notebooks/benchmark.ipynb
+++ b/demo/BERT/notebooks/benchmark.ipynb
@@ -145,20 +145,19 @@
     "            bench_times = {}\n",
     "\n",
     "            for idx, batch_size in enumerate(sorted(args.batch_size)):\n",
-    "                context.active_optimization_profile = 0\n",
+    "                num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles\n",
+    "                for idx in range(engine.num_optimization_profiles):\n",
+    "                    profile_shape = engine.get_profile_shape(profile_index = idx, binding = idx * num_binding_per_profile)\n",
+    "                    if profile_shape[0][0] <= batch_size and profile_shape[2][0] >= batch_size:\n",
+    "                        context.active_optimization_profile = idx\n",
+    "                        binding_idx_offset = idx * num_binding_per_profile\n",
+    "                        break\n",
     "\n",
     "                # Each profile has unique bindings\n",
-    "                bindings = [buf.binding() for buf in buffers]\n",
-    "\n",
-    "                shapes = {\n",
-    "                    \"input_ids\": (args.sequence_length * batch_size, ),\n",
-    "                    \"segment_ids\": (args.sequence_length * batch_size, ),\n",
-    "                    \"cu_seqlens\": (batch_size + 1, ),\n",
-    "                    \"max_seqlen\": (args.sequence_length, ),\n",
-    "                }\n",
-    "\n",
-    "                for binding, shape in shapes.items():\n",
-    "                    context.set_binding_shape(engine[binding], shape)\n",
+    "                bindings = [0] * binding_idx_offset + [buf.binding() for buf in buffers]\n",
+    "                input_shape = (batch_size, args.sequence_length)\n",
+    "                for binding in range(3):\n",
+    "                    context.set_binding_shape(binding_idx_offset + binding, input_shape)\n",
     "                assert context.all_binding_shapes_specified\n",
     "\n",
     "                # Inference\n",
@@ -233,20 +232,19 @@
     "            bench_times = {}\n",
     "\n",
     "            for idx, batch_size in enumerate(sorted(args.batch_size)):\n",
-    "                context.active_optimization_profile = idx\n",
+    "                num_binding_per_profile = engine.num_bindings // engine.num_optimization_profiles\n",
+    "                for idx in range(engine.num_optimization_profiles):\n",
+    "                    profile_shape = engine.get_profile_shape(profile_index = idx, binding = idx * num_binding_per_profile)\n",
+    "                    if profile_shape[0][0] <= batch_size and profile_shape[2][0] >= batch_size:\n",
+    "                        context.active_optimization_profile = idx\n",
+    "                        binding_idx_offset = idx * num_binding_per_profile\n",
+    "                        break\n",
     "\n",
     "                # Each profile has unique bindings\n",
-    "                binding_idx_offset = idx * num_binding_per_profile\n",
     "                bindings = [0] * binding_idx_offset + [buf.binding() for buf in buffers]\n",
-    "\n",
-    "                shapes = {\n",
-    "                    \"input_ids\": (batch_size, args.sequence_length),\n",
-    "                    \"segment_ids\": (batch_size, args.sequence_length),\n",
-    "                    \"input_mask\": (batch_size, args.sequence_length),\n",
-    "                }\n",
-    "\n",
-    "                for binding, shape in shapes.items():\n",
-    "                    context.set_binding_shape(engine[binding] + binding_idx_offset, shape)\n",
+    "                input_shape = (batch_size, args.sequence_length)\n",
+    "                for binding in range(3):\n",
+    "                    context.set_binding_shape(binding_idx_offset + binding, input_shape)\n",
     "                assert context.all_binding_shapes_specified\n",
     "\n",
     "                # Inference\n",