address #3

lucidrains · Dec 12, 2024 · 3194f53 · 3194f53
1 parent fc44a58
commit 3194f53
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 1 deletion.
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'speculative-decoding',
   packages = find_packages(exclude=[]),
-  version = '0.1.2',
+  version = '0.1.4',
   license='MIT',
   description = 'Speculative Decoding',
   author = 'Phil Wang',

diff --git a/speculative_decoding/speculative_decoding.py b/speculative_decoding/speculative_decoding.py
@@ -234,6 +234,10 @@ def speculative_decoding(
                 cache = tuple(t[..., left_index:, :] for t in cache)
                 small_cache = tuple(t[..., left_index:, :] for t in small_cache)
 
+        else:
+            # if batch size of 1, just slice to be equal to the lone int in seq_lens
+            out = out[..., :seq_lens.item()]
+
         # sample the additional token, one of the tricks in the paper to better bound the worst case
 
         next_token = torch.multinomial(prob_next, 1)
@@ -388,6 +392,9 @@ def speculative_decoding_with_same_model(
                 out = out[:, left_index:]
                 cache = tuple(t[..., left_index:, :] for t in cache)
                 small_cache = tuple(t[..., left_index:, :] for t in small_cache)
+        else:
+            # if batch size of 1, just slice to be equal to the lone int in seq_lens
+            out = out[..., :seq_lens.item()]
 
         # sample the additional token, one of the tricks in the paper to better bound the worst case