Skip to content

Commit

Permalink
address #3
Browse files (browse the repository at this point in the history)
  • Loading branch information
lucidrains committed Dec 12, 2024
1 parent fc44a58 commit 3194f53
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
setup(
name = 'speculative-decoding',
packages = find_packages(exclude=[]),
- version = '0.1.2',
+ version = '0.1.4',
license='MIT',
description = 'Speculative Decoding',
author = 'Phil Wang',
Expand Down
7 changes: 7 additions & 0 deletions speculative_decoding/speculative_decoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,10 @@ def speculative_decoding(
cache = tuple(t[..., left_index:, :] for t in cache)
small_cache = tuple(t[..., left_index:, :] for t in small_cache)

else:
# if batch size of 1, just slice to be equal to the lone int in seq_lens
out = out[..., :seq_lens.item()]

# sample the additional token, one of the tricks in the paper to better bound the worst case

next_token = torch.multinomial(prob_next, 1)
Expand Down Expand Up @@ -388,6 +392,9 @@ def speculative_decoding_with_same_model(
out = out[:, left_index:]
cache = tuple(t[..., left_index:, :] for t in cache)
small_cache = tuple(t[..., left_index:, :] for t in small_cache)
else:
# if batch size of 1, just slice to be equal to the lone int in seq_lens
out = out[..., :seq_lens.item()]

# sample the additional token, one of the tricks in the paper to better bound the worst case

Expand Down

0 comments on commit 3194f53

Please sign in to comment.