Enable Logprobs in MLC Batch Serving #82

Merged
Commits (42)
ab47b41  Squashed commit for logprobs implementation. (zxybazh, Jan 22, 2024)
86f6fa1  fix None check (Jan 23, 2024)
9a29650  Change detokenization to using token ids. (zxybazh, Jan 25, 2024)
012388d  Fix wrong usage of token ids. Remove logging. (zxybazh, Jan 29, 2024)
db31164  extend benchmarks for logprobs (Jan 26, 2024)
be81755  fix test without logprobs (Jan 26, 2024)
e8ec3fc  clean code (Jan 26, 2024)
49187f5  black format engine_common.py (Jan 26, 2024)
013ed5a  logprobs is strictly bool, top_logprobs is int (Jan 26, 2024)
79ec413  refactor logprob info collection to not reduce performance (Jan 28, 2024)
fca1a6f  quick fix for check (Jan 29, 2024)
675b631  review fix (Jan 29, 2024)
18f80fa  fix list index out of range (Jan 29, 2024)
29ea525  rollback after rebase (Jan 29, 2024)
aa99322  test (Jan 29, 2024)
8fa785e  Merge pull request #7 from Deelvin/vc/benchmark (Jan 29, 2024)
d57b197  Squashed commit for logprobs implementation. (zxybazh, Jan 22, 2024)
7995c84  fix None check (Jan 23, 2024)
ae3fc5b  Change detokenization to using token ids. (zxybazh, Jan 25, 2024)
0cb036f  Fix wrong usage of token ids. Remove logging. (zxybazh, Jan 29, 2024)
ed51e7d  extend benchmarks for logprobs (Jan 26, 2024)
ff17ae2  fix test without logprobs (Jan 26, 2024)
f5e4339  clean code (Jan 26, 2024)
a3f6e8b  black format engine_common.py (Jan 26, 2024)
c54a410  logprobs is strictly bool, top_logprobs is int (Jan 26, 2024)
379d991  refactor logprob info collection to not reduce performance (Jan 28, 2024)
58bac8f  quick fix for check (Jan 29, 2024)
7de8d88  review fix (Jan 29, 2024)
661fa18  fix list index out of range (Jan 29, 2024)
6662a65  rollback after rebase (Jan 29, 2024)
970d7f8  test (Jan 29, 2024)
c58d69c  small fix (Jan 30, 2024)
ebae200  rename for the sake of clarity (Jan 30, 2024)
b2863d5  some fixes with cpu-gpu tensor copying (Jan 30, 2024)
57b3a35  refactor logprob pass to calculate (Jan 30, 2024)
4e29403  remove excess deps for token detokenization (Jan 30, 2024)
a9157b9  small clean (Jan 30, 2024)
39efb61  small clean (Jan 31, 2024)
601e68d  return None instead of list of Nones (Jan 31, 2024)
4f9241b  resolve conflicts (Jan 31, 2024)
7ec21a7  fix mypy (Jan 31, 2024)
7aa60ed  Merge pull request #8 from Deelvin/vc/perf (Jan 31, 2024)
Commit ebae20023a7d4b77a2c3b3e1f21c8279682dcc2b: "rename for the sake of clarity"
Valery Chernov committed Jan 31, 2024
serve/mlc_serve/engine/base.py (4 changes: 2 additions & 2 deletions)

@@ -17,9 +17,9 @@

 @dataclass
 class RawLogprobsInfo:
-    current_token: int
+    current_token_id: int
     current_logprob: float
-    top_tokens: Optional[np.array]
+    top_token_ids: Optional[np.array]
     top_logprobs: Optional[np.array]
     previous_tokens: Optional[List[int]]
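
The commit only renames fields, but for orientation, this is the full dataclass as it stands after the change, with descriptive comments added. The imports are inferred from the annotations; note that np.ndarray is the conventional type annotation where the PR itself writes np.array:

from dataclasses import dataclass
from typing import List, Optional

import numpy as np


@dataclass
class RawLogprobsInfo:
    current_token_id: int                 # id of the token actually sampled
    current_logprob: float                # its log-probability
    top_token_ids: Optional[np.ndarray]   # ids of the top-k alternative tokens
    top_logprobs: Optional[np.ndarray]    # log-probabilities of those alternatives
    previous_tokens: Optional[List[int]]  # earlier token ids, kept for detokenization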

serve/mlc_serve/engine/engine_common.py (10 changes: 5 additions & 5 deletions)

@@ -146,16 +146,16 @@ def logprob_detokenize(
         return None

     top_logprobs: List[TopLogprobs] = []
-    if logprob_info.top_tokens is not None and logprob_info.top_logprobs is not None:
-        top_tokens = list(zip(logprob_info.top_tokens, logprob_info.top_logprobs))
+    if logprob_info.top_token_ids is not None and logprob_info.top_logprobs is not None:
+        top_tokens = list(zip(logprob_info.top_token_ids, logprob_info.top_logprobs))
         if logprob_info.previous_tokens is None:
             logprob_info.previous_tokens = []
-        for top_token, top_logprob in top_tokens:
+        for top_token_id, top_logprob in top_tokens:
             # TODO(vvchernov): not clear what do we want
             # detokenized = tokenizer.convert_ids_to_tokens(
             #     logprob_info.previous_tokens + [top_token]
             # )[-1]
-            detokenized = tokenizer.decode(top_token)
+            detokenized = tokenizer.decode(top_token_id)
             top_logprobs.append(
                 TopLogprobs(
                     token=detokenized,
@@ -166,7 +166,7 @@ def logprob_detokenize(
             )

     logprobs_content = LogprobsContent(
-        token=tokenizer.decode([logprob_info.current_token]),
+        token=tokenizer.decode([logprob_info.current_token_id]),
         logprob=logprob_info.current_logprob,
         # TODO(vvchernov): implement bytes based on https://platform.openai.com/docs/api-reference/chat/object
         bytes=None,
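
Taken together, the two hunks turn a RawLogprobsInfo into OpenAI-style logprobs content. The following self-contained sketch replays that logic against any HuggingFace-style tokenizer; the TopLogprobs and LogprobsContent dataclasses below are simplified stand-ins for the engine's own types, not their actual definitions:

from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class TopLogprobs:  # simplified stand-in for mlc_serve's type
    token: str
    logprob: float
    bytes: Optional[bytes] = None


@dataclass
class LogprobsContent:  # simplified stand-in for mlc_serve's type
    token: str
    logprob: float
    bytes: Optional[bytes] = None
    top_logprobs: List[TopLogprobs] = field(default_factory=list)


def logprob_detokenize_sketch(tokenizer, logprob_info) -> Optional[LogprobsContent]:
    """Decode a RawLogprobsInfo into OpenAI-style logprobs content."""
    if logprob_info is None:
        return None
    top_logprobs: List[TopLogprobs] = []
    if logprob_info.top_token_ids is not None and logprob_info.top_logprobs is not None:
        for top_token_id, top_logprob in zip(
            logprob_info.top_token_ids, logprob_info.top_logprobs
        ):
            # tokenizer.decode accepts a single token id or a list of ids
            top_logprobs.append(
                TopLogprobs(
                    token=tokenizer.decode(top_token_id),
                    logprob=float(top_logprob),
                )
            )
    return LogprobsContent(
        token=tokenizer.decode([logprob_info.current_token_id]),
        logprob=logprob_info.current_logprob,
        top_logprobs=top_logprobs,
    )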
serve/mlc_serve/model/model_common.py (12 changes: 6 additions & 6 deletions)

@@ -40,11 +40,11 @@ def get_num_cache_blocks(

 def get_raw_logprob_info(
     logits,
-    token,
+    token_id,
     top_logprobs_num,
 ) -> RawLogprobsInfo:
     logprobs = torch.log_softmax(logits, dim=-1)
-    res_logprob = logprobs[token].cpu().numpy()
+    res_logprob = logprobs[token_id].cpu().numpy()

     if top_logprobs_num == 0:
         top_logprobs = None
@@ -59,9 +59,9 @@ def get_raw_logprob_info(
     # Set to raw logprob info
     return RawLogprobsInfo(
-        current_token=token,
+        current_token_id=token_id,
         current_logprob=res_logprob,
-        top_tokens=top_tokens,
+        top_token_ids=top_tokens,
         top_logprobs=top_logprobs,
         previous_tokens=None
     )
@@ -72,7 +72,7 @@ def get_masked_logprobs(
     mask: torch.Tensor,
     sampling_params: List[SamplingParams],
     logits: torch.Tensor,
-    tokens: torch.Tensor,
+    token_ids: torch.Tensor,
 ) -> List[Optional[RawLogprobsInfo]]:
     num_seq = len(logprob_infos)

@@ -82,7 +82,7 @@ def get_masked_logprobs(
         if sampling_params[i].logprobs:
             logprob_infos[i] = get_raw_logprob_info(
                 logits[mask_counter],
-                tokens[mask_counter],
+                token_ids[mask_counter],
                 sampling_params[i].top_logprobs,
             )
             mask_counter = mask_counter + 1
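
For reference, the core computation in get_raw_logprob_info is a log-softmax over the vocabulary, a lookup of the sampled token, and, when requested, a top-k over the same distribution. A minimal runnable sketch of that step follows; the torch.topk call stands in for the part of the function the diff elides, so treat it as an assumption rather than the PR's exact code:

import torch


def raw_logprob_sketch(logits: torch.Tensor, token_id: int, top_logprobs_num: int):
    """Return the sampled token's logprob plus optional top-k alternatives."""
    logprobs = torch.log_softmax(logits, dim=-1)   # normalize once over the vocab
    res_logprob = logprobs[token_id].cpu().numpy()

    if top_logprobs_num == 0:
        return res_logprob, None, None

    # Assumed implementation of the top-k extraction elided by the diff.
    top_logprobs, top_token_ids = torch.topk(logprobs, top_logprobs_num, dim=-1)
    return res_logprob, top_token_ids.cpu().numpy(), top_logprobs.cpu().numpy()


# Example: logprob of token id 1 plus its two strongest alternatives.
logits = torch.tensor([2.0, 1.0, 0.5, -1.0])
lp, ids, lps = raw_logprob_sketch(logits, token_id=1, top_logprobs_num=2)

get_masked_logprobs then walks the batch: for each sequence whose sampling_params request logprobs, it selects the matching logits row and sampled token id via mask_counter and calls get_raw_logprob_info; all other entries stay None.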