Skip to content

Commit

Permalink
Rename output_* to return_*
Browse files (browse the repository at this point in the history)
  • Loading branch information
kthui committed Nov 7, 2024
1 parent 1773dea commit 29099df
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 44 deletions.
60 changes: 30 additions & 30 deletions ci/L0_additional_outputs_vllm/additional_outputs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def _get_inputs(
prompt,
stream=True,
sampling_parameters=None,
output_finish_reason=None,
output_cumulative_logprob=None,
output_num_token_ids=None,
return_finish_reason=None,
return_cumulative_logprob=None,
return_num_token_ids=None,
):
inputs = []

Expand All @@ -64,21 +64,21 @@ def _get_inputs(
)
)

if output_finish_reason is not None:
inputs.append(grpcclient.InferInput("output_finish_reason", [1], "BOOL"))
inputs[-1].set_data_from_numpy(np.array([output_finish_reason], dtype=bool))
if return_finish_reason is not None:
inputs.append(grpcclient.InferInput("return_finish_reason", [1], "BOOL"))
inputs[-1].set_data_from_numpy(np.array([return_finish_reason], dtype=bool))

if output_cumulative_logprob is not None:
if return_cumulative_logprob is not None:
inputs.append(
grpcclient.InferInput("output_cumulative_logprob", [1], "BOOL")
grpcclient.InferInput("return_cumulative_logprob", [1], "BOOL")
)
inputs[-1].set_data_from_numpy(
np.array([output_cumulative_logprob], dtype=bool)
np.array([return_cumulative_logprob], dtype=bool)
)

if output_num_token_ids is not None:
inputs.append(grpcclient.InferInput("output_num_token_ids", [1], "BOOL"))
inputs[-1].set_data_from_numpy(np.array([output_num_token_ids], dtype=bool))
if return_num_token_ids is not None:
inputs.append(grpcclient.InferInput("return_num_token_ids", [1], "BOOL"))
inputs[-1].set_data_from_numpy(np.array([return_num_token_ids], dtype=bool))

return inputs

Expand All @@ -104,12 +104,12 @@ def _assert_text_output_valid(self):
assert len(text_output) > 0, "output is empty"
assert text_output.count(" ") > 4, "output is not a sentence"

def _assert_finish_reason(self, output_finish_reason):
def _assert_finish_reason(self, return_finish_reason):
for i in range(len(self._responses)):
result, error = self._responses[i]["result"], self._responses[i]["error"]
assert error is None
finish_reason_np = result.as_numpy(name="finish_reason")
if output_finish_reason is None or output_finish_reason == False:
if return_finish_reason is None or return_finish_reason == False:
assert finish_reason_np is None
continue
finish_reason = finish_reason_np[0].decode("utf-8")
Expand All @@ -118,25 +118,25 @@ def _assert_finish_reason(self, output_finish_reason):
else:
assert finish_reason == "length"

def _assert_cumulative_logprob(self, output_cumulative_logprob):
def _assert_cumulative_logprob(self, return_cumulative_logprob):
prev_cumulative_logprob = 0.0
for response in self._responses:
result, error = response["result"], response["error"]
assert error is None
cumulative_logprob_np = result.as_numpy(name="cumulative_logprob")
if output_cumulative_logprob is None or output_cumulative_logprob == False:
if return_cumulative_logprob is None or return_cumulative_logprob == False:
assert cumulative_logprob_np is None
continue
cumulative_logprob = cumulative_logprob_np[0].astype(float)
assert cumulative_logprob != prev_cumulative_logprob
prev_cumulative_logprob = cumulative_logprob

def _assert_num_token_ids(self, output_num_token_ids):
def _assert_num_token_ids(self, return_num_token_ids):
for response in self._responses:
result, error = response["result"], response["error"]
assert error is None
num_token_ids_np = result.as_numpy(name="num_token_ids")
if output_num_token_ids is None or output_num_token_ids == False:
if return_num_token_ids is None or return_num_token_ids == False:
assert num_token_ids_np is None
continue
num_token_ids = num_token_ids_np[0].astype(int)
Expand All @@ -160,26 +160,26 @@ def _assert_num_token_ids(self, output_num_token_ids):
assert num_token_ids >= 0

@pytest.mark.parametrize("stream", [True, False])
@pytest.mark.parametrize("output_finish_reason", [None, True, False])
@pytest.mark.parametrize("output_cumulative_logprob", [None, True, False])
@pytest.mark.parametrize("output_num_token_ids", [None, True, False])
@pytest.mark.parametrize("return_finish_reason", [None, True, False])
@pytest.mark.parametrize("return_cumulative_logprob", [None, True, False])
@pytest.mark.parametrize("return_num_token_ids", [None, True, False])
def test_additional_outputs(
self,
stream,
output_finish_reason,
output_cumulative_logprob,
output_num_token_ids,
return_finish_reason,
return_cumulative_logprob,
return_num_token_ids,
):
inputs = self._get_inputs(
self._prompt,
stream=stream,
sampling_parameters=self._sampling_parameters,
output_finish_reason=output_finish_reason,
output_cumulative_logprob=output_cumulative_logprob,
output_num_token_ids=output_num_token_ids,
return_finish_reason=return_finish_reason,
return_cumulative_logprob=return_cumulative_logprob,
return_num_token_ids=return_num_token_ids,
)
self._llm_infer(inputs)
self._assert_text_output_valid()
self._assert_finish_reason(output_finish_reason)
self._assert_cumulative_logprob(output_cumulative_logprob)
self._assert_num_token_ids(output_num_token_ids)
self._assert_finish_reason(return_finish_reason)
self._assert_cumulative_logprob(return_cumulative_logprob)
self._assert_num_token_ids(return_num_token_ids)
8 changes: 4 additions & 4 deletions docs/additional_outputs.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ The reason why the sequence is finished. See
[here](https://github.com/vllm-project/vllm/blob/v0.6.3.post1/vllm/outputs.py#L26)
for more details.

To enable, set `output_finish_reason` input tensor to `True`. The reason will be
To enable, set the `return_finish_reason` input tensor to `True`. The reason will be
sent as a string on the `finish_reason` output tensor.

Supported since r24.11.
Expand All @@ -54,7 +54,7 @@ The cumulative log probability of the generated output text. See
[here](https://github.com/vllm-project/vllm/blob/v0.6.3.post1/vllm/outputs.py#L22)
for more details.

To enable, set `output_cumulative_logprob` input tensor to `True`. The floating
To enable, set the `return_cumulative_logprob` input tensor to `True`. The floating
point value will be sent on the `cumulative_logprob` output tensor.

Supported since r24.11.
Expand All @@ -68,7 +68,7 @@ presumed to be zero. See
[here](https://github.com/vllm-project/vllm/blob/v0.6.3.post1/vllm/outputs.py#L21)
for more details on the token IDs of the generated output text.

To enable, set `output_num_token_ids` input tensor to `True`. The unsigned
To enable, set the `return_num_token_ids` input tensor to `True`. The unsigned
integer value will be sent on the `num_token_ids` output tensor.

Supported since r24.11.
Expand All @@ -88,7 +88,7 @@ inputs[-1].set_data_from_numpy(
np.array(["example prompt".encode("utf-8")], dtype=np.object_)
)

inputs.append(grpcclient.InferInput("output_finish_reason", [1], "BOOL"))
inputs.append(grpcclient.InferInput("return_finish_reason", [1], "BOOL"))
inputs[-1].set_data_from_numpy(np.array([True], dtype=bool))

def callback(result, error):
Expand Down
20 changes: 10 additions & 10 deletions src/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,19 +89,19 @@ def _auto_complete_inputs_and_outputs(auto_complete_model_config):
"optional": True,
},
{
"name": "output_finish_reason",
"name": "return_finish_reason",
"data_type": "TYPE_BOOL",
"dims": [1],
"optional": True,
},
{
"name": "output_cumulative_logprob",
"name": "return_cumulative_logprob",
"data_type": "TYPE_BOOL",
"dims": [1],
"optional": True,
},
{
"name": "output_num_token_ids",
"name": "return_num_token_ids",
"data_type": "TYPE_BOOL",
"dims": [1],
"optional": True,
Expand Down Expand Up @@ -348,11 +348,11 @@ def _get_input_tensors(self, request):
else:
parameters = request.parameters()

# output_finish_reason, output_cumulative_logprob, output_num_token_ids
# return_finish_reason, return_cumulative_logprob, return_num_token_ids
additional_outputs = {
"output_finish_reason": None,
"output_cumulative_logprob": None,
"output_num_token_ids": None,
"return_finish_reason": None,
"return_cumulative_logprob": None,
"return_num_token_ids": None,
}
for tensor_name in additional_outputs.keys():
tensor = pb_utils.get_input_tensor_by_name(request, tensor_name)
Expand Down Expand Up @@ -445,7 +445,7 @@ def _create_response(
)

# finish_reason
if additional_outputs["output_finish_reason"]:
if additional_outputs["return_finish_reason"]:
finish_reason = [
str(output.finish_reason) for output in request_output.outputs
]
Expand All @@ -456,7 +456,7 @@ def _create_response(
)

# cumulative_logprob
if additional_outputs["output_cumulative_logprob"]:
if additional_outputs["return_cumulative_logprob"]:
cumulative_logprob = [
output.cumulative_logprob for output in request_output.outputs
]
Expand All @@ -468,7 +468,7 @@ def _create_response(
)

# num_token_ids
if additional_outputs["output_num_token_ids"]:
if additional_outputs["return_num_token_ids"]:
if prev_request_output is None:
# this is the first response
prev_lens = [0] * len(request_output.outputs)
Expand Down

0 comments on commit 29099df

Please sign in to comment.