diff --git a/src/triton_cli/__init__.py b/src/triton_cli/__init__.py index ed8df84..b55338f 100644 --- a/src/triton_cli/__init__.py +++ b/src/triton_cli/__init__.py @@ -24,4 +24,4 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -__version__ = "0.0.9" +__version__ = "0.0.10dev" diff --git a/src/triton_cli/client/client.py b/src/triton_cli/client/client.py index 344b596..0db429c 100644 --- a/src/triton_cli/client/client.py +++ b/src/triton_cli/client/client.py @@ -197,6 +197,13 @@ def generate_data(self, config: dict, data_mode: str): infer_inputs.append( self.__create_triton_input(name, shape, triton_dtype, data) ) + json_input = { + "name": name, + "shape": str(data.shape), + "dtype": triton_dtype, + "value": np.array_str(data), + } + logger.info(f"Input:\n{json.dumps(json_input, indent=4)}") return infer_inputs @@ -294,17 +301,18 @@ def __process_infer_result(self, result): for output in response["outputs"]: name = output["name"] # TODO: Need special logic for string/bytes type - np_data = result.as_numpy(name) + data = result.as_numpy(name) # WAR for LLMs - if np_data.dtype == np.object_: + if data.dtype == np.object_: # Assume 2D-output (batch_size, texts) - texts = np_data.flatten() - np_data = np.array([text.decode("utf-8") for text in texts]) + texts = data.flatten() + data = np.array([text.decode("utf-8") for text in texts]) - output_data_str = np.array_str(np_data) + output_data_str = np.array_str(data) json_output = { "name": name, - "shape": str(np_data.shape), + "shape": str(data.shape), + "dtype": output["datatype"], "value": output_data_str, } logger.info(f"Output:\n{json.dumps(json_output, indent=4)}")