From 66a2a0d47dbff005e520f0634ab628a40c1a247b Mon Sep 17 00:00:00 2001
From: Yingbei
Date: Mon, 17 Jun 2024 17:17:00 -0700
Subject: [PATCH] updated readme, fix function call postprocess

---
 README.md                                | 33 +++++++++++++++++-------
 examples/server/function-call-parser.hpp | 12 +++++----
 examples/server/utils.hpp                |  2 +-
 3 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 6c24135d61934..7150d6f00b158 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,32 @@
-# llama.cpp
+# tools.cpp
 
-![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
+### tools.cpp quickstart
+1. Build from source:
+
+Mac users:
+```
+make
+```
 
-[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
-[![Server](https://github.com/ggerganov/llama.cpp/actions/workflows/server.yml/badge.svg?branch=master&event=schedule)](https://github.com/ggerganov/llama.cpp/actions/workflows/server.yml)
-[![Conan Center](https://shields.io/conan/v/llama-cpp)](https://conan.io/center/llama-cpp)
+Nvidia CUDA users:
+```
+make LLAMA_CUDA=1
+```
 
-[Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggerganov/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml)
+2. Install the helper package:
+```
+npm install jsonrepair
+```
 
-Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
+3. Download a compatible gguf model:
+For example:
+```
+wget https://huggingface.co/sanjay920/Llama-3-8b-function-calling-alpha-v1.gguf/resolve/main/Llama-3-8b-function-calling-alpha-v1.gguf
+```
 
-> [!IMPORTANT]
-[2024 Jun 12] Binaries have been renamed w/ a `llama-` prefix. `main` is now `llama-cli`, `server` is `llama-server`, etc (https://github.com/ggerganov/llama.cpp/pull/7809)
+4. Start the server:
+```
+./llama-server -ngl 35 -m Llama-3-8b-function-calling-alpha-v1.gguf --port 1234 --host 0.0.0.0 -c 16000 --chat-template llama3
+```
 
 ### Recent API changes

diff --git a/examples/server/function-call-parser.hpp b/examples/server/function-call-parser.hpp
index a9e8cc297910c..e131c96326f59 100644
--- a/examples/server/function-call-parser.hpp
+++ b/examples/server/function-call-parser.hpp
@@ -68,7 +68,6 @@ json clean_json_strings(const std::string& input_str) {
         // json repair here
         std::string fixed_str = jsonrepair(input_str);
         json data = json::parse(fixed_str);
-
         for (auto& [key, value] : data.items()) {
             if (value.is_string()) {
                 std::string val = value.get<std::string>();
@@ -82,6 +81,7 @@ json clean_json_strings(const std::string& input_str) {
                     if (v.is_string()) {
                         v = clean_command_string(v.get<std::string>());
                     }
+
                 }
             }
         }
 
@@ -97,7 +97,7 @@ json clean_json_strings(const std::string& input_str) {
 
 std::vector<json> rubra_fc_json_tool_extractor(const std::string& output_str) {
     std::vector<json> result;
-    printf("OUTPUT STR TO BE PARSED : %s", output_str.c_str());
+    printf("OUTPUT STR TO BE PARSED : %s\n", output_str.c_str());
     if (output_str.find("endtoolcall") == std::string::npos) {
         return result;
     }
@@ -111,7 +111,8 @@ std::vector<json> rubra_fc_json_tool_extractor(const std::string& output_str) {
         size_t pos = segment.find("starttoolcall");
         if (pos != std::string::npos) {
             // Extract substring after "starttoolcall"
-            listOfStrToParse.push_back(segment.substr(pos + std::string("starttoolcall").length()));
+            std::string ss = segment.substr(pos + std::string("starttoolcall").length());
+            listOfStrToParse.push_back(ss);
         }
         start = end + std::string("endtoolcall").length(); // Move past the "endtoolcall"
     }
@@ -121,9 +122,10 @@ std::vector<json> rubra_fc_json_tool_extractor(const std::string& output_str) {
     try {
         for (const auto & line : listOfStrToParse) {
             // json fc = json::parse(line);
+
             json fc = clean_json_strings(line);
-            if (fc["arguments"].is_string()) {
-                fc["arguments"] = json::parse(fc["arguments"].get<std::string>());
+            if (!fc["arguments"].is_string()) {
+                fc["arguments"] = fc["arguments"].dump();
             }
             if (!fc.is_null()) {
                 function_call_json.push_back(fc);

diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 4e91eb9e04ecd..75ad38bb061e5 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -474,7 +474,7 @@ static json format_final_response_oaicompat(const json & request, json result, c
 
                 tool_call["function"] = json{
                     {"name"      , pc["name"]},
-                    {"arguments" , pc["kwargs"].dump()},
+                    {"arguments" , pc["kwargs"]},
                 };
                 oai_format_tool_calls.push_back(tool_call);
             }
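
Reviewer note: the `function-call-parser.hpp` hunks implement a parsing contract worth spelling out: tool calls are delimited by `starttoolcall`/`endtoolcall` markers in the model output, and after this patch a non-string `arguments` value is re-serialized with `dump()` so clients receive it as a JSON-encoded string. Below is a minimal standalone sketch of that contract, not the patched code itself. It assumes the `json` alias is nlohmann::json (consistent with the `json::parse`/`items()`/`dump()` calls in the diff), and `extract_tool_calls` is a hypothetical stand-in for `rubra_fc_json_tool_extractor`; the `jsonrepair` cleanup step is omitted.

```
// Sketch only: mirrors the marker handling and the patched "arguments"
// normalization from rubra_fc_json_tool_extractor.
#include <iostream>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

std::vector<json> extract_tool_calls(const std::string & output) {
    std::vector<json> calls;
    size_t start = 0;
    while (true) {
        // Find the next starttoolcall ... endtoolcall span.
        size_t b = output.find("starttoolcall", start);
        if (b == std::string::npos) break;
        b += std::string("starttoolcall").length();
        size_t e = output.find("endtoolcall", b);
        if (e == std::string::npos) break;

        json fc = json::parse(output.substr(b, e - b));
        if (!fc["arguments"].is_string()) {
            // Same normalization as the patched extractor: serialize
            // structured arguments into a JSON-encoded string.
            fc["arguments"] = fc["arguments"].dump();
        }
        calls.push_back(fc);
        start = e + std::string("endtoolcall").length();
    }
    return calls;
}

int main() {
    const std::string out =
        R"(starttoolcall{"name": "get_weather", "arguments": {"city": "Boston"}}endtoolcall)";
    for (const auto & fc : extract_tool_calls(out)) {
        // "arguments" prints as an escaped JSON string, not a nested object.
        std::cout << fc.dump(2) << "\n";
    }
}
```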
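The `utils.hpp` hunk is the other half of that fix: since the extractor now stores `arguments` as an already-serialized string, calling `.dump()` again in `format_final_response_oaicompat` would double-encode it. A small sketch of the difference, again assuming nlohmann::json:

```
// Sketch only: shows the double encoding the utils.hpp change avoids.
#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    json args = {{"city", "Boston"}};
    std::string once  = args.dump();        // {"city":"Boston"}  <- what OpenAI-style clients expect
    std::string twice = json(once).dump();  // "{\"city\":\"Boston\"}"  <- dumped a second time
    std::cout << once << "\n" << twice << "\n";
}
```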