cleanup unused code, update readme
tybalex committed Jun 29, 2024
1 parent ef19dd7 commit e2f621e
Showing 5 changed files with 93 additions and 129 deletions.
22 changes: 19 additions & 3 deletions README.md
@@ -1,6 +1,6 @@
 # tools.cpp
 
-### tools.cpp quickstart
+## tools.cpp quickstart
 1. build from source:
 
 - Mac user
@@ -18,7 +18,7 @@ make LLAMA_CUDA=1
 npm install jsonrepair
 ```
 
-3. Download a compatible Rubra's gguf model:
+3. Download a compatible Rubra GGUF model:
 For example:
 ```
 wget https://huggingface.co/rubra-ai/Llama-3-8b-function-calling-alpha-v1.gguf/resolve/main/Llama-3-8b-function-calling-alpha-v1.gguf
@@ -29,7 +29,7 @@ wget https://huggingface.co/rubra-ai/Llama-3-8b-function-calling-alpha-v1.gguf/r
 ./llama-server -ngl 37 -m Llama-3-8b-function-calling-alpha-v1.gguf --port 1234 --host 0.0.0.0 -c 8000 --chat-template llama3
 ```
 
-5. Test the server, make sure it is available:
+5. Test the server, ensure it is available:
 ```bash
 curl localhost:1234/v1/chat/completions \
   -H "Content-Type: application/json" \
@@ -95,6 +95,22 @@ That's it! MAKE SURE you turn `stream` OFF when making api calls to the server,
 
 For more function calling examples, you can check out the `test_llamacpp.ipynb` notebook.
 
+### Choosing a Chat Template for Different Models
+
+| Model   | Chat Template |
+|---------|:-------------:|
+| Llama3  | llama3        |
+| Mistral | llama2        |
+| Phi3    | phi3          |
+| Gemma   | gemma         |
+| Qwen2   | chatml        |
+
+For example, to run [Rubra's enhanced Phi3 model](https://huggingface.co/rubra-ai/Phi-3-mini-128k-instruct-function-calling-alpha-v1-GGUF), use the following command:
+
+```bash
+./llama-server -ngl 37 -m phi-3-mini-128k-instruct-function-calling-alpha-v1.Q8_0.gguf --port 1234 --host 0.0.0.0 -c 32000 --chat-template phi3
+```
+
 ### Recent API changes
 
 - [2024 Apr 21] `llama_token_to_piece` can now optionally render special tokens https://github.com/ggerganov/llama.cpp/pull/6807
8 changes: 5 additions & 3 deletions examples/server/function-call-parser.hpp
@@ -5,7 +5,7 @@
 #include <memory>
 
 using json = nlohmann::ordered_json;
-
+extern bool server_verbose;
 
 std::string generate_uuid() {
     static std::random_device rd;
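The hunk also shows the start of `generate_uuid()`, whose body is collapsed past the `static std::random_device rd;` seed line. For orientation only, here is a hypothetical sketch of what a `<random>`-based UUID-style generator can look like; everything beyond the seed line is an assumption, not the committed code:

```cpp
// Hypothetical sketch only: generate_uuid()'s real body is collapsed above,
// so this shows one plausible <random>-based shape, not the committed code.
#include <random>
#include <sstream>
#include <string>

std::string generate_uuid_sketch() {
    static std::random_device rd;                  // seed source, as in the diff
    static std::mt19937 gen(rd());                 // assumed PRNG choice
    std::uniform_int_distribution<int> dis(0, 15);
    const char* hex = "0123456789abcdef";
    std::ostringstream oss;
    for (int i = 0; i < 32; ++i) {
        if (i == 8 || i == 12 || i == 16 || i == 20) oss << '-';
        oss << hex[dis(gen)];                      // random hex digit
    }
    return oss.str();
}
```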
@@ -87,7 +87,7 @@ json clean_json_strings(const std::string& input_str) {
         }
         return data;
     } catch (const json::parse_error& e) {
-        std::cout << "Error decoding JSON: " << e.what() << std::endl;
+        std::cerr << "Error decoding JSON: " << e.what() << std::endl;
         return nullptr;
     }
 }
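This hunk routes `clean_json_strings` parse failures to `std::cerr` and returns a null JSON value. Below is a minimal standalone sketch of that parse-or-null pattern, assuming nlohmann/json as in the surrounding file (the real function also performs string clean-up that is collapsed here):

```cpp
// Minimal sketch of the parse-or-null error path shown in the hunk above.
#include <nlohmann/json.hpp>
#include <iostream>
#include <string>

using json = nlohmann::ordered_json;

json parse_or_null(const std::string& input_str) {
    try {
        return json::parse(input_str);
    } catch (const json::parse_error& e) {
        std::cerr << "Error decoding JSON: " << e.what() << std::endl;
        return nullptr;  // callers can detect failure with .is_null()
    }
}
```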
@@ -97,7 +97,9 @@ json clean_json_strings(const std::string& input_str) {
 
 std::vector<json> rubra_fc_json_tool_extractor(const std::string& output_str) {
     std::vector<json> result;
-    printf("OUTPUT STR TO BE PARSED : %s\n", output_str.c_str());
+    if (server_verbose) {
+        std::cout << "Output to Parse : " << output_str.c_str() << std::endl;
+    }
     if (output_str.find("endtoolcall") == std::string::npos) {
         return result;
     }
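The extractor's fast path returns early when no `endtoolcall` marker appears in the model output; the rest of its body is collapsed in this view. As a rough illustration of the marker-scan idea only, the sketch below assumes a paired `starttoolcall` opener — the opening marker and the skip-on-error behavior are assumptions, not the committed grammar:

```cpp
// Rough sketch of marker-based tool-call extraction. Only "endtoolcall" is
// visible in the diff; "starttoolcall" is an assumed opener for illustration.
#include <nlohmann/json.hpp>
#include <string>
#include <vector>

using json = nlohmann::ordered_json;

std::vector<json> extract_tool_calls_sketch(const std::string& output_str) {
    std::vector<json> result;
    if (output_str.find("endtoolcall") == std::string::npos) {
        return result;  // fast path, as in the committed code
    }
    const std::string open = "starttoolcall";  // assumption
    const std::string close = "endtoolcall";
    size_t pos = 0;
    while (true) {
        size_t start = output_str.find(open, pos);
        if (start == std::string::npos) break;
        size_t body = start + open.size();
        size_t end = output_str.find(close, body);
        if (end == std::string::npos) break;
        try {
            result.push_back(json::parse(output_str.substr(body, end - body)));
        } catch (const json::parse_error&) {
            // this sketch skips malformed segments; the real parser may repair them
        }
        pos = end + close.size();
    }
    return result;
}
```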
3 changes: 1 addition & 2 deletions examples/server/function-call.hpp
@@ -237,7 +237,6 @@ const std::vector<json> expand_messages(const json & body, json &tool_name_map)
     }
 
     if (function_str != "") {
-        printf("\n=============Formatting function call Input from OPENAI format...============\n");
         const std::vector<json> expanded_messages = [&]() {
             std::vector<json> temp_vec;
             nlohmann::ordered_map<std::string, std::string> func_observation_map;
@@ -289,7 +288,7 @@ const std::vector<json> expand_messages(const json & body, json &tool_name_map)
                 if (func_observation_map.find(tool_call_id) != func_observation_map.end()) {
                     func_observation_map[tool_call_id] = body["messages"][i]["content"].get<std::string>();
                 } else {
-                    printf("Tool call id not found in the map : %s", tool_call_id.c_str());
+                    std::cerr << "Tool call id not found in the map :" << tool_call_id.c_str() << std::endl;
                     // TODO: the input is not valid in this case, should return an error
                 }

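This hunk guards writes into `func_observation_map` so a tool result is only recorded against a previously registered `tool_call_id`. Condensed into a standalone helper for clarity (a sketch; the map type is taken from the earlier hunk in this file):

```cpp
// Condensed sketch of the guarded-write pattern in the hunk above: record a
// tool observation only for a tool_call_id that was registered earlier.
#include <nlohmann/json.hpp>  // provides nlohmann::ordered_map
#include <iostream>
#include <string>

void record_observation(nlohmann::ordered_map<std::string, std::string>& func_observation_map,
                        const std::string& tool_call_id,
                        const std::string& content) {
    if (func_observation_map.find(tool_call_id) != func_observation_map.end()) {
        func_observation_map[tool_call_id] = content;
    } else {
        std::cerr << "Tool call id not found in the map : " << tool_call_id << std::endl;
    }
}
```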
2 changes: 1 addition & 1 deletion examples/server/utils.hpp
@@ -500,7 +500,7 @@ static json format_final_response_oaicompat(const json & request, json result, c
         }},
         {"id", completion_id}
     };
-    printf("==============formatted_final_response_oaicompat================\n %s\n\n", res.dump().c_str());
+    LOG_VERBOSE("final_oai_response", {{"response:", res.dump().c_str()}});
 
     if (server_verbose) {
         res["__verbose"] = result;
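`LOG_VERBOSE` is the server's existing structured logging macro; together with the `server_verbose` checks added in the other files, this commit replaces unconditional `printf` dumps with verbose-gated output. A generic stand-in for that pattern (illustrative only, not the server's real macro):

```cpp
// Illustrative stand-in for flag-gated debug output; the server's real
// LOG_VERBOSE macro is richer (structured key/value logging).
#include <iostream>
#include <string>

static bool server_verbose = false;  // toggled by the server's verbose setting

static void log_verbose_sketch(const std::string& tag, const std::string& payload) {
    if (server_verbose) {
        std::cout << tag << ": " << payload << std::endl;
    }
}
```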