Skip to content

Commit

Permalink
make yaml formatter and extractor work. TODO: yaml-parser needs to…
Browse files Browse the repository at this point in the history
… handle the data types of args correctly instead of treating everything as a string
  • Loading branch information
tybalex committed May 1, 2024
1 parent 5c62aae commit aa5b207
Show file tree
Hide file tree
Showing 45 changed files with 4,674 additions and 491 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -802,8 +802,8 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

server: examples/server/server.cpp examples/server/utils.hpp examples/server/python-parser.hpp examples/server/function-call-str.hpp examples/server/tree_sitter/libtree-sitter.a examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o func_scanner.o func_parser.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -I examples/server/tree_sitter -o $(call GET_OBJ_FILE, $<)
server: examples/server/server.cpp examples/server/utils.hpp examples/server/python-parser.hpp examples/server/yaml-parser.hpp examples/server/function-call.hpp examples/server/tree_sitter/libtree-sitter.a examples/server/yaml-cpp/libyaml-cpp.a examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o func_scanner.o func_parser.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -I examples/server/tree_sitter -I examples/server/yaml-cpp -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) $(call GET_OBJ_FILE, $<) -Iexamples/server -o $@ $(LDFLAGS) $(LWINSOCK2)

# Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`:
Expand Down
348 changes: 0 additions & 348 deletions examples/server/function-call-str.hpp

This file was deleted.

695 changes: 695 additions & 0 deletions examples/server/function-call.hpp

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion examples/server/python-parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ static void parseFunctionCalls(const TSNode& node, std::vector<json>& calls, con
}
}

static std::vector<json> parsePythonFunctionCalls(std::string source_string, json tool_name_map) {
std::vector<json> parsePythonFunctionCalls(std::string source_string, json tool_name_map) {
// Parse Python function calls from the source code and return a JSON array
std::vector<json> calls;
std::string delimiter = "<<functions>>";
Expand Down
139 changes: 9 additions & 130 deletions examples/server/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

#include "json.hpp"
#include "python-parser.hpp"
#include "function-call-str.hpp"
#include "function-call.hpp"
#include "yaml-parser.hpp"

#include <string>
#include <vector>
Expand Down Expand Up @@ -357,135 +358,11 @@ static json oaicompat_completion_params_parse(

llama_params["__oaicompat"] = true;

std::string function_str = "";

json tool_name_map;

if (body.contains("tools") && !body["tools"].empty()) {
// function_str = default_tool_formatter(body["tool"]);
function_str = rubra_format_typescript_function_call_str(body["tools"], tool_name_map);
}
// If 'tool' is not set or empty, check 'functions'
else if (body.contains("functions") && !body["functions"].empty()) {
// function_str = default_tool_formatter(body["functions"]);
function_str = rubra_format_typescript_function_call_str(body["functions"], tool_name_map);
}
printf("\n=============Formatting Input from OPENAI format...============\n");
if (function_str != "") {
const std::vector<json> expand_messages = [&]() {
// std::vector<json> temp_vec = body["messages"];
// if (body["messages"][0]["role"] == "system") {
// std::string old_content = temp_vec[0]["content"];
// temp_vec[0]["content"] = old_content + "\n" + function_str;
// }
// else {
// json function_call;
// function_call["role"] = "system";
// function_call["content"] = "You are a helpful assistant.\n" + function_str;
// temp_vec.push_back(function_call);
// }
std::vector<json> temp_vec;
nlohmann::ordered_map<std::string, std::string> func_observation_map;
for (size_t i = 0; i < body["messages"].size(); ++i) {

if (body["messages"][i]["role"] != "tool" and func_observation_map.size() > 0) {
// insert the observation from the tool call before the next message
std::string observation_str = "";
for (const auto& [key, value] : func_observation_map) {
if (observation_str != "") {
observation_str += ", ";
}
observation_str += value;
}
observation_str = std::string("<<observation>>") + "[" + observation_str + "]";
json observation_call;
observation_call["role"] = "observation";
observation_call["content"] = observation_str;
temp_vec.push_back(observation_call);
func_observation_map.clear();
}

if (i == 0){
if (body["messages"][0]["role"] == "system") {
std::string old_content = body["messages"][0]["content"];
json function_call;
function_call["role"] = "system";
function_call["content"] = old_content + "\n" + function_str;
temp_vec.push_back(function_call);
}
else { // insert a system message of tool definition before the first message
json function_call;
function_call["role"] = "system";
function_call["content"] = "You are a helpful assistant.\n" + function_str;
temp_vec.push_back(function_call);
temp_vec.push_back(body["messages"][0]);
}
}
// else if (body["messages"][i]["role"] == "assistant" and (body["messages"][i]["content"].is_null() or body["messages"][i]["content"]=="") and !body["messages"][i]["tool_calls"].is_null() and !body["messages"][i]["tool_calls"].empty()){
else if (body["messages"][i]["role"] == "assistant" and body["messages"][i].contains("tool_calls")){
// convert OpenAI function call format to Rubra format
std::string tool_call_str = "";
for (const auto & tool_call : body["messages"][i]["tool_calls"]) {
std::string func_str = "";
func_observation_map[tool_call["id"].get<std::string>()] = ""; // initialize with empty value and later should be updated with the actual value from "tool_call" role message
json args = json::parse(tool_call["function"]["arguments"].get<std::string>()); // TODO: catch the exceptions
for (auto& arg : args.items()) {
if (func_str != "") {
func_str += ", ";
}
func_str += arg.key() + "=" + arg.value().dump();
}
func_str = tool_call["function"]["name"].get<std::string>() + "(" + func_str + ")";
if (tool_call_str != "") {
tool_call_str += ", ";
}
tool_call_str += func_str;
}
tool_call_str = std::string("<<functions>>") + "[" + tool_call_str + "]";

json function_call;
function_call["role"] = "function";
function_call["content"] = tool_call_str;
temp_vec.push_back(function_call);
}
else if (body["messages"][i]["role"] == "tool") {
std::string tool_call_id = body["messages"][i]["tool_call_id"].get<std::string>();
if (func_observation_map.find(tool_call_id) != func_observation_map.end()) {
func_observation_map[tool_call_id] = body["messages"][i]["content"].get<std::string>();
} else {
LOG_ERROR("Tool call id not found in the map", {{"tool_call_id", tool_call_id}});
// TODO: the input is not valid in this case, should return an error
}

}
else {
temp_vec.push_back(body["messages"][i]);
}

}
if (func_observation_map.size() > 0) {
// insert the observation from the tool call before the next message
std::string observation_str = "";
for (const auto& [key, value] : func_observation_map) {
if (observation_str != "") {
observation_str += ", ";
}
observation_str += value;
}
observation_str = std::string("<<observation>>") + "[" + observation_str + "]";
json observation_call;
observation_call["role"] = "observation";
observation_call["content"] = observation_str;
temp_vec.push_back(observation_call);
func_observation_map.clear();
}
return temp_vec;
}();
llama_params["prompt"] = format_chat(model, chat_template, expand_messages);
}
else {
llama_params["prompt"] = format_chat(model, chat_template, body["messages"]);
}
printf("==================END of InPut================\n\n");
const std::vector<json> expanded_messages = expand_messages(body, tool_name_map);
llama_params["prompt"] = format_chat(model, chat_template, expanded_messages);
llama_params["tool_name_map"] = tool_name_map;

// Map OpenAI parameters to llama.cpp parameters
Expand Down Expand Up @@ -569,7 +446,8 @@ static json format_final_response_oaicompat(const json & request, json result, c
int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
std::string content = json_value(result, "content", std::string(""));

std::vector<json> parsed_content = parsePythonFunctionCalls(content, request["tool_name_map"]);
// std::vector<json> parsed_content = parsePythonFunctionCalls(content, request["tool_name_map"]);
std::vector<json> parsed_content = rubra_fc_yaml_tool_extractor(content);

std::string finish_reason = "length";
if (stopped_word || stopped_eos) {
Expand Down Expand Up @@ -651,7 +529,8 @@ static std::vector<json> format_partial_response_oaicompat(json request ,json re
bool stopped_limit = json_value(result, "stopped_limit", false);
std::string content = json_value(result, "content", std::string(""));

std::vector<json> parsed_content = parsePythonFunctionCalls(content, request["tool_name_map"]);
// std::vector<json> parsed_content = parsePythonFunctionCalls(content, request["tool_name_map"]);
std::vector<json> parsed_content = rubra_fc_yaml_tool_extractor(content);
std::time_t t = std::time(0);
if (!parsed_content.empty()) {
std::vector<json> res;
Expand Down
Binary file added examples/server/yaml-cpp/libyaml-cpp.a
Binary file not shown.
17 changes: 17 additions & 0 deletions examples/server/yaml-cpp/yaml-cpp/anchor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66

#if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once
#endif

#include <cstddef>

namespace YAML {
// Identifier assigned to each YAML anchor encountered during parsing.
// Anchors are numbered starting at 1; see AnchorDict, which stores
// anchor `a` at index `a - 1`.
using anchor_t = std::size_t;
// Sentinel meaning "no anchor"; never a valid AnchorDict key.
const anchor_t NullAnchor = 0;
}

#endif  // ANCHOR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
71 changes: 71 additions & 0 deletions examples/server/yaml-cpp/yaml-cpp/binary.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#ifndef BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66

#if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once
#endif

#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>

#include "yaml-cpp/dll.h"

namespace YAML {
// Base64 helpers (used for YAML's !!binary tag).
YAML_CPP_API std::string EncodeBase64(const unsigned char *data,
                                      std::size_t size);
YAML_CPP_API std::vector<unsigned char> DecodeBase64(const std::string &input);

/// A byte blob that either OWNS its bytes (in m_data) or BORROWS them
/// (m_unownedData / m_unownedSize). owned() distinguishes the two states:
/// the object is owning exactly when m_unownedData is null.
class YAML_CPP_API Binary {
 public:
  /// Borrowing constructor: does NOT copy. The caller must keep `data_`
  /// alive for the lifetime of this Binary (or until swap() takes ownership).
  Binary(const unsigned char *data_, std::size_t size_)
      : m_data{}, m_unownedData(data_), m_unownedSize(size_) {}
  /// Default: empty, owning state (null borrowed pointer).
  Binary() : Binary(nullptr, 0) {}
  Binary(const Binary &) = default;
  Binary(Binary &&) = default;
  Binary &operator=(const Binary &) = default;
  Binary &operator=(Binary &&) = default;

  /// True when the bytes live in m_data (including the empty default state).
  bool owned() const { return !m_unownedData; }
  std::size_t size() const { return owned() ? m_data.size() : m_unownedSize; }
  const unsigned char *data() const {
    // Use vector::data(): it is well-defined (may return null) for an empty
    // vector, whereas &m_data[0] is undefined behavior when m_data is empty
    // (e.g. on a default-constructed Binary).
    return owned() ? m_data.data() : m_unownedData;
  }

  /// Exchanges contents with `rhs`. If this Binary was borrowing, the
  /// borrowed bytes are first copied into `rhs` so that, afterwards, this
  /// object owns rhs's old bytes and rhs owns a copy of the borrowed bytes.
  void swap(std::vector<unsigned char> &rhs) {
    if (m_unownedData) {
      m_data.swap(rhs);  // take rhs's bytes; rhs now holds our (empty) m_data
      rhs.clear();
      rhs.resize(m_unownedSize);
      std::copy(m_unownedData, m_unownedData + m_unownedSize, rhs.begin());
      // We now own what used to be rhs's storage.
      m_unownedData = nullptr;
      m_unownedSize = 0;
    } else {
      m_data.swap(rhs);
    }
  }

  /// Byte-wise equality; compares sizes first.
  bool operator==(const Binary &rhs) const {
    const std::size_t s = size();
    if (s != rhs.size())
      return false;
    // Empty blobs compare equal without touching the (possibly null) pointers.
    return s == 0 || std::equal(data(), data() + s, rhs.data());
  }

  bool operator!=(const Binary &rhs) const { return !(*this == rhs); }

 private:
  std::vector<unsigned char> m_data;       // owned storage (when owned())
  const unsigned char *m_unownedData;      // borrowed pointer, else nullptr
  std::size_t m_unownedSize;               // borrowed size (valid when borrowing)
};
}  // namespace YAML

#endif  // BASE64_H_62B23520_7C8E_11DE_8A39_0800200C9A66
40 changes: 40 additions & 0 deletions examples/server/yaml-cpp/yaml-cpp/contrib/anchordict.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66

#if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once
#endif

#include <utility>
#include <vector>

#include "../anchor.h"

namespace YAML {
/**
 * An object that stores and retrieves values correlating to {@link anchor_t}
 * values.
 *
 * <p>Efficient implementation that can make assumptions about how
 * {@code anchor_t} values are assigned by the {@link Parser} class.
 */
template <class T>
class AnchorDict {
 public:
  AnchorDict() : m_data{} {}

  /// Stores `value` under the 1-based `anchor`, growing storage on demand.
  /// Precondition: anchor >= 1 (NullAnchor == 0 must never be registered —
  /// `anchor - 1` would wrap to SIZE_MAX).
  void Register(anchor_t anchor, T value) {
    if (anchor > m_data.size()) {
      m_data.resize(anchor);
    }
    // Sink parameter: move into storage instead of copy-assigning.
    m_data[anchor - 1] = std::move(value);
  }

  /// Returns a copy of the value registered under `anchor`.
  /// Precondition: `anchor` was previously registered (no bounds check).
  T Get(anchor_t anchor) const { return m_data[anchor - 1]; }

 private:
  std::vector<T> m_data;  // slot i holds the value for anchor i + 1
};
}  // namespace YAML

#endif  // ANCHORDICT_H_62B23520_7C8E_11DE_8A39_0800200C9A66
Loading

0 comments on commit aa5b207

Please sign in to comment.