From f613a91b909d396fa74d87b1fd0973c76069a82e Mon Sep 17 00:00:00 2001
From: tikikun
Date: Fri, 17 Nov 2023 14:21:27 +0700
Subject: [PATCH 1/2] embedding that is compatible with openai

---
 controllers/llamaCPP.cc | 50 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index e3901df60..e01bd9a46 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -27,6 +27,45 @@ std::shared_ptr createState(int task_id, llamaCPP *instance) {
 
 // --------------------------------------------
 
+#include <json/value.h>
+#include <json/writer.h>
+#include <string>
+#include <vector>
+
+std::string create_embedding_payload(const std::vector<float> &embedding,
+                                     int prompt_tokens) {
+  Json::Value root;
+
+  root["object"] = "list";
+
+  Json::Value dataArray(Json::arrayValue);
+  Json::Value dataItem;
+
+  dataItem["object"] = "embedding";
+
+  Json::Value embeddingArray(Json::arrayValue);
+  for (const auto &value : embedding) {
+    embeddingArray.append(value);
+  }
+  dataItem["embedding"] = embeddingArray;
+  dataItem["index"] = 0;
+
+  dataArray.append(dataItem);
+  root["data"] = dataArray;
+
+  root["model"] = "_";
+
+  Json::Value usage;
+  usage["prompt_tokens"] = prompt_tokens;
+  usage["total_tokens"] = prompt_tokens; // Assuming total tokens equals prompt
+                                         // tokens in this context
+  root["usage"] = usage;
+
+  Json::StreamWriterBuilder writer;
+  writer["indentation"] = ""; // Compact output
+  return Json::writeString(writer, root);
+}
+
 std::string create_full_return_json(const std::string &id,
                                     const std::string &model,
                                     const std::string &content,
@@ -245,17 +284,18 @@ void llamaCPP::embedding(
   const auto &jsonBody = req->getJsonObject();
 
   json prompt;
-  if (jsonBody->isMember("content") != 0) {
-    prompt = (*jsonBody)["content"].asString();
+  if (jsonBody->isMember("input") != 0) {
+    prompt = (*jsonBody)["input"].asString();
   } else {
     prompt = "";
   }
   const int task_id = llama.request_completion(
       {{"prompt", prompt}, {"n_predict", 0}}, false, true);
   task_result result = llama.next_result(task_id);
-  std::string embeddingResp = result.result_json.dump();
+  std::vector<float> embedding_result = result.result_json["embedding"];
   auto resp = nitro_utils::nitroHttpResponse();
-  resp->setBody(embeddingResp);
+  std::string embedding_resp = create_embedding_payload(embedding_result, 0);
+  resp->setBody(embedding_resp);
   resp->setContentTypeString("application/json");
   callback(resp);
   return;
@@ -363,7 +403,7 @@ void llamaCPP::loadModel(
 
   llama.initialize();
   Json::Value jsonResp;
-  jsonResp["message"] = "Failed to load model";
+  jsonResp["message"] = "Model loaded successfully";
   model_loaded = true;
   auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);

From 6aa879bb412190b865fef3284e29f1fe4f0c6386 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Fri, 17 Nov 2023 14:22:52 +0700
Subject: [PATCH 2/2] embedding that is compatible with openai

---
 controllers/llamaCPP.cc | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index e01bd9a46..63c032657 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -27,11 +27,6 @@ std::shared_ptr createState(int task_id, llamaCPP *instance) {
 
 // --------------------------------------------
 
-#include <json/value.h>
-#include <json/writer.h>
-#include <string>
-#include <vector>
-
 std::string create_embedding_payload(const std::vector<float> &embedding,
                                      int prompt_tokens) {
   Json::Value root;
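
For reference, below is a minimal standalone sketch of the response body the new create_embedding_payload helper produces. The helper itself is reproduced from the patch above; the main driver, the sample embedding values, and the token count are illustrative only, and jsoncpp is assumed to be available.

// Standalone sketch: build an OpenAI-style embedding response body.
// The sample vector and prompt_tokens value in main() are made up for illustration.
#include <json/value.h>
#include <json/writer.h>
#include <iostream>
#include <string>
#include <vector>

std::string create_embedding_payload(const std::vector<float> &embedding,
                                     int prompt_tokens) {
  Json::Value root;
  root["object"] = "list";

  // One "embedding" item holding the raw vector at index 0.
  Json::Value dataItem;
  dataItem["object"] = "embedding";
  Json::Value embeddingArray(Json::arrayValue);
  for (const auto &value : embedding) {
    embeddingArray.append(value);
  }
  dataItem["embedding"] = embeddingArray;
  dataItem["index"] = 0;

  Json::Value dataArray(Json::arrayValue);
  dataArray.append(dataItem);
  root["data"] = dataArray;
  root["model"] = "_";

  // Usage block mirrors OpenAI's schema; total equals prompt tokens here.
  Json::Value usage;
  usage["prompt_tokens"] = prompt_tokens;
  usage["total_tokens"] = prompt_tokens;
  root["usage"] = usage;

  Json::StreamWriterBuilder writer;
  writer["indentation"] = ""; // compact, single-line JSON
  return Json::writeString(writer, root);
}

int main() {
  // Illustrative values; in the controller the vector comes from
  // result.result_json["embedding"] after llama.request_completion.
  std::vector<float> embedding = {0.0023f, -0.0091f, 0.0154f};
  std::cout << create_embedding_payload(embedding, 8) << "\n";
  // Expected shape (keys are emitted alphabetically by jsoncpp, values abbreviated):
  // {"data":[{"embedding":[0.0023,...],"index":0,"object":"embedding"}],
  //  "model":"_","object":"list","usage":{"prompt_tokens":8,"total_tokens":8}}
  return 0;
}

Together with switching the handler to read the prompt from the request's "input" field instead of "content", this response shape is what OpenAI-compatible embedding clients expect, which is the point of the patch series.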