From c2d8f61cad77f7afa5ab42806a72427da25594ed Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Fri, 17 Jan 2025 17:48:12 +0700
Subject: [PATCH] fix: remote engine: mistral error on stream mode (#1871)

Co-authored-by: vansangpfiev
---
 engine/controllers/models.cc                  |   1 +
 .../extensions/remote-engine/remote_engine.cc |   5 +-
 engine/services/model_service.cc              | 118 +++++++++---------
 3 files changed, 63 insertions(+), 61 deletions(-)

diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc
index 5bf02aa46..64c237dad 100644
--- a/engine/controllers/models.cc
+++ b/engine/controllers/models.cc
@@ -526,6 +526,7 @@ void Models::StartModel(
   if (auto& o = (*(req->getJsonObject()))["llama_model_path"]; !o.isNull()) {
     auto model_path = o.asString();
     if (auto& mp = (*(req->getJsonObject()))["model_path"]; mp.isNull()) {
+      mp = model_path;
       // Bypass if model does not exist in DB and llama_model_path exists
       if (std::filesystem::exists(model_path) &&
           !model_service_->HasModel(model_handle)) {
diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc
index 989961092..b843a3d58 100644
--- a/engine/extensions/remote-engine/remote_engine.cc
+++ b/engine/extensions/remote-engine/remote_engine.cc
@@ -27,8 +27,9 @@ size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb,
   auto* context = static_cast<StreamContext*>(userdata);
   std::string chunk(ptr, size * nmemb);
   CTL_DBG(chunk);
-  auto check_error = json_helper::ParseJsonString(chunk);
-  if (check_error.isMember("error")) {
+  Json::Value check_error;
+  Json::Reader reader;
+  if (reader.parse(chunk, check_error)) {
     CTL_WRN(chunk);
     Json::Value status;
     status["is_done"] = true;
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index f79c20859..4a21dff40 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -819,75 +819,75 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
   constexpr const int kDefautlContextLength = 8192;
   int max_model_context_length = kDefautlContextLength;
   Json::Value json_data;
-  auto model_entry = db_service_->GetModelInfo(model_handle);
-  if (model_entry.has_error()) {
-    CTL_WRN("Error: " + model_entry.error());
-    return cpp::fail(model_entry.error());
-  }
-  yaml_handler.ModelConfigFromFile(
-      fmu::ToAbsoluteCortexDataPath(
-          fs::path(model_entry.value().path_to_model_yaml))
-          .string());
-  auto mc = yaml_handler.GetModelConfig();
-
-  // Check if Python model first
-  if (mc.engine == kPythonEngine) {
-
-    config::PythonModelConfig python_model_config;
-    python_model_config.ReadFromYaml(
-
+  // Currently we don't support download vision models, so we need to bypass check
+  if (!bypass_model_check) {
+    auto model_entry = db_service_->GetModelInfo(model_handle);
+    if (model_entry.has_error()) {
+      CTL_WRN("Error: " + model_entry.error());
+      return cpp::fail(model_entry.error());
+    }
+    yaml_handler.ModelConfigFromFile(
         fmu::ToAbsoluteCortexDataPath(
             fs::path(model_entry.value().path_to_model_yaml))
             .string());
+    auto mc = yaml_handler.GetModelConfig();
+
+    // Check if Python model first
+    if (mc.engine == kPythonEngine) {
+
+      config::PythonModelConfig python_model_config;
+      python_model_config.ReadFromYaml(
+
+          fmu::ToAbsoluteCortexDataPath(
+              fs::path(model_entry.value().path_to_model_yaml))
+              .string());
+      // Start all depends model
+      auto depends = python_model_config.depends;
+      for (auto& depend : depends) {
+        Json::Value temp;
+        auto res = StartModel(depend, temp, false);
+        if (res.has_error()) {
+          CTL_WRN("Error: " + res.error());
+          for (auto& depend : depends) {
+            if (depend != model_handle) {
+              StopModel(depend);
+            }
           }
+          return cpp::fail("Model failed to start dependency '" + depend +
+                           "' : " + res.error());
         }
-    }
-    json_data["model"] = model_handle;
-    json_data["model_path"] =
-        fmu::ToAbsoluteCortexDataPath(
-            fs::path(model_entry.value().path_to_model_yaml))
-            .string();
-    json_data["engine"] = mc.engine;
-    assert(!!inference_svc_);
-    // Check if python engine
-
-    auto ir =
-        inference_svc_->LoadModel(std::make_shared<Json::Value>(json_data));
-    auto status = std::get<0>(ir)["status_code"].asInt();
-    auto data = std::get<1>(ir);
-
-    if (status == drogon::k200OK) {
-      return StartModelResult{.success = true, .warning = ""};
-    } else if (status == drogon::k409Conflict) {
-      CTL_INF("Model '" + model_handle + "' is already loaded");
-      return StartModelResult{.success = true, .warning = ""};
-    } else {
-      // only report to user the error
-      for (auto& depend : depends) {
+      json_data["model"] = model_handle;
+      json_data["model_path"] =
+          fmu::ToAbsoluteCortexDataPath(
+              fs::path(model_entry.value().path_to_model_yaml))
+              .string();
+      json_data["engine"] = mc.engine;
+      assert(!!inference_svc_);
+      // Check if python engine
+
+      auto ir =
+          inference_svc_->LoadModel(std::make_shared<Json::Value>(json_data));
+      auto status = std::get<0>(ir)["status_code"].asInt();
+      auto data = std::get<1>(ir);
-        StopModel(depend);
+      if (status == drogon::k200OK) {
+        return StartModelResult{.success = true, .warning = ""};
+      } else if (status == drogon::k409Conflict) {
+        CTL_INF("Model '" + model_handle + "' is already loaded");
+        return StartModelResult{.success = true, .warning = ""};
+      } else {
+        // only report to user the error
+        for (auto& depend : depends) {
+
+          StopModel(depend);
+        }
       }
+      CTL_ERR("Model failed to start with status code: " << status);
+      return cpp::fail("Model failed to start: " +
+                       data["message"].asString());
     }
-    CTL_ERR("Model failed to start with status code: " << status);
-    return cpp::fail("Model failed to start: " + data["message"].asString());
-  }
-
-  // Currently we don't support download vision models, so we need to bypass check
-  if (!bypass_model_check) {
     // Running remote model
     if (engine_svc_->IsRemoteEngine(mc.engine)) {