diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md index acc19b4993..78841f087e 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md @@ -411,9 +411,14 @@ Below are the API references and multi-language service invocation examples: Positions and contents of formulas. -image +layoutImage string -Formula recognition result image with detected formula positions annotated. The image is in JPEG format and encoded in Base64. +Layout area detection result image. The image is in JPEG format and encoded using Base64. + + +ocrImage +string +OCR result image. The image is in JPEG format and encoded using Base64. @@ -464,7 +469,8 @@ Below are the API references and multi-language service invocation examples: "latex": "F({\bf x})=C(F_{1}(x_{1}),\cdot\cdot\cdot,F_{N}(x_{N})).\qquad\qquad\qquad(1)" } ], -"image": "xxxxxx" +"layoutImage": "xxxxxx", +"ocrImage": "xxxxxx" } @@ -479,7 +485,7 @@ import requests API_URL = "http://localhost:8080/formula-recognition" image_path = "./demo.jpg" -output_image_path = "./out.jpg" +layout_image_path = "./layout.jpg" with open(image_path, "rb") as file: image_bytes = file.read() @@ -491,9 +497,9 @@ response = requests.post(API_URL, json=payload) assert response.status_code == 200 result = response.json()["result"] -with open(output_image_path, "wb") as file: - file.write(base64.b64decode(result["image"])) -print(f"Output image saved at {output_image_path}") +with open(layout_image_path, "wb") as file: + file.write(base64.b64decode(result["layoutImage"])) +print(f"Output image saved at {layout_image_path}") print("\nDetected formulas:") print(result["formulas"]) @@ -508,7 +514,7 @@ print(result["formulas"]) int main() { httplib::Client client("localhost:8080"); const std::string imagePath = "./demo.jpg"; - const std::string outputImagePath 
= "./out.jpg"; + const std::string layoutImagePath = "./layout.jpg"; httplib::Headers headers = { {"Content-Type", "application/json"} @@ -535,16 +541,16 @@ int main() { nlohmann::json jsonResponse = nlohmann::json::parse(response->body); auto result = jsonResponse["result"]; - encodedImage = result["image"]; - std::string decodedString = base64::from_base64(encodedImage); - std::vector<unsigned char> decodedImage(decodedString.begin(), decodedString.end()); - std::ofstream outputImage(outPutImagePath, std::ios::binary | std::ios::out); - if (outputImage.is_open()) { - outputImage.write(reinterpret_cast<char*>(decodedImage.data()), decodedImage.size()); - outputImage.close(); - std::cout << "Output image saved at " << outPutImagePath << std::endl; + encodedImage = result["layoutImage"]; + std::string decodedString = base64::from_base64(encodedImage); + std::vector<unsigned char> decodedLayoutImage(decodedString.begin(), decodedString.end()); + std::ofstream outputLayoutFile(layoutImagePath, std::ios::binary | std::ios::out); + if (outputLayoutFile.is_open()) { + outputLayoutFile.write(reinterpret_cast<char*>(decodedLayoutImage.data()), decodedLayoutImage.size()); + outputLayoutFile.close(); + std::cout << "Output image saved at " << layoutImagePath << std::endl; } else { - std::cerr << "Unable to open file for writing: " << outPutImagePath << std::endl; + std::cerr << "Unable to open file for writing: " << layoutImagePath << std::endl; } auto formulas = result["formulas"]; @@ -577,7 +583,7 @@ public class Main { public static void main(String[] args) throws IOException { String API_URL = "http://localhost:8080/formula-recognition"; String imagePath = "./demo.jpg"; - String outputImagePath = "./out.jpg"; + String layoutImagePath = "./layout.jpg"; File file = new File(imagePath); byte[] fileContent = java.nio.file.Files.readAllBytes(file.toPath()); @@ -600,14 +606,15 @@ public class Main { String responseBody = response.body().string(); JsonNode resultNode = 
objectMapper.readTree(responseBody); JsonNode result = resultNode.get("result"); - String base64Image = result.get("image").asText(); + String layoutBase64Image = result.get("layoutImage").asText(); JsonNode formulas = result.get("formulas"); - byte[] imageBytes = Base64.getDecoder().decode(base64Image); - try (FileOutputStream fos = new FileOutputStream(outputImagePath)) { + byte[] imageBytes = Base64.getDecoder().decode(layoutBase64Image); + try (FileOutputStream fos = new FileOutputStream(layoutImagePath)) { fos.write(imageBytes); } - System.out.println("Output image saved at " + outputImagePath); + System.out.println("Output image saved at " + layoutImagePath); + System.out.println("\nDetected formulas: " + formulas.toString()); } else { System.err.println("Request failed with code: " + response.code()); @@ -633,7 +640,7 @@ import ( func main() { API_URL := "http://localhost:8080/formula-recognition" imagePath := "./demo.jpg" - outputImagePath := "./out.jpg" + layoutImagePath := "./layout.jpg" imageBytes, err := ioutil.ReadFile(imagePath) if err != nil { @@ -670,7 +677,7 @@ func main() { } type Response struct { Result struct { - Image string `json:"image"` + LayoutImage string `json:"layoutImage"` Formulas []map[string]interface{} `json:"formulas"` } `json:"result"` } @@ -681,17 +688,18 @@ func main() { return } - outputImageData, err := base64.StdEncoding.DecodeString(respData.Result.Image) + layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.LayoutImage) if err != nil { fmt.Println("Error decoding base64 image data:", err) return } - err = ioutil.WriteFile(outputImagePath, outputImageData, 0644) + err = ioutil.WriteFile(layoutImagePath, layoutImageData, 0644) if err != nil { fmt.Println("Error writing image to file:", err) return } - fmt.Printf("Image saved at %s.jpg\n", outputImagePath) + fmt.Printf("Image saved at %s.jpg\n", layoutImagePath) + fmt.Println("\nDetected formulas:") for _, formula := range respData.Result.Formulas { 
fmt.Println(formula) @@ -713,7 +721,7 @@ class Program { static readonly string API_URL = "http://localhost:8080/formula-recognition"; static readonly string imagePath = "./demo.jpg"; - static readonly string outputImagePath = "./out.jpg"; + static readonly string layoutImagePath = "./layout.jpg"; static async Task Main(string[] args) { @@ -731,11 +739,11 @@ class Program string responseBody = await response.Content.ReadAsStringAsync(); JObject jsonResponse = JObject.Parse(responseBody); - string base64Image = jsonResponse["result"]["image"].ToString(); - byte[] outputImageBytes = Convert.FromBase64String(base64Image); + string layoutBase64Image = jsonResponse["result"]["layoutImage"].ToString(); + byte[] layoutImageBytes = Convert.FromBase64String(layoutBase64Image); + File.WriteAllBytes(layoutImagePath, layoutImageBytes); + Console.WriteLine($"Output image saved at {layoutImagePath}"); - File.WriteAllBytes(outputImagePath, outputImageBytes); - Console.WriteLine($"Output image saved at {outputImagePath}"); Console.WriteLine("\nDetected formulas:"); Console.WriteLine(jsonResponse["result"]["formulas"].ToString()); } @@ -749,7 +757,7 @@ const fs = require('fs'); const API_URL = 'http://localhost:8080/formula-recognition' const imagePath = './demo.jpg' -const outputImagePath = "./out.jpg"; +const layoutImagePath = "./layout.jpg"; let config = { method: 'POST', @@ -768,11 +776,13 @@ function encodeImageToBase64(filePath) { axios.request(config) .then((response) => { const result = response.data["result"]; - const imageBuffer = Buffer.from(result["image"], 'base64'); - fs.writeFile(outputImagePath, imageBuffer, (err) => { + + imageBuffer = Buffer.from(result["layoutImage"], 'base64'); + fs.writeFile(layoutImagePath, imageBuffer, (err) => { if (err) throw err; - console.log(`Output image saved at ${outputImagePath}`); + console.log(`Output image saved at ${layoutImagePath}`); }); + console.log("\nDetected formulas:"); console.log(result["formulas"]); }) @@ -785,9 +795,9 
@@ axios.request(config)
<?php
 
-$API_URL = "http://localhost:8080/formula-recognition";
+$API_URL = "http://localhost:8080/formula-recognition";
 $image_path = "./demo.jpg";
-$output_image_path = "./out.jpg";
+$layout_image_path = "./layout.jpg";
 
 $image_data = base64_encode(file_get_contents($image_path));
 $payload = array("image" => $image_data);
@@ -801,8 +811,10 @@ $response = curl_exec($ch);
 curl_close($ch);
 
 $result = json_decode($response, true)["result"];
-file_put_contents($output_image_path, base64_decode($result["image"]));
-echo "Output image saved at " . $output_image_path . "\n";
+
+file_put_contents($layout_image_path, base64_decode($result["layoutImage"]));
+echo "Output image saved at " . $layout_image_path . "\n";
+
 echo "\nDetected formulas:\n";
 print_r($result["formulas"]);
 
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md b/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md
index 5bf2c5709c..8d8a39bb67 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md
@@ -412,9 +412,14 @@ for res in output:
 公式位置和内容。
 
 
-image
+layoutImage
 string
-公式识别结果图,其中标注检测到的公式位置。图像为JPEG格式,使用Base64编码。
+版面区域检测结果图。图像为JPEG格式,使用Base64编码。
+
+
+ocrImage
+string
+OCR结果图。图像为JPEG格式,使用Base64编码。
 
 
 
@@ -465,7 +470,8 @@ for res in output:
 "latex": "F({\bf x})=C(F_{1}(x_{1}),\cdot\cdot\cdot,F_{N}(x_{N})).\qquad\qquad\qquad(1)"
 }
 ],
-"image": "xxxxxx"
+"layoutImage": "xxxxxx",
+"ocrImage": "xxxxxx"
 }
 
@@ -480,7 +486,7 @@ import requests API_URL = "http://localhost:8080/formula-recognition" # 服务URL image_path = "./demo.jpg" -output_image_path = "./out.jpg" +layout_image_path = "./layout.jpg" # 对本地图像进行Base64编码 with open(image_path, "rb") as file: @@ -495,9 +501,9 @@ response = requests.post(API_URL, json=payload) # 处理接口返回数据 assert response.status_code == 200 result = response.json()["result"] -with open(output_image_path, "wb") as file: - file.write(base64.b64decode(result["image"])) -print(f"Output image saved at {output_image_path}") +with open(layout_image_path, "wb") as file: + file.write(base64.b64decode(result["layoutImage"])) +print(f"Output image saved at {layout_image_path}") print("\nDetected formulas:") print(result["formulas"]) @@ -512,7 +518,7 @@ print(result["formulas"]) int main() { httplib::Client client("localhost:8080"); const std::string imagePath = "./demo.jpg"; - const std::string outputImagePath = "./out.jpg"; + const std::string layoutImagePath = "./layout.jpg"; httplib::Headers headers = { {"Content-Type", "application/json"} @@ -542,16 +548,16 @@ int main() { nlohmann::json jsonResponse = nlohmann::json::parse(response->body); auto result = jsonResponse["result"]; - encodedImage = result["image"]; - std::string decodedString = base64::from_base64(encodedImage); - std::vector<unsigned char> decodedImage(decodedString.begin(), decodedString.end()); - std::ofstream outputImage(outPutImagePath, std::ios::binary | std::ios::out); - if (outputImage.is_open()) { - outputImage.write(reinterpret_cast<char*>(decodedImage.data()), decodedImage.size()); - outputImage.close(); - std::cout << "Output image saved at " << outPutImagePath << std::endl; + encodedImage = result["layoutImage"]; + std::string decodedString = base64::from_base64(encodedImage); + std::vector<unsigned char> decodedLayoutImage(decodedString.begin(), decodedString.end()); + std::ofstream outputLayoutFile(layoutImagePath, std::ios::binary | std::ios::out); + if (outputLayoutFile.is_open()) { + 
outputLayoutFile.write(reinterpret_cast<char*>(decodedLayoutImage.data()), decodedLayoutImage.size()); + outputLayoutFile.close(); + std::cout << "Output image saved at " << layoutImagePath << std::endl; } else { - std::cerr << "Unable to open file for writing: " << outPutImagePath << std::endl; + std::cerr << "Unable to open file for writing: " << layoutImagePath << std::endl; } auto formulas = result["formulas"]; @@ -584,7 +590,7 @@ public class Main { public static void main(String[] args) throws IOException { String API_URL = "http://localhost:8080/formula-recognition"; // 服务URL String imagePath = "./demo.jpg"; // 本地图像 - String outputImagePath = "./out.jpg"; // 输出图像 + String layoutImagePath = "./layout.jpg"; // 对本地图像进行Base64编码 File file = new File(imagePath); @@ -610,14 +616,15 @@ public class Main { String responseBody = response.body().string(); JsonNode resultNode = objectMapper.readTree(responseBody); JsonNode result = resultNode.get("result"); - String base64Image = result.get("image").asText(); + String layoutBase64Image = result.get("layoutImage").asText(); JsonNode formulas = result.get("formulas"); - byte[] imageBytes = Base64.getDecoder().decode(base64Image); - try (FileOutputStream fos = new FileOutputStream(outputImagePath)) { + byte[] imageBytes = Base64.getDecoder().decode(layoutBase64Image); + try (FileOutputStream fos = new FileOutputStream(layoutImagePath)) { fos.write(imageBytes); } - System.out.println("Output image saved at " + outputImagePath); + System.out.println("Output image saved at " + layoutImagePath); + System.out.println("\nDetected formulas: " + formulas.toString()); } else { System.err.println("Request failed with code: " + response.code()); @@ -643,7 +650,7 @@ import ( func main() { API_URL := "http://localhost:8080/formula-recognition" imagePath := "./demo.jpg" - outputImagePath := "./out.jpg" + layoutImagePath := "./layout.jpg" // 对本地图像进行Base64编码 imageBytes, err := ioutil.ReadFile(imagePath) @@ -683,7 +690,7 @@ func main() { } type 
Response struct { Result struct { - Image string `json:"image"` + LayoutImage string `json:"layoutImage"` Formulas []map[string]interface{} `json:"formulas"` } `json:"result"` } @@ -694,17 +701,18 @@ func main() { return } - outputImageData, err := base64.StdEncoding.DecodeString(respData.Result.Image) + layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.LayoutImage) if err != nil { fmt.Println("Error decoding base64 image data:", err) return } - err = ioutil.WriteFile(outputImagePath, outputImageData, 0644) + err = ioutil.WriteFile(layoutImagePath, layoutImageData, 0644) if err != nil { fmt.Println("Error writing image to file:", err) return } - fmt.Printf("Image saved at %s.jpg\n", outputImagePath) + fmt.Printf("Image saved at %s.jpg\n", layoutImagePath) + fmt.Println("\nDetected formulas:") for _, formula := range respData.Result.Formulas { fmt.Println(formula) @@ -726,7 +734,7 @@ class Program { static readonly string API_URL = "http://localhost:8080/formula-recognition"; static readonly string imagePath = "./demo.jpg"; - static readonly string outputImagePath = "./out.jpg"; + static readonly string layoutImagePath = "./layout.jpg"; static async Task Main(string[] args) { @@ -747,11 +755,11 @@ class Program string responseBody = await response.Content.ReadAsStringAsync(); JObject jsonResponse = JObject.Parse(responseBody); - string base64Image = jsonResponse["result"]["image"].ToString(); - byte[] outputImageBytes = Convert.FromBase64String(base64Image); + string layoutBase64Image = jsonResponse["result"]["layoutImage"].ToString(); + byte[] layoutImageBytes = Convert.FromBase64String(layoutBase64Image); + File.WriteAllBytes(layoutImagePath, layoutImageBytes); + Console.WriteLine($"Output image saved at {layoutImagePath}"); - File.WriteAllBytes(outputImagePath, outputImageBytes); - Console.WriteLine($"Output image saved at {outputImagePath}"); Console.WriteLine("\nDetected formulas:"); 
Console.WriteLine(jsonResponse["result"]["formulas"].ToString()); } @@ -765,7 +773,7 @@ const fs = require('fs'); const API_URL = 'http://localhost:8080/formula-recognition' const imagePath = './demo.jpg' -const outputImagePath = "./out.jpg"; +const layoutImagePath = "./layout.jpg"; let config = { method: 'POST', @@ -787,11 +795,13 @@ axios.request(config) .then((response) => { // 处理接口返回数据 const result = response.data["result"]; - const imageBuffer = Buffer.from(result["image"], 'base64'); - fs.writeFile(outputImagePath, imageBuffer, (err) => { + + imageBuffer = Buffer.from(result["layoutImage"], 'base64'); + fs.writeFile(layoutImagePath, imageBuffer, (err) => { if (err) throw err; - console.log(`Output image saved at ${outputImagePath}`); + console.log(`Output image saved at ${layoutImagePath}`); }); + console.log("\nDetected formulas:"); console.log(result["formulas"]); }) @@ -806,7 +816,7 @@ axios.request(config) $API_URL = "http://localhost:8080/formula-recognition"; // 服务URL $image_path = "./demo.jpg"; -$output_image_path = "./out.jpg"; +$layout_image_path = "./layout.jpg"; // 对本地图像进行Base64编码 $image_data = base64_encode(file_get_contents($image_path)); @@ -823,8 +833,10 @@ curl_close($ch); // 处理接口返回数据 $result = json_decode($response, true)["result"]; -file_put_contents($output_image_path, base64_decode($result["image"])); -echo "Output image saved at " . $output_image_path . "\n"; + +file_put_contents($layout_image_path, base64_decode($result["layoutImage"])); +echo "Output image saved at " . $layout_image_path . 
"\n"; + echo "\nDetected formulas:\n"; print_r($result["formulas"]); diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md index 536379f5b5..b708defb72 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md @@ -555,18 +555,23 @@ Below are the API references and multi-language service invocation examples: -sealImpressions +texts array -Seal recognition results. +Positions, contents, and scores of texts. layoutImage string Layout area detection result image. The image is in JPEG format and encoded using Base64. + +ocrImage +string +OCR result image. The image is in JPEG format and encoded using Base64. + -

Each element in sealImpressions is an object with the following properties:

+

Each element in texts is an object with the following properties:

@@ -605,6 +610,7 @@ import requests API_URL = "http://localhost:8080/seal-recognition" image_path = "./demo.jpg" +ocr_image_path = "./ocr.jpg" layout_image_path = "./layout.jpg" with open(image_path, "rb") as file: @@ -617,11 +623,14 @@ response = requests.post(API_URL, json=payload) assert response.status_code == 200 result = response.json()["result"] +with open(ocr_image_path, "wb") as file: + file.write(base64.b64decode(result["ocrImage"])) +print(f"Output image saved at {ocr_image_path}") with open(layout_image_path, "wb") as file: file.write(base64.b64decode(result["layoutImage"])) print(f"Output image saved at {layout_image_path}") -print("\nDetected seal impressions:") -print(result["sealImpressions"]) +print("\nDetected texts:") +print(result["texts"])
C++ @@ -634,6 +643,7 @@ print(result["sealImpressions"]) int main() { httplib::Client client("localhost:8080"); const std::string imagePath = "./demo.jpg"; + const std::string ocrImagePath = "./ocr.jpg"; const std::string layoutImagePath = "./layout.jpg"; httplib::Headers headers = { @@ -661,6 +671,18 @@ int main() { nlohmann::json jsonResponse = nlohmann::json::parse(response->body); auto result = jsonResponse["result"]; + encodedImage = result["ocrImage"]; + std::string decoded_string = base64::from_base64(encodedImage); + std::vector<unsigned char> decodedOcrImage(decoded_string.begin(), decoded_string.end()); + std::ofstream outputOcrFile(ocrImagePath, std::ios::binary | std::ios::out); + if (outputOcrFile.is_open()) { + outputOcrFile.write(reinterpret_cast<char*>(decodedOcrImage.data()), decodedOcrImage.size()); + outputOcrFile.close(); + std::cout << "Output image saved at " << ocrImagePath << std::endl; + } else { + std::cerr << "Unable to open file for writing: " << ocrImagePath << std::endl; + } + encodedImage = result["layoutImage"]; decodedString = base64::from_base64(encodedImage); std::vector<unsigned char> decodedLayoutImage(decodedString.begin(), decodedString.end()); @@ -673,10 +695,10 @@ int main() { std::cerr << "Unable to open file for writing: " << layoutImagePath << std::endl; } - auto impressions = result["sealImpressions"]; - std::cout << "\nDetected seal impressions:" << std::endl; - for (const auto& impression : impressions) { - std::cout << impression << std::endl; + auto texts = result["texts"]; + std::cout << "\nDetected texts:" << std::endl; + for (const auto& text : texts) { + std::cout << text << std::endl; } } else { std::cout << "Failed to send HTTP request." 
<< std::endl; @@ -703,6 +725,7 @@ public class Main { public static void main(String[] args) throws IOException { String API_URL = "http://localhost:8080/seal-recognition"; String imagePath = "./demo.jpg"; + String ocrImagePath = "./ocr.jpg"; String layoutImagePath = "./layout.jpg"; File file = new File(imagePath); @@ -726,8 +749,15 @@ public class Main { String responseBody = response.body().string(); JsonNode resultNode = objectMapper.readTree(responseBody); JsonNode result = resultNode.get("result"); + String ocrBase64Image = result.get("ocrImage").asText(); String layoutBase64Image = result.get("layoutImage").asText(); - JsonNode impressions = result.get("sealImpressions"); + JsonNode texts = result.get("texts"); + + byte[] imageBytes = Base64.getDecoder().decode(ocrBase64Image); + try (FileOutputStream fos = new FileOutputStream(ocrImagePath)) { + fos.write(imageBytes); + } + System.out.println("Output image saved at " + ocrImagePath); imageBytes = Base64.getDecoder().decode(layoutBase64Image); try (FileOutputStream fos = new FileOutputStream(layoutImagePath)) { @@ -735,7 +765,7 @@ public class Main { } System.out.println("Output image saved at " + layoutImagePath); - System.out.println("\nDetected seal impressions: " + impressions.toString()); + System.out.println("\nDetected texts: " + texts.toString()); } else { System.err.println("Request failed with code: " + response.code()); } @@ -760,6 +790,7 @@ import ( func main() { API_URL := "http://localhost:8080/seal-recognition" imagePath := "./demo.jpg" + ocrImagePath := "./ocr.jpg" layoutImagePath := "./layout.jpg" imageBytes, err := ioutil.ReadFile(imagePath) @@ -797,8 +828,9 @@ func main() { } type Response struct { Result struct { + OcrImage string `json:"ocrImage"` LayoutImage string `json:"layoutImage"` - Impressions []map[string]interface{} `json:"sealImpressions"` + Texts []map[string]interface{} `json:"texts"` } `json:"result"` } var respData Response @@ -808,6 +840,18 @@ func main() { return } + 
ocrImageData, err := base64.StdEncoding.DecodeString(respData.Result.OcrImage) + if err != nil { + fmt.Println("Error decoding base64 image data:", err) + return + } + err = ioutil.WriteFile(ocrImagePath, ocrImageData, 0644) + if err != nil { + fmt.Println("Error writing image to file:", err) + return + } + fmt.Printf("Image saved at %s.jpg\n", ocrImagePath) + layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.LayoutImage) if err != nil { fmt.Println("Error decoding base64 image data:", err) @@ -820,9 +864,9 @@ func main() { } fmt.Printf("Image saved at %s.jpg\n", layoutImagePath) - fmt.Println("\nDetected seal impressions:") - for _, impression := range respData.Result.Impressions { - fmt.Println(impression) + fmt.Println("\nDetected texts:") + for _, text := range respData.Result.Texts { + fmt.Println(text) } }
@@ -841,6 +885,7 @@ class Program { static readonly string API_URL = "http://localhost:8080/seal-recognition"; static readonly string imagePath = "./demo.jpg"; + static readonly string ocrImagePath = "./ocr.jpg"; static readonly string layoutImagePath = "./layout.jpg"; static async Task Main(string[] args) @@ -859,13 +904,18 @@ class Program string responseBody = await response.Content.ReadAsStringAsync(); JObject jsonResponse = JObject.Parse(responseBody); + string ocrBase64Image = jsonResponse["result"]["ocrImage"].ToString(); + byte[] ocrImageBytes = Convert.FromBase64String(ocrBase64Image); + File.WriteAllBytes(ocrImagePath, ocrImageBytes); + Console.WriteLine($"Output image saved at {ocrImagePath}"); + string layoutBase64Image = jsonResponse["result"]["layoutImage"].ToString(); byte[] layoutImageBytes = Convert.FromBase64String(layoutBase64Image); File.WriteAllBytes(layoutImagePath, layoutImageBytes); Console.WriteLine($"Output image saved at {layoutImagePath}"); - Console.WriteLine("\nDetected seal impressions:"); - Console.WriteLine(jsonResponse["result"]["sealImpressions"].ToString()); + Console.WriteLine("\nDetected texts:"); + Console.WriteLine(jsonResponse["result"]["texts"].ToString()); } } @@ -877,6 +927,7 @@ const fs = require('fs'); const API_URL = 'http://localhost:8080/seal-recognition' const imagePath = './demo.jpg' +const ocrImagePath = "./ocr.jpg"; const layoutImagePath = "./layout.jpg"; let config = { @@ -897,14 +948,20 @@ axios.request(config) .then((response) => { const result = response.data["result"]; + let imageBuffer = Buffer.from(result["ocrImage"], 'base64'); + fs.writeFile(ocrImagePath, imageBuffer, (err) => { + if (err) throw err; + console.log(`Output image saved at ${ocrImagePath}`); + }); + imageBuffer = Buffer.from(result["layoutImage"], 'base64'); fs.writeFile(layoutImagePath, imageBuffer, (err) => { if (err) throw err; console.log(`Output image saved at ${layoutImagePath}`); }); - console.log("\nDetected seal impressions:"); - 
console.log(result["sealImpressions"]); + console.log("\nDetected texts:"); + console.log(result["texts"]); }) .catch((error) => { console.log(error); @@ -917,6 +974,7 @@ axios.request(config) $API_URL = "http://localhost:8080/seal-recognition"; $image_path = "./demo.jpg"; +$ocr_image_path = "./ocr.jpg"; $layout_image_path = "./layout.jpg"; $image_data = base64_encode(file_get_contents($image_path)); @@ -931,12 +989,14 @@ $response = curl_exec($ch); curl_close($ch); $result = json_decode($response, true)["result"]; +file_put_contents($ocr_image_path, base64_decode($result["ocrImage"])); +echo "Output image saved at " . $ocr_image_path . "\n"; file_put_contents($layout_image_path, base64_decode($result["layoutImage"])); echo "Output image saved at " . $layout_image_path . "\n"; -echo "\nDetected seal impressions:\n"; -print_r($result["sealImpressions"]); +echo "\nDetected texts:\n"; +print_r($result["texts"]); ?> diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md b/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md index ddea61f389..b0e9e73608 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md @@ -565,31 +565,19 @@ for res in output: - + - + - -
sealImpressionstexts array印章文本识别结果。文本位置、内容和得分。
layoutImage string 版面区域检测结果图。图像为JPEG格式,使用Base64编码。
-

sealImpressions中的每个元素为一个object,具有如下属性:

- - - - - - - - - - - - + + +
名称类型含义
textsarray文本位置、内容和得分。ocrImagestringOCR结果图。图像为JPEG格式,使用Base64编码。
@@ -632,6 +620,7 @@ import requests API_URL = "http://localhost:8080/seal-recognition" # 服务URL image_path = "./demo.jpg" +ocr_image_path = "./ocr.jpg" layout_image_path = "./layout.jpg" # 对本地图像进行Base64编码 @@ -647,11 +636,14 @@ response = requests.post(API_URL, json=payload) # 处理接口返回数据 assert response.status_code == 200 result = response.json()["result"] +with open(ocr_image_path, "wb") as file: + file.write(base64.b64decode(result["ocrImage"])) +print(f"Output image saved at {ocr_image_path}") with open(layout_image_path, "wb") as file: file.write(base64.b64decode(result["layoutImage"])) print(f"Output image saved at {layout_image_path}") -print("\nDetected seal impressions:") -print(result["sealImpressions"]) +print("\nDetected texts:") +print(result["texts"])
C++ @@ -664,6 +656,7 @@ print(result["sealImpressions"]) int main() { httplib::Client client("localhost:8080"); const std::string imagePath = "./demo.jpg"; + const std::string ocrImagePath = "./ocr.jpg"; const std::string layoutImagePath = "./layout.jpg"; httplib::Headers headers = { @@ -694,6 +687,18 @@ int main() { nlohmann::json jsonResponse = nlohmann::json::parse(response->body); auto result = jsonResponse["result"]; + encodedImage = result["ocrImage"]; + std::string decoded_string = base64::from_base64(encodedImage); + std::vector<unsigned char> decodedOcrImage(decoded_string.begin(), decoded_string.end()); + std::ofstream outputOcrFile(ocrImagePath, std::ios::binary | std::ios::out); + if (outputOcrFile.is_open()) { + outputOcrFile.write(reinterpret_cast<char*>(decodedOcrImage.data()), decodedOcrImage.size()); + outputOcrFile.close(); + std::cout << "Output image saved at " << ocrImagePath << std::endl; + } else { + std::cerr << "Unable to open file for writing: " << ocrImagePath << std::endl; + } + encodedImage = result["layoutImage"]; decodedString = base64::from_base64(encodedImage); std::vector<unsigned char> decodedLayoutImage(decodedString.begin(), decodedString.end()); @@ -706,10 +711,10 @@ int main() { std::cerr << "Unable to open file for writing: " << layoutImagePath << std::endl; } - auto impressions = result["sealImpressions"]; - std::cout << "\nDetected seal impressions:" << std::endl; - for (const auto& impression : impressions) { - std::cout << impression << std::endl; + auto texts = result["texts"]; + std::cout << "\nDetected texts:" << std::endl; + for (const auto& text : texts) { + std::cout << text << std::endl; } } else { std::cout << "Failed to send HTTP request." 
<< std::endl; @@ -736,6 +741,7 @@ public class Main { public static void main(String[] args) throws IOException { String API_URL = "http://localhost:8080/seal-recognition"; // 服务URL String imagePath = "./demo.jpg"; // 本地图像 + String ocrImagePath = "./ocr.jpg"; String layoutImagePath = "./layout.jpg"; // 对本地图像进行Base64编码 @@ -762,8 +768,15 @@ public class Main { String responseBody = response.body().string(); JsonNode resultNode = objectMapper.readTree(responseBody); JsonNode result = resultNode.get("result"); + String ocrBase64Image = result.get("ocrImage").asText(); String layoutBase64Image = result.get("layoutImage").asText(); - JsonNode impressions = result.get("sealImpressions"); + JsonNode texts = result.get("texts"); + + byte[] imageBytes = Base64.getDecoder().decode(ocrBase64Image); + try (FileOutputStream fos = new FileOutputStream(ocrImagePath)) { + fos.write(imageBytes); + } + System.out.println("Output image saved at " + ocrImagePath); imageBytes = Base64.getDecoder().decode(layoutBase64Image); try (FileOutputStream fos = new FileOutputStream(layoutImagePath)) { @@ -771,7 +784,7 @@ public class Main { } System.out.println("Output image saved at " + layoutImagePath); - System.out.println("\nDetected seal impressions: " + impressions.toString()); + System.out.println("\nDetected texts: " + texts.toString()); } else { System.err.println("Request failed with code: " + response.code()); } @@ -796,6 +809,7 @@ import ( func main() { API_URL := "http://localhost:8080/seal-recognition" imagePath := "./demo.jpg" + ocrImagePath := "./ocr.jpg" layoutImagePath := "./layout.jpg" // 对本地图像进行Base64编码 @@ -836,8 +850,9 @@ func main() { } type Response struct { Result struct { + OcrImage string `json:"ocrImage"` LayoutImage string `json:"layoutImage"` - Impressions []map[string]interface{} `json:"sealImpressions"` + Texts []map[string]interface{} `json:"texts"` } `json:"result"` } var respData Response @@ -847,6 +862,18 @@ func main() { return } + ocrImageData, err := 
base64.StdEncoding.DecodeString(respData.Result.OcrImage) + if err != nil { + fmt.Println("Error decoding base64 image data:", err) + return + } + err = ioutil.WriteFile(ocrImagePath, ocrImageData, 0644) + if err != nil { + fmt.Println("Error writing image to file:", err) + return + } + fmt.Printf("Image saved at %s.jpg\n", ocrImagePath) + layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.LayoutImage) if err != nil { fmt.Println("Error decoding base64 image data:", err) @@ -859,9 +886,9 @@ func main() { } fmt.Printf("Image saved at %s.jpg\n", layoutImagePath) - fmt.Println("\nDetected seal impressions:") - for _, impression := range respData.Result.Impressions { - fmt.Println(impression) + fmt.Println("\nDetected texts:") + for _, text := range respData.Result.Texts { + fmt.Println(text) } }
@@ -880,6 +907,7 @@ class Program { static readonly string API_URL = "http://localhost:8080/seal-recognition"; static readonly string imagePath = "./demo.jpg"; + static readonly string ocrImagePath = "./ocr.jpg"; static readonly string layoutImagePath = "./layout.jpg"; static async Task Main(string[] args) @@ -901,13 +929,18 @@ class Program string responseBody = await response.Content.ReadAsStringAsync(); JObject jsonResponse = JObject.Parse(responseBody); + string ocrBase64Image = jsonResponse["result"]["ocrImage"].ToString(); + byte[] ocrImageBytes = Convert.FromBase64String(ocrBase64Image); + File.WriteAllBytes(ocrImagePath, ocrImageBytes); + Console.WriteLine($"Output image saved at {ocrImagePath}"); + string layoutBase64Image = jsonResponse["result"]["layoutImage"].ToString(); byte[] layoutImageBytes = Convert.FromBase64String(layoutBase64Image); File.WriteAllBytes(layoutImagePath, layoutImageBytes); Console.WriteLine($"Output image saved at {layoutImagePath}"); - Console.WriteLine("\nDetected seal impressions:"); - Console.WriteLine(jsonResponse["result"]["sealImpressions"].ToString()); + Console.WriteLine("\nDetected texts:"); + Console.WriteLine(jsonResponse["result"]["texts"].ToString()); } } @@ -919,6 +952,7 @@ const fs = require('fs'); const API_URL = 'http://localhost:8080/seal-recognition' const imagePath = './demo.jpg' +const ocrImagePath = "./ocr.jpg"; const layoutImagePath = "./layout.jpg"; let config = { @@ -942,14 +976,20 @@ axios.request(config) // 处理接口返回数据 const result = response.data["result"]; + let imageBuffer = Buffer.from(result["ocrImage"], 'base64'); + fs.writeFile(ocrImagePath, imageBuffer, (err) => { + if (err) throw err; + console.log(`Output image saved at ${ocrImagePath}`); + }); + imageBuffer = Buffer.from(result["layoutImage"], 'base64'); fs.writeFile(layoutImagePath, imageBuffer, (err) => { if (err) throw err; console.log(`Output image saved at ${layoutImagePath}`); }); - console.log("\nDetected seal impressions:"); -
console.log(result["sealImpressions"]); + console.log("\nDetected texts:"); + console.log(result["texts"]); }) .catch((error) => { console.log(error); @@ -962,6 +1002,7 @@ axios.request(config) $API_URL = "http://localhost:8080/seal-recognition"; // 服务URL $image_path = "./demo.jpg"; +$ocr_image_path = "./ocr.jpg"; $layout_image_path = "./layout.jpg"; // 对本地图像进行Base64编码 @@ -979,12 +1020,14 @@ curl_close($ch); // 处理接口返回数据 $result = json_decode($response, true)["result"]; +file_put_contents($ocr_image_path, base64_decode($result["ocrImage"])); +echo "Output image saved at " . $ocr_image_path . "\n"; file_put_contents($layout_image_path, base64_decode($result["layoutImage"])); echo "Output image saved at " . $layout_image_path . "\n"; -echo "\nDetected seal impressions:\n"; -print_r($result["sealImpressions"]); +echo "\nDetected texts:\n"; +print_r($result["texts"]); ?> diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py b/paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py index ae139025f7..82cfef242d 100644 --- a/paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py +++ b/paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py @@ -45,7 +45,8 @@ class Formula(BaseModel): class InferResult(BaseModel): formulas: List[Formula] - image: str + layoutImage: str + ocrImage: Optional[str] = None def create_pipeline_app( @@ -59,6 +60,7 @@ def create_pipeline_app( "/formula-recognition", operation_id="infer", responses={422: {"model": Response}}, + response_model_exclude_none=True, ) async def _infer(request: InferRequest) -> ResultResponse[InferResult]: pipeline = ctx.pipeline @@ -88,9 +90,16 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]: latex=latex, ) ) - output_image_base64 = serving_utils.base64_encode( - serving_utils.image_to_bytes(result.img) + layout_image_base64 = serving_utils.base64_encode( + 
serving_utils.image_to_bytes(result["layout_result"].img) ) + ocr_image = result["formula_result"].img + if ocr_image is not None: + ocr_image_base64 = serving_utils.base64_encode( + serving_utils.image_to_bytes(ocr_image) + ) + else: + ocr_image_base64 = None return ResultResponse( logId=serving_utils.generate_log_id(), @@ -98,7 +107,8 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]: errorMsg="Success", result=InferResult( formulas=formulas, - image=output_image_base64, + layoutImage=layout_image_base64, + ocrImage=ocr_image_base64, ), ) diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py b/paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py index 7d9ec20fcc..0632f2ef4e 100644 --- a/paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py +++ b/paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py @@ -44,13 +44,10 @@ class Text(BaseModel): score: float -class SealImpression(BaseModel): - texts: List[Text] - - class InferResult(BaseModel): - sealImpressions: List[SealImpression] + texts: List[Text] layoutImage: str + ocrImage: str def create_pipeline_app(pipeline: SealOCRPipeline, app_config: AppConfig) -> FastAPI: @@ -81,27 +78,28 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]: result = (await pipeline.infer(image))[0] - seal_impressions: List[SealImpression] = [] - for item in result["ocr_result"]: - texts: List[Text] = [] - for poly, text, score in zip( - item["dt_polys"], item["rec_text"], item["rec_score"] - ): - texts.append(Text(poly=poly, text=text, score=score)) - seal_impressions.append(SealImpression(texts=texts)) + texts: List[Text] = [] + for poly, text, score in zip( + result["ocr_result"]["dt_polys"], + result["ocr_result"]["rec_text"], + result["ocr_result"]["rec_score"], + ): + texts.append(Text(poly=poly, text=text, score=score)) layout_image_base64 = serving_utils.base64_encode( 
serving_utils.image_to_bytes(result["layout_result"].img) ) - - # TODO: OCR image + ocr_image_base64 = serving_utils.base64_encode( + serving_utils.image_to_bytes(result["ocr_result"].img) + ) return ResultResponse( logId=serving_utils.generate_log_id(), errorCode=0, errorMsg="Success", result=InferResult( - sealImpressions=seal_impressions, + texts=texts, layoutImage=layout_image_base64, + ocrImage=ocr_image_base64, ), ) diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py b/paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py index 36e221efff..55466bafc7 100644 --- a/paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py +++ b/paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py @@ -88,7 +88,7 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]: layout_image_base64 = serving_utils.base64_encode( serving_utils.image_to_bytes(result["layout_result"].img) ) - ocr_iamge_base64 = serving_utils.base64_encode( + ocr_image_base64 = serving_utils.base64_encode( serving_utils.image_to_bytes(result["ocr_result"].img) ) @@ -99,7 +99,7 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]: result=InferResult( tables=tables, layoutImage=layout_image_base64, - ocrImage=ocr_iamge_base64, + ocrImage=ocr_image_base64, ), )