diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md
index acc19b4993..78841f087e 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md
@@ -411,9 +411,14 @@ Below are the API references and multi-language service invocation examples:
Positions and contents of formulas. |
-image |
+layoutImage |
string |
-Formula recognition result image with detected formula positions annotated. The image is in JPEG format and encoded in Base64. |
+Layout area detection result image. The image is in JPEG format and encoded using Base64. |
+
+
+ocrImage |
+string |
+OCR result image. The image is in JPEG format and encoded using Base64. |
@@ -464,7 +469,8 @@ Below are the API references and multi-language service invocation examples:
"latex": "F({\bf x})=C(F_{1}(x_{1}),\cdot\cdot\cdot,F_{N}(x_{N})).\qquad\qquad\qquad(1)"
}
],
-"image": "xxxxxx"
+"layoutImage": "xxxxxx",
+"ocrImage": "xxxxxx"
}
@@ -479,7 +485,7 @@ import requests
API_URL = "http://localhost:8080/formula-recognition"
image_path = "./demo.jpg"
-output_image_path = "./out.jpg"
+layout_image_path = "./layout.jpg"
with open(image_path, "rb") as file:
image_bytes = file.read()
@@ -491,9 +497,9 @@ response = requests.post(API_URL, json=payload)
assert response.status_code == 200
result = response.json()["result"]
-with open(output_image_path, "wb") as file:
- file.write(base64.b64decode(result["image"]))
-print(f"Output image saved at {output_image_path}")
+with open(layout_image_path, "wb") as file:
+ file.write(base64.b64decode(result["layoutImage"]))
+print(f"Output image saved at {layout_image_path}")
print("\nDetected formulas:")
print(result["formulas"])
@@ -508,7 +514,7 @@ print(result["formulas"])
int main() {
httplib::Client client("localhost:8080");
const std::string imagePath = "./demo.jpg";
- const std::string outputImagePath = "./out.jpg";
+ const std::string layoutImagePath = "./layout.jpg";
httplib::Headers headers = {
{"Content-Type", "application/json"}
@@ -535,16 +541,16 @@ int main() {
nlohmann::json jsonResponse = nlohmann::json::parse(response->body);
auto result = jsonResponse["result"];
- encodedImage = result["image"];
- std::string decodedString = base64::from_base64(encodedImage);
- std::vector<unsigned char> decodedImage(decodedString.begin(), decodedString.end());
- std::ofstream outputImage(outPutImagePath, std::ios::binary | std::ios::out);
- if (outputImage.is_open()) {
- outputImage.write(reinterpret_cast<char*>(decodedImage.data()), decodedImage.size());
- outputImage.close();
- std::cout << "Output image saved at " << outPutImagePath << std::endl;
+ encodedImage = result["layoutImage"];
+ std::string decodedString = base64::from_base64(encodedImage);
+ std::vector<unsigned char> decodedLayoutImage(decodedString.begin(), decodedString.end());
+ std::ofstream outputLayoutFile(layoutImagePath, std::ios::binary | std::ios::out);
+ if (outputLayoutFile.is_open()) {
+ outputLayoutFile.write(reinterpret_cast<char*>(decodedLayoutImage.data()), decodedLayoutImage.size());
+ outputLayoutFile.close();
+ std::cout << "Output image saved at " << layoutImagePath << std::endl;
} else {
- std::cerr << "Unable to open file for writing: " << outPutImagePath << std::endl;
+ std::cerr << "Unable to open file for writing: " << layoutImagePath << std::endl;
}
auto formulas = result["formulas"];
@@ -577,7 +583,7 @@ public class Main {
public static void main(String[] args) throws IOException {
String API_URL = "http://localhost:8080/formula-recognition";
String imagePath = "./demo.jpg";
- String outputImagePath = "./out.jpg";
+ String layoutImagePath = "./layout.jpg";
File file = new File(imagePath);
byte[] fileContent = java.nio.file.Files.readAllBytes(file.toPath());
@@ -600,14 +606,15 @@ public class Main {
String responseBody = response.body().string();
JsonNode resultNode = objectMapper.readTree(responseBody);
JsonNode result = resultNode.get("result");
- String base64Image = result.get("image").asText();
+ String layoutBase64Image = result.get("layoutImage").asText();
JsonNode formulas = result.get("formulas");
- byte[] imageBytes = Base64.getDecoder().decode(base64Image);
- try (FileOutputStream fos = new FileOutputStream(outputImagePath)) {
+ byte[] imageBytes = Base64.getDecoder().decode(layoutBase64Image);
+ try (FileOutputStream fos = new FileOutputStream(layoutImagePath)) {
fos.write(imageBytes);
}
- System.out.println("Output image saved at " + outputImagePath);
+ System.out.println("Output image saved at " + layoutImagePath);
+
System.out.println("\nDetected formulas: " + formulas.toString());
} else {
System.err.println("Request failed with code: " + response.code());
@@ -633,7 +640,7 @@ import (
func main() {
API_URL := "http://localhost:8080/formula-recognition"
imagePath := "./demo.jpg"
- outputImagePath := "./out.jpg"
+ layoutImagePath := "./layout.jpg"
imageBytes, err := ioutil.ReadFile(imagePath)
if err != nil {
@@ -670,7 +677,7 @@ func main() {
}
type Response struct {
Result struct {
- Image string `json:"image"`
+ LayoutImage string `json:"layoutImage"`
Formulas []map[string]interface{} `json:"formulas"`
} `json:"result"`
}
@@ -681,17 +688,18 @@ func main() {
return
}
- outputImageData, err := base64.StdEncoding.DecodeString(respData.Result.Image)
+ layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.LayoutImage)
if err != nil {
fmt.Println("Error decoding base64 image data:", err)
return
}
- err = ioutil.WriteFile(outputImagePath, outputImageData, 0644)
+ err = ioutil.WriteFile(layoutImagePath, layoutImageData, 0644)
if err != nil {
fmt.Println("Error writing image to file:", err)
return
}
- fmt.Printf("Image saved at %s.jpg\n", outputImagePath)
+ fmt.Printf("Image saved at %s\n", layoutImagePath)
+
fmt.Println("\nDetected formulas:")
for _, formula := range respData.Result.Formulas {
fmt.Println(formula)
@@ -713,7 +721,7 @@ class Program
{
static readonly string API_URL = "http://localhost:8080/formula-recognition";
static readonly string imagePath = "./demo.jpg";
- static readonly string outputImagePath = "./out.jpg";
+ static readonly string layoutImagePath = "./layout.jpg";
static async Task Main(string[] args)
{
@@ -731,11 +739,11 @@ class Program
string responseBody = await response.Content.ReadAsStringAsync();
JObject jsonResponse = JObject.Parse(responseBody);
- string base64Image = jsonResponse["result"]["image"].ToString();
- byte[] outputImageBytes = Convert.FromBase64String(base64Image);
+ string layoutBase64Image = jsonResponse["result"]["layoutImage"].ToString();
+ byte[] layoutImageBytes = Convert.FromBase64String(layoutBase64Image);
+ File.WriteAllBytes(layoutImagePath, layoutImageBytes);
+ Console.WriteLine($"Output image saved at {layoutImagePath}");
- File.WriteAllBytes(outputImagePath, outputImageBytes);
- Console.WriteLine($"Output image saved at {outputImagePath}");
Console.WriteLine("\nDetected formulas:");
Console.WriteLine(jsonResponse["result"]["formulas"].ToString());
}
@@ -749,7 +757,7 @@ const fs = require('fs');
const API_URL = 'http://localhost:8080/formula-recognition'
const imagePath = './demo.jpg'
-const outputImagePath = "./out.jpg";
+const layoutImagePath = "./layout.jpg";
let config = {
method: 'POST',
@@ -768,11 +776,13 @@ function encodeImageToBase64(filePath) {
axios.request(config)
.then((response) => {
const result = response.data["result"];
- const imageBuffer = Buffer.from(result["image"], 'base64');
- fs.writeFile(outputImagePath, imageBuffer, (err) => {
+
+ const imageBuffer = Buffer.from(result["layoutImage"], 'base64');
+ fs.writeFile(layoutImagePath, imageBuffer, (err) => {
if (err) throw err;
- console.log(`Output image saved at ${outputImagePath}`);
+ console.log(`Output image saved at ${layoutImagePath}`);
});
+
console.log("\nDetected formulas:");
console.log(result["formulas"]);
})
@@ -785,9 +795,9 @@ axios.request(config)
<?php
-$API_URL = "http://localhost:8080/formula-recognition";
+$API_URL = "http://localhost:8080/formula-recognition";
$image_path = "./demo.jpg";
-$output_image_path = "./out.jpg";
+$layout_image_path = "./layout.jpg";
$image_data = base64_encode(file_get_contents($image_path));
$payload = array("image" => $image_data);
@@ -801,8 +811,10 @@ $response = curl_exec($ch);
curl_close($ch);
$result = json_decode($response, true)["result"];
-file_put_contents($output_image_path, base64_decode($result["image"]));
-echo "Output image saved at " . $output_image_path . "\n";
+
+file_put_contents($layout_image_path, base64_decode($result["layoutImage"]));
+echo "Output image saved at " . $layout_image_path . "\n";
+
echo "\nDetected formulas:\n";
print_r($result["formulas"]);
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md b/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md
index 5bf2c5709c..8d8a39bb67 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md
@@ -412,9 +412,14 @@ for res in output:
公式位置和内容。 |
-image |
+layoutImage |
string |
-公式识别结果图,其中标注检测到的公式位置。图像为JPEG格式,使用Base64编码。 |
+版面区域检测结果图。图像为JPEG格式,使用Base64编码。 |
+
+
+ocrImage |
+string |
+OCR结果图。图像为JPEG格式,使用Base64编码。 |
@@ -465,7 +470,8 @@ for res in output:
"latex": "F({\bf x})=C(F_{1}(x_{1}),\cdot\cdot\cdot,F_{N}(x_{N})).\qquad\qquad\qquad(1)"
}
],
-"image": "xxxxxx"
+"layoutImage": "xxxxxx",
+"ocrImage": "xxxxxx"
}
@@ -480,7 +486,7 @@ import requests
API_URL = "http://localhost:8080/formula-recognition" # 服务URL
image_path = "./demo.jpg"
-output_image_path = "./out.jpg"
+layout_image_path = "./layout.jpg"
# 对本地图像进行Base64编码
with open(image_path, "rb") as file:
@@ -495,9 +501,9 @@ response = requests.post(API_URL, json=payload)
# 处理接口返回数据
assert response.status_code == 200
result = response.json()["result"]
-with open(output_image_path, "wb") as file:
- file.write(base64.b64decode(result["image"]))
-print(f"Output image saved at {output_image_path}")
+with open(layout_image_path, "wb") as file:
+ file.write(base64.b64decode(result["layoutImage"]))
+print(f"Output image saved at {layout_image_path}")
print("\nDetected formulas:")
print(result["formulas"])
@@ -512,7 +518,7 @@ print(result["formulas"])
int main() {
httplib::Client client("localhost:8080");
const std::string imagePath = "./demo.jpg";
- const std::string outputImagePath = "./out.jpg";
+ const std::string layoutImagePath = "./layout.jpg";
httplib::Headers headers = {
{"Content-Type", "application/json"}
@@ -542,16 +548,16 @@ int main() {
nlohmann::json jsonResponse = nlohmann::json::parse(response->body);
auto result = jsonResponse["result"];
- encodedImage = result["image"];
- std::string decodedString = base64::from_base64(encodedImage);
- std::vector<unsigned char> decodedImage(decodedString.begin(), decodedString.end());
- std::ofstream outputImage(outPutImagePath, std::ios::binary | std::ios::out);
- if (outputImage.is_open()) {
- outputImage.write(reinterpret_cast<char*>(decodedImage.data()), decodedImage.size());
- outputImage.close();
- std::cout << "Output image saved at " << outPutImagePath << std::endl;
+ encodedImage = result["layoutImage"];
+ std::string decodedString = base64::from_base64(encodedImage);
+ std::vector<unsigned char> decodedLayoutImage(decodedString.begin(), decodedString.end());
+ std::ofstream outputLayoutFile(layoutImagePath, std::ios::binary | std::ios::out);
+ if (outputLayoutFile.is_open()) {
+ outputLayoutFile.write(reinterpret_cast<char*>(decodedLayoutImage.data()), decodedLayoutImage.size());
+ outputLayoutFile.close();
+ std::cout << "Output image saved at " << layoutImagePath << std::endl;
} else {
- std::cerr << "Unable to open file for writing: " << outPutImagePath << std::endl;
+ std::cerr << "Unable to open file for writing: " << layoutImagePath << std::endl;
}
auto formulas = result["formulas"];
@@ -584,7 +590,7 @@ public class Main {
public static void main(String[] args) throws IOException {
String API_URL = "http://localhost:8080/formula-recognition"; // 服务URL
String imagePath = "./demo.jpg"; // 本地图像
- String outputImagePath = "./out.jpg"; // 输出图像
+ String layoutImagePath = "./layout.jpg";
// 对本地图像进行Base64编码
File file = new File(imagePath);
@@ -610,14 +616,15 @@ public class Main {
String responseBody = response.body().string();
JsonNode resultNode = objectMapper.readTree(responseBody);
JsonNode result = resultNode.get("result");
- String base64Image = result.get("image").asText();
+ String layoutBase64Image = result.get("layoutImage").asText();
JsonNode formulas = result.get("formulas");
- byte[] imageBytes = Base64.getDecoder().decode(base64Image);
- try (FileOutputStream fos = new FileOutputStream(outputImagePath)) {
+ byte[] imageBytes = Base64.getDecoder().decode(layoutBase64Image);
+ try (FileOutputStream fos = new FileOutputStream(layoutImagePath)) {
fos.write(imageBytes);
}
- System.out.println("Output image saved at " + outputImagePath);
+ System.out.println("Output image saved at " + layoutImagePath);
+
System.out.println("\nDetected formulas: " + formulas.toString());
} else {
System.err.println("Request failed with code: " + response.code());
@@ -643,7 +650,7 @@ import (
func main() {
API_URL := "http://localhost:8080/formula-recognition"
imagePath := "./demo.jpg"
- outputImagePath := "./out.jpg"
+ layoutImagePath := "./layout.jpg"
// 对本地图像进行Base64编码
imageBytes, err := ioutil.ReadFile(imagePath)
@@ -683,7 +690,7 @@ func main() {
}
type Response struct {
Result struct {
- Image string `json:"image"`
+ LayoutImage string `json:"layoutImage"`
Formulas []map[string]interface{} `json:"formulas"`
} `json:"result"`
}
@@ -694,17 +701,18 @@ func main() {
return
}
- outputImageData, err := base64.StdEncoding.DecodeString(respData.Result.Image)
+ layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.LayoutImage)
if err != nil {
fmt.Println("Error decoding base64 image data:", err)
return
}
- err = ioutil.WriteFile(outputImagePath, outputImageData, 0644)
+ err = ioutil.WriteFile(layoutImagePath, layoutImageData, 0644)
if err != nil {
fmt.Println("Error writing image to file:", err)
return
}
- fmt.Printf("Image saved at %s.jpg\n", outputImagePath)
+ fmt.Printf("Image saved at %s\n", layoutImagePath)
+
fmt.Println("\nDetected formulas:")
for _, formula := range respData.Result.Formulas {
fmt.Println(formula)
@@ -726,7 +734,7 @@ class Program
{
static readonly string API_URL = "http://localhost:8080/formula-recognition";
static readonly string imagePath = "./demo.jpg";
- static readonly string outputImagePath = "./out.jpg";
+ static readonly string layoutImagePath = "./layout.jpg";
static async Task Main(string[] args)
{
@@ -747,11 +755,11 @@ class Program
string responseBody = await response.Content.ReadAsStringAsync();
JObject jsonResponse = JObject.Parse(responseBody);
- string base64Image = jsonResponse["result"]["image"].ToString();
- byte[] outputImageBytes = Convert.FromBase64String(base64Image);
+ string layoutBase64Image = jsonResponse["result"]["layoutImage"].ToString();
+ byte[] layoutImageBytes = Convert.FromBase64String(layoutBase64Image);
+ File.WriteAllBytes(layoutImagePath, layoutImageBytes);
+ Console.WriteLine($"Output image saved at {layoutImagePath}");
- File.WriteAllBytes(outputImagePath, outputImageBytes);
- Console.WriteLine($"Output image saved at {outputImagePath}");
Console.WriteLine("\nDetected formulas:");
Console.WriteLine(jsonResponse["result"]["formulas"].ToString());
}
@@ -765,7 +773,7 @@ const fs = require('fs');
const API_URL = 'http://localhost:8080/formula-recognition'
const imagePath = './demo.jpg'
-const outputImagePath = "./out.jpg";
+const layoutImagePath = "./layout.jpg";
let config = {
method: 'POST',
@@ -787,11 +795,13 @@ axios.request(config)
.then((response) => {
// 处理接口返回数据
const result = response.data["result"];
- const imageBuffer = Buffer.from(result["image"], 'base64');
- fs.writeFile(outputImagePath, imageBuffer, (err) => {
+
+ const imageBuffer = Buffer.from(result["layoutImage"], 'base64');
+ fs.writeFile(layoutImagePath, imageBuffer, (err) => {
if (err) throw err;
- console.log(`Output image saved at ${outputImagePath}`);
+ console.log(`Output image saved at ${layoutImagePath}`);
});
+
console.log("\nDetected formulas:");
console.log(result["formulas"]);
})
@@ -806,7 +816,7 @@ axios.request(config)
$API_URL = "http://localhost:8080/formula-recognition"; // 服务URL
$image_path = "./demo.jpg";
-$output_image_path = "./out.jpg";
+$layout_image_path = "./layout.jpg";
// 对本地图像进行Base64编码
$image_data = base64_encode(file_get_contents($image_path));
@@ -823,8 +833,10 @@ curl_close($ch);
// 处理接口返回数据
$result = json_decode($response, true)["result"];
-file_put_contents($output_image_path, base64_decode($result["image"]));
-echo "Output image saved at " . $output_image_path . "\n";
+
+file_put_contents($layout_image_path, base64_decode($result["layoutImage"]));
+echo "Output image saved at " . $layout_image_path . "\n";
+
echo "\nDetected formulas:\n";
print_r($result["formulas"]);
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md
index 536379f5b5..b708defb72 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md
@@ -555,18 +555,23 @@ Below are the API references and multi-language service invocation examples:
-sealImpressions |
+texts |
array |
-Seal recognition results. |
+Positions, contents, and scores of texts. |
layoutImage |
string |
Layout area detection result image. The image is in JPEG format and encoded using Base64. |
+
+ocrImage |
+string |
+OCR result image. The image is in JPEG format and encoded using Base64. |
+
-Each element in sealImpressions
is an object
with the following properties:
+Each element in texts
is an object
with the following properties:
@@ -605,6 +610,7 @@ import requests
API_URL = "http://localhost:8080/seal-recognition"
image_path = "./demo.jpg"
+ocr_image_path = "./ocr.jpg"
layout_image_path = "./layout.jpg"
with open(image_path, "rb") as file:
@@ -617,11 +623,14 @@ response = requests.post(API_URL, json=payload)
assert response.status_code == 200
result = response.json()["result"]
+with open(ocr_image_path, "wb") as file:
+ file.write(base64.b64decode(result["ocrImage"]))
+print(f"Output image saved at {ocr_image_path}")
with open(layout_image_path, "wb") as file:
file.write(base64.b64decode(result["layoutImage"]))
print(f"Output image saved at {layout_image_path}")
-print("\nDetected seal impressions:")
-print(result["sealImpressions"])
+print("\nDetected texts:")
+print(result["texts"])
C++
@@ -634,6 +643,7 @@ print(result["sealImpressions"])
int main() {
httplib::Client client("localhost:8080");
const std::string imagePath = "./demo.jpg";
+ const std::string ocrImagePath = "./ocr.jpg";
const std::string layoutImagePath = "./layout.jpg";
httplib::Headers headers = {
@@ -661,6 +671,18 @@ int main() {
nlohmann::json jsonResponse = nlohmann::json::parse(response->body);
auto result = jsonResponse["result"];
+ encodedImage = result["ocrImage"];
+ std::string decodedString = base64::from_base64(encodedImage);
+ std::vector<unsigned char> decodedOcrImage(decodedString.begin(), decodedString.end());
+ std::ofstream outputOcrFile(ocrImagePath, std::ios::binary | std::ios::out);
+ if (outputOcrFile.is_open()) {
+ outputOcrFile.write(reinterpret_cast<char*>(decodedOcrImage.data()), decodedOcrImage.size());
+ outputOcrFile.close();
+ std::cout << "Output image saved at " << ocrImagePath << std::endl;
+ } else {
+ std::cerr << "Unable to open file for writing: " << ocrImagePath << std::endl;
+ }
+
encodedImage = result["layoutImage"];
decodedString = base64::from_base64(encodedImage);
std::vector<unsigned char> decodedLayoutImage(decodedString.begin(), decodedString.end());
@@ -673,10 +695,10 @@ int main() {
std::cerr << "Unable to open file for writing: " << layoutImagePath << std::endl;
}
- auto impressions = result["sealImpressions"];
- std::cout << "\nDetected seal impressions:" << std::endl;
- for (const auto& impression : impressions) {
- std::cout << impression << std::endl;
+ auto texts = result["texts"];
+ std::cout << "\nDetected texts:" << std::endl;
+ for (const auto& text : texts) {
+ std::cout << text << std::endl;
}
} else {
std::cout << "Failed to send HTTP request." << std::endl;
@@ -703,6 +725,7 @@ public class Main {
public static void main(String[] args) throws IOException {
String API_URL = "http://localhost:8080/seal-recognition";
String imagePath = "./demo.jpg";
+ String ocrImagePath = "./ocr.jpg";
String layoutImagePath = "./layout.jpg";
File file = new File(imagePath);
@@ -726,8 +749,15 @@ public class Main {
String responseBody = response.body().string();
JsonNode resultNode = objectMapper.readTree(responseBody);
JsonNode result = resultNode.get("result");
+ String ocrBase64Image = result.get("ocrImage").asText();
String layoutBase64Image = result.get("layoutImage").asText();
- JsonNode impressions = result.get("sealImpressions");
+ JsonNode texts = result.get("texts");
+
+ byte[] imageBytes = Base64.getDecoder().decode(ocrBase64Image);
+ try (FileOutputStream fos = new FileOutputStream(ocrImagePath)) {
+ fos.write(imageBytes);
+ }
+ System.out.println("Output image saved at " + ocrImagePath);
imageBytes = Base64.getDecoder().decode(layoutBase64Image);
try (FileOutputStream fos = new FileOutputStream(layoutImagePath)) {
@@ -735,7 +765,7 @@ public class Main {
}
System.out.println("Output image saved at " + layoutImagePath);
- System.out.println("\nDetected seal impressions: " + impressions.toString());
+ System.out.println("\nDetected texts: " + texts.toString());
} else {
System.err.println("Request failed with code: " + response.code());
}
@@ -760,6 +790,7 @@ import (
func main() {
API_URL := "http://localhost:8080/seal-recognition"
imagePath := "./demo.jpg"
+ ocrImagePath := "./ocr.jpg"
layoutImagePath := "./layout.jpg"
imageBytes, err := ioutil.ReadFile(imagePath)
@@ -797,8 +828,9 @@ func main() {
}
type Response struct {
Result struct {
+ OcrImage string `json:"ocrImage"`
LayoutImage string `json:"layoutImage"`
- Impressions []map[string]interface{} `json:"sealImpressions"`
+ Texts []map[string]interface{} `json:"texts"`
} `json:"result"`
}
var respData Response
@@ -808,6 +840,18 @@ func main() {
return
}
+ ocrImageData, err := base64.StdEncoding.DecodeString(respData.Result.OcrImage)
+ if err != nil {
+ fmt.Println("Error decoding base64 image data:", err)
+ return
+ }
+ err = ioutil.WriteFile(ocrImagePath, ocrImageData, 0644)
+ if err != nil {
+ fmt.Println("Error writing image to file:", err)
+ return
+ }
+ fmt.Printf("Image saved at %s\n", ocrImagePath)
+
layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.LayoutImage)
if err != nil {
fmt.Println("Error decoding base64 image data:", err)
@@ -820,9 +864,9 @@ func main() {
}
fmt.Printf("Image saved at %s.jpg\n", layoutImagePath)
- fmt.Println("\nDetected seal impressions:")
- for _, impression := range respData.Result.Impressions {
- fmt.Println(impression)
+ fmt.Println("\nDetected texts:")
+ for _, text := range respData.Result.Texts {
+ fmt.Println(text)
}
}
@@ -841,6 +885,7 @@ class Program
{
static readonly string API_URL = "http://localhost:8080/seal-recognition";
static readonly string imagePath = "./demo.jpg";
+ static readonly string ocrImagePath = "./ocr.jpg";
static readonly string layoutImagePath = "./layout.jpg";
static async Task Main(string[] args)
@@ -859,13 +904,18 @@ class Program
string responseBody = await response.Content.ReadAsStringAsync();
JObject jsonResponse = JObject.Parse(responseBody);
+ string ocrBase64Image = jsonResponse["result"]["ocrImage"].ToString();
+ byte[] ocrImageBytes = Convert.FromBase64String(ocrBase64Image);
+ File.WriteAllBytes(ocrImagePath, ocrImageBytes);
+ Console.WriteLine($"Output image saved at {ocrImagePath}");
+
string layoutBase64Image = jsonResponse["result"]["layoutImage"].ToString();
byte[] layoutImageBytes = Convert.FromBase64String(layoutBase64Image);
File.WriteAllBytes(layoutImagePath, layoutImageBytes);
Console.WriteLine($"Output image saved at {layoutImagePath}");
- Console.WriteLine("\nDetected seal impressions:");
- Console.WriteLine(jsonResponse["result"]["sealImpressions"].ToString());
+ Console.WriteLine("\nDetected texts:");
+ Console.WriteLine(jsonResponse["result"]["texts"].ToString());
}
}
@@ -877,6 +927,7 @@ const fs = require('fs');
const API_URL = 'http://localhost:8080/seal-recognition'
const imagePath = './demo.jpg'
+const ocrImagePath = "./ocr.jpg";
const layoutImagePath = "./layout.jpg";
let config = {
@@ -897,14 +948,20 @@ axios.request(config)
.then((response) => {
const result = response.data["result"];
+ let imageBuffer = Buffer.from(result["ocrImage"], 'base64');
+ fs.writeFile(ocrImagePath, imageBuffer, (err) => {
+ if (err) throw err;
+ console.log(`Output image saved at ${ocrImagePath}`);
+ });
+
imageBuffer = Buffer.from(result["layoutImage"], 'base64');
fs.writeFile(layoutImagePath, imageBuffer, (err) => {
if (err) throw err;
console.log(`Output image saved at ${layoutImagePath}`);
});
- console.log("\nDetected seal impressions:");
- console.log(result["sealImpressions"]);
+ console.log("\nDetected texts:");
+ console.log(result["texts"]);
})
.catch((error) => {
console.log(error);
@@ -917,6 +974,7 @@ axios.request(config)
$API_URL = "http://localhost:8080/seal-recognition";
$image_path = "./demo.jpg";
+$ocr_image_path = "./ocr.jpg";
$layout_image_path = "./layout.jpg";
$image_data = base64_encode(file_get_contents($image_path));
@@ -931,12 +989,14 @@ $response = curl_exec($ch);
curl_close($ch);
$result = json_decode($response, true)["result"];
+file_put_contents($ocr_image_path, base64_decode($result["ocrImage"]));
+echo "Output image saved at " . $ocr_image_path . "\n";
file_put_contents($layout_image_path, base64_decode($result["layoutImage"]));
echo "Output image saved at " . $layout_image_path . "\n";
-echo "\nDetected seal impressions:\n";
-print_r($result["sealImpressions"]);
+echo "\nDetected texts:\n";
+print_r($result["texts"]);
?>
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md b/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md
index ddea61f389..b0e9e73608 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md
@@ -565,31 +565,19 @@ for res in output:
-sealImpressions |
+texts |
array |
-印章文本识别结果。 |
+文本位置、内容和得分。 |
layoutImage |
string |
版面区域检测结果图。图像为JPEG格式,使用Base64编码。 |
-
-
-sealImpressions
中的每个元素为一个object
,具有如下属性:
-
-
-
-名称 |
-类型 |
-含义 |
-
-
-
-texts |
-array |
-文本位置、内容和得分。 |
+ocrImage |
+string |
+OCR结果图。图像为JPEG格式,使用Base64编码。 |
@@ -632,6 +620,7 @@ import requests
API_URL = "http://localhost:8080/seal-recognition" # 服务URL
image_path = "./demo.jpg"
+ocr_image_path = "./ocr.jpg"
layout_image_path = "./layout.jpg"
# 对本地图像进行Base64编码
@@ -647,11 +636,14 @@ response = requests.post(API_URL, json=payload)
# 处理接口返回数据
assert response.status_code == 200
result = response.json()["result"]
+with open(ocr_image_path, "wb") as file:
+ file.write(base64.b64decode(result["ocrImage"]))
+print(f"Output image saved at {ocr_image_path}")
with open(layout_image_path, "wb") as file:
file.write(base64.b64decode(result["layoutImage"]))
print(f"Output image saved at {layout_image_path}")
-print("\nDetected seal impressions:")
-print(result["sealImpressions"])
+print("\nDetected texts:")
+print(result["texts"])
C++
@@ -664,6 +656,7 @@ print(result["sealImpressions"])
int main() {
httplib::Client client("localhost:8080");
const std::string imagePath = "./demo.jpg";
+ const std::string ocrImagePath = "./ocr.jpg";
const std::string layoutImagePath = "./layout.jpg";
httplib::Headers headers = {
@@ -694,6 +687,18 @@ int main() {
nlohmann::json jsonResponse = nlohmann::json::parse(response->body);
auto result = jsonResponse["result"];
+ encodedImage = result["ocrImage"];
+ std::string decodedString = base64::from_base64(encodedImage);
+ std::vector<unsigned char> decodedOcrImage(decodedString.begin(), decodedString.end());
+ std::ofstream outputOcrFile(ocrImagePath, std::ios::binary | std::ios::out);
+ if (outputOcrFile.is_open()) {
+ outputOcrFile.write(reinterpret_cast<char*>(decodedOcrImage.data()), decodedOcrImage.size());
+ outputOcrFile.close();
+ std::cout << "Output image saved at " << ocrImagePath << std::endl;
+ } else {
+ std::cerr << "Unable to open file for writing: " << ocrImagePath << std::endl;
+ }
+
encodedImage = result["layoutImage"];
decodedString = base64::from_base64(encodedImage);
std::vector<unsigned char> decodedLayoutImage(decodedString.begin(), decodedString.end());
@@ -706,10 +711,10 @@ int main() {
std::cerr << "Unable to open file for writing: " << layoutImagePath << std::endl;
}
- auto impressions = result["sealImpressions"];
- std::cout << "\nDetected seal impressions:" << std::endl;
- for (const auto& impression : impressions) {
- std::cout << impression << std::endl;
+ auto texts = result["texts"];
+ std::cout << "\nDetected texts:" << std::endl;
+ for (const auto& text : texts) {
+ std::cout << text << std::endl;
}
} else {
std::cout << "Failed to send HTTP request." << std::endl;
@@ -736,6 +741,7 @@ public class Main {
public static void main(String[] args) throws IOException {
String API_URL = "http://localhost:8080/seal-recognition"; // 服务URL
String imagePath = "./demo.jpg"; // 本地图像
+ String ocrImagePath = "./ocr.jpg";
String layoutImagePath = "./layout.jpg";
// 对本地图像进行Base64编码
@@ -762,8 +768,15 @@ public class Main {
String responseBody = response.body().string();
JsonNode resultNode = objectMapper.readTree(responseBody);
JsonNode result = resultNode.get("result");
+ String ocrBase64Image = result.get("ocrImage").asText();
String layoutBase64Image = result.get("layoutImage").asText();
- JsonNode impressions = result.get("sealImpressions");
+ JsonNode texts = result.get("texts");
+
+ byte[] imageBytes = Base64.getDecoder().decode(ocrBase64Image);
+ try (FileOutputStream fos = new FileOutputStream(ocrImagePath)) {
+ fos.write(imageBytes);
+ }
+ System.out.println("Output image saved at " + ocrImagePath);
imageBytes = Base64.getDecoder().decode(layoutBase64Image);
try (FileOutputStream fos = new FileOutputStream(layoutImagePath)) {
@@ -771,7 +784,7 @@ public class Main {
}
System.out.println("Output image saved at " + layoutImagePath);
- System.out.println("\nDetected seal impressions: " + impressions.toString());
+ System.out.println("\nDetected texts: " + texts.toString());
} else {
System.err.println("Request failed with code: " + response.code());
}
@@ -796,6 +809,7 @@ import (
func main() {
API_URL := "http://localhost:8080/seal-recognition"
imagePath := "./demo.jpg"
+ ocrImagePath := "./ocr.jpg"
layoutImagePath := "./layout.jpg"
// 对本地图像进行Base64编码
@@ -836,8 +850,9 @@ func main() {
}
type Response struct {
Result struct {
+ OcrImage string `json:"ocrImage"`
LayoutImage string `json:"layoutImage"`
- Impressions []map[string]interface{} `json:"sealImpressions"`
+ Texts []map[string]interface{} `json:"texts"`
} `json:"result"`
}
var respData Response
@@ -847,6 +862,18 @@ func main() {
return
}
+ ocrImageData, err := base64.StdEncoding.DecodeString(respData.Result.OcrImage)
+ if err != nil {
+ fmt.Println("Error decoding base64 image data:", err)
+ return
+ }
+ err = ioutil.WriteFile(ocrImagePath, ocrImageData, 0644)
+ if err != nil {
+ fmt.Println("Error writing image to file:", err)
+ return
+ }
+ fmt.Printf("Image saved at %s\n", ocrImagePath)
+
layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.LayoutImage)
if err != nil {
fmt.Println("Error decoding base64 image data:", err)
@@ -859,9 +886,9 @@ func main() {
}
- fmt.Printf("Image saved at %s.jpg\n", layoutImagePath)
+ fmt.Printf("Image saved at %s\n", layoutImagePath)
- fmt.Println("\nDetected seal impressions:")
- for _, impression := range respData.Result.Impressions {
- fmt.Println(impression)
+ fmt.Println("\nDetected texts:")
+ for _, text := range respData.Result.Texts {
+ fmt.Println(text)
}
}
@@ -880,6 +907,7 @@ class Program
{
static readonly string API_URL = "http://localhost:8080/seal-recognition";
static readonly string imagePath = "./demo.jpg";
+ static readonly string ocrImagePath = "./ocr.jpg";
static readonly string layoutImagePath = "./layout.jpg";
static async Task Main(string[] args)
@@ -901,13 +929,18 @@ class Program
string responseBody = await response.Content.ReadAsStringAsync();
JObject jsonResponse = JObject.Parse(responseBody);
+ string ocrBase64Image = jsonResponse["result"]["ocrImage"].ToString();
+ byte[] ocrImageBytes = Convert.FromBase64String(ocrBase64Image);
+ File.WriteAllBytes(ocrImagePath, ocrImageBytes);
+ Console.WriteLine($"Output image saved at {ocrImagePath}");
+
string layoutBase64Image = jsonResponse["result"]["layoutImage"].ToString();
byte[] layoutImageBytes = Convert.FromBase64String(layoutBase64Image);
File.WriteAllBytes(layoutImagePath, layoutImageBytes);
Console.WriteLine($"Output image saved at {layoutImagePath}");
- Console.WriteLine("\nDetected seal impressions:");
- Console.WriteLine(jsonResponse["result"]["sealImpressions"].ToString());
+ Console.WriteLine("\nDetected texts:");
+ Console.WriteLine(jsonResponse["result"]["texts"].ToString());
}
}
@@ -919,6 +952,7 @@ const fs = require('fs');
const API_URL = 'http://localhost:8080/seal-recognition'
const imagePath = './demo.jpg'
+const ocrImagePath = "./ocr.jpg";
const layoutImagePath = "./layout.jpg";
let config = {
@@ -942,14 +976,20 @@ axios.request(config)
// 处理接口返回数据
const result = response.data["result"];
+ let imageBuffer = Buffer.from(result["ocrImage"], 'base64');
+ fs.writeFile(ocrImagePath, imageBuffer, (err) => {
+ if (err) throw err;
+ console.log(`Output image saved at ${ocrImagePath}`);
+ });
+
imageBuffer = Buffer.from(result["layoutImage"], 'base64');
fs.writeFile(layoutImagePath, imageBuffer, (err) => {
if (err) throw err;
console.log(`Output image saved at ${layoutImagePath}`);
});
- console.log("\nDetected seal impressions:");
- console.log(result["sealImpressions"]);
+ console.log("\nDetected texts:");
+ console.log(result["texts"]);
})
.catch((error) => {
console.log(error);
@@ -962,6 +1002,7 @@ axios.request(config)
$API_URL = "http://localhost:8080/seal-recognition"; // 服务URL
$image_path = "./demo.jpg";
+$ocr_image_path = "./ocr.jpg";
$layout_image_path = "./layout.jpg";
// 对本地图像进行Base64编码
@@ -979,12 +1020,14 @@ curl_close($ch);
// 处理接口返回数据
$result = json_decode($response, true)["result"];
+file_put_contents($ocr_image_path, base64_decode($result["ocrImage"]));
+echo "Output image saved at " . $ocr_image_path . "\n";
file_put_contents($layout_image_path, base64_decode($result["layoutImage"]));
echo "Output image saved at " . $layout_image_path . "\n";
-echo "\nDetected seal impressions:\n";
-print_r($result["sealImpressions"]);
+echo "\nDetected texts:\n";
+print_r($result["texts"]);
?>
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py b/paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py
index ae139025f7..82cfef242d 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py
@@ -45,7 +45,8 @@ class Formula(BaseModel):
class InferResult(BaseModel):
formulas: List[Formula]
- image: str
+ layoutImage: str
+ ocrImage: Optional[str] = None
def create_pipeline_app(
@@ -59,6 +60,7 @@ def create_pipeline_app(
"/formula-recognition",
operation_id="infer",
responses={422: {"model": Response}},
+ response_model_exclude_none=True,
)
async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
pipeline = ctx.pipeline
@@ -88,9 +90,16 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
latex=latex,
)
)
- output_image_base64 = serving_utils.base64_encode(
- serving_utils.image_to_bytes(result.img)
+ layout_image_base64 = serving_utils.base64_encode(
+ serving_utils.image_to_bytes(result["layout_result"].img)
)
+ ocr_image = result["formula_result"].img
+ if ocr_image is not None:
+ ocr_image_base64 = serving_utils.base64_encode(
+ serving_utils.image_to_bytes(ocr_image)
+ )
+ else:
+ ocr_image_base64 = None
return ResultResponse(
logId=serving_utils.generate_log_id(),
@@ -98,7 +107,8 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
errorMsg="Success",
result=InferResult(
formulas=formulas,
- image=output_image_base64,
+ layoutImage=layout_image_base64,
+ ocrImage=ocr_image_base64,
),
)
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py b/paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py
index 7d9ec20fcc..0632f2ef4e 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py
@@ -44,13 +44,10 @@ class Text(BaseModel):
score: float
-class SealImpression(BaseModel):
- texts: List[Text]
-
-
class InferResult(BaseModel):
- sealImpressions: List[SealImpression]
+ texts: List[Text]
layoutImage: str
+ ocrImage: str
def create_pipeline_app(pipeline: SealOCRPipeline, app_config: AppConfig) -> FastAPI:
@@ -81,27 +78,28 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
result = (await pipeline.infer(image))[0]
- seal_impressions: List[SealImpression] = []
- for item in result["ocr_result"]:
- texts: List[Text] = []
- for poly, text, score in zip(
- item["dt_polys"], item["rec_text"], item["rec_score"]
- ):
- texts.append(Text(poly=poly, text=text, score=score))
- seal_impressions.append(SealImpression(texts=texts))
+ texts: List[Text] = []
+ for poly, text, score in zip(
+ result["ocr_result"]["dt_polys"],
+ result["ocr_result"]["rec_text"],
+ result["ocr_result"]["rec_score"],
+ ):
+ texts.append(Text(poly=poly, text=text, score=score))
layout_image_base64 = serving_utils.base64_encode(
serving_utils.image_to_bytes(result["layout_result"].img)
)
-
- # TODO: OCR image
+ ocr_image_base64 = serving_utils.base64_encode(
+ serving_utils.image_to_bytes(result["ocr_result"].img)
+ )
return ResultResponse(
logId=serving_utils.generate_log_id(),
errorCode=0,
errorMsg="Success",
result=InferResult(
- sealImpressions=seal_impressions,
+ texts=texts,
layoutImage=layout_image_base64,
+ ocrImage=ocr_image_base64,
),
)
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py b/paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py
index 36e221efff..55466bafc7 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py
@@ -88,7 +88,7 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
layout_image_base64 = serving_utils.base64_encode(
serving_utils.image_to_bytes(result["layout_result"].img)
)
- ocr_iamge_base64 = serving_utils.base64_encode(
+ ocr_image_base64 = serving_utils.base64_encode(
serving_utils.image_to_bytes(result["ocr_result"].img)
)
@@ -99,7 +99,7 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
result=InferResult(
tables=tables,
layoutImage=layout_image_base64,
- ocrImage=ocr_iamge_base64,
+ ocrImage=ocr_image_base64,
),
)