Skip to content

Commit

Permalink
update paddle table extraction http path to use image dimensions (#473)
Browse files Browse the repository at this point in the history
  • Loading branch information
edknv authored Feb 20, 2025
1 parent 2de82df commit b3df4b0
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/nv_ingest/util/nim/paddle.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,12 @@ def chunk_list(lst, chunk_size):
base64_list = [data["base64_image"]]

input_list: List[Dict[str, Any]] = []
for b64 in base64_list:
for b64, img in zip(base64_list, images):
image_url = f"data:image/png;base64,{b64}"
image_obj = {"type": "image_url", "url": image_url}
input_list.append(image_obj)
_dims = {"new_width": img.shape[0], "new_height": img.shape[1]}
dims.append(_dims)

batches = []
batch_data_list = []
Expand All @@ -167,6 +169,7 @@ def chunk_list(lst, chunk_size):
payload = {"input": input_chunk}
batches.append(payload)
batch_data_list.append({"image_arrays": orig_chunk, "image_dims": dims_chunk})

return batches, batch_data_list

else:
Expand Down Expand Up @@ -255,7 +258,6 @@ def _prepare_paddle_payload(self, base64_img: str) -> Dict[str, Any]:
def _extract_content_from_paddle_http_response(
self,
json_response: Dict[str, Any],
table_content_format: Optional[str],
) -> List[Tuple[str, str]]:
"""
Extract content from the JSON response of a PaddleOCR HTTP API request.
Expand Down

0 comments on commit b3df4b0

Please sign in to comment.