diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp index e373f73aa4..602649c012 100644 --- a/src/api/altorenderer.cpp +++ b/src/api/altorenderer.cpp @@ -51,6 +51,20 @@ static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level, } } +static std::string GetID(char const * prefix, int page_number, int counter) { + std::stringstream idstr; + // IDs will only have the counter for the first page to keep them consistent + // with the IDs assigned before this change was made. + // From the second page on, IDs will also contain the page number to make them unique. + if (page_number == 0) { + idstr << prefix << "_" << counter; + } else { + idstr << prefix << "_" << page_number << "_" << counter; + } + + return idstr.str(); +} + /// /// Append the ALTO XML for the beginning of the document /// @@ -168,7 +182,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { case PT_PULLOUT_IMAGE: { // Handle all kinds of images. // TODO: optionally add TYPE, for example TYPE="photo". - alto_str << "\t\t\t\t\n"; res_it->Next(RIL_BLOCK); @@ -177,7 +191,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { case PT_HORZ_LINE: case PT_VERT_LINE: // Handle horizontal and vertical lines. - alto_str << "\t\t\t\t\n"; res_it->Next(RIL_BLOCK); @@ -190,24 +204,24 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { } if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - alto_str << "\t\t\t\tIsAtBeginningOf(RIL_PARA)) { - alto_str << "\t\t\t\t\tIsAtBeginningOf(RIL_TEXTLINE)) { - alto_str << "\t\t\t\t\t\t