@@ -51,6 +51,20 @@ static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level,
5151 }
5252}
5353
///
/// Build the value of an ALTO element's ID attribute.
///
/// @param prefix      Element-kind prefix placed at the start of the ID
///                    (callers in this file pass string literals). Must not
///                    be null: constructing a std::string from a null
///                    pointer is undefined behavior.
/// @param page_number Page index; 0 is treated as the first page.
/// @param counter     Running counter of elements of this kind.
/// @return "<prefix>_<counter>" for page 0, and
///         "<prefix>_<page_number>_<counter>" for every other page.
///
static std::string GetID(const char *prefix, int page_number, int counter) {
  std::string id(prefix);
  // IDs only carry the counter on the first page so that they stay consistent
  // with the IDs assigned before page numbers were introduced. From the
  // second page on, the page number is included to keep IDs unique across
  // the whole document.
  if (page_number != 0) {
    id += '_';
    id += std::to_string(page_number);
  }
  // The separator is a bare underscore: an XML ID must not contain
  // whitespace, so no padding is emitted around it.
  id += '_';
  id += std::to_string(counter);
  return id;
}
67+
5468// /
5569// / Append the ALTO XML for the beginning of the document
5670// /
@@ -168,7 +182,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
168182 case PT_PULLOUT_IMAGE: {
169183 // Handle all kinds of images.
170184 // TODO: optionally add TYPE, for example TYPE="photo".
171- alto_str << " \t\t\t\t <Illustration ID=\" cblock_ " << page_number << " _ " << bcnt++ << " \" " ;
185+ alto_str << " \t\t\t\t <Illustration ID=\" " << GetID ( " cblock " , page_number, bcnt++) << " \" " ;
172186 AddBoxToAlto (res_it.get (), RIL_BLOCK, alto_str);
173187 alto_str << " </Illustration>\n " ;
174188 res_it->Next (RIL_BLOCK);
@@ -177,7 +191,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
177191 case PT_HORZ_LINE:
178192 case PT_VERT_LINE:
179193 // Handle horizontal and vertical lines.
180- alto_str << " \t\t\t\t <GraphicalElement ID=\" cblock_ " << page_number << " _ " << bcnt++ << " \" " ;
194+ alto_str << " \t\t\t\t <GraphicalElement ID=\" " << GetID ( " cblock " , page_number, bcnt++) << " \" " ;
181195 AddBoxToAlto (res_it.get (), RIL_BLOCK, alto_str);
182196 alto_str << " </GraphicalElement >\n " ;
183197 res_it->Next (RIL_BLOCK);
@@ -190,24 +204,24 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
190204 }
191205
192206 if (res_it->IsAtBeginningOf (RIL_BLOCK)) {
193- alto_str << " \t\t\t\t <ComposedBlock ID=\" cblock_ " << page_number << " _ " << bcnt << " \" " ;
207+ alto_str << " \t\t\t\t <ComposedBlock ID=\" " << GetID ( " cblock " , page_number, bcnt) << " \" " ;
194208 AddBoxToAlto (res_it.get (), RIL_BLOCK, alto_str);
195209 alto_str << " \n " ;
196210 }
197211
198212 if (res_it->IsAtBeginningOf (RIL_PARA)) {
199- alto_str << " \t\t\t\t\t <TextBlock ID=\" block_ " << page_number << " _ " << tcnt << " \" " ;
213+ alto_str << " \t\t\t\t\t <TextBlock ID=\" " << GetID ( " block " , page_number, tcnt) << " \" " ;
200214 AddBoxToAlto (res_it.get (), RIL_PARA, alto_str);
201215 alto_str << " \n " ;
202216 }
203217
204218 if (res_it->IsAtBeginningOf (RIL_TEXTLINE)) {
205- alto_str << " \t\t\t\t\t\t <TextLine ID=\" line_ " << page_number << " _ " << lcnt << " \" " ;
219+ alto_str << " \t\t\t\t\t\t <TextLine ID=\" " << GetID ( " line " , page_number, lcnt) << " \" " ;
206220 AddBoxToAlto (res_it.get (), RIL_TEXTLINE, alto_str);
207221 alto_str << " \n " ;
208222 }
209223
210- alto_str << " \t\t\t\t\t\t\t <String ID=\" string_ " << page_number << " _ " << wcnt << " \" " ;
224+ alto_str << " \t\t\t\t\t\t\t <String ID=\" " << GetID ( " string " , page_number, wcnt) << " \" " ;
211225 AddBoxToAlto (res_it.get (), RIL_WORD, alto_str);
212226 alto_str << " CONTENT=\" " ;
213227
0 commit comments