-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpage-extract-lines.xsl
71 lines (71 loc) · 2.75 KB
/
page-extract-lines.xsl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
<xsl:stylesheet
version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15">
<!-- Serialize the result as plain text, i.e. without any XML markup or declaration.
     The encoding is pinned to UTF-8 because the default encoding of the text output
     method is processor-dependent, and the extracted text may contain arbitrary
     Unicode characters. The standalone and omit-xml-declaration attributes are not
     set: they have no effect with the text output method. -->
<xsl:output
  method="text"
  encoding="UTF-8"/>
<!-- A single line feed character; used as the default for the line break and
     (doubled) for the paragraph break parameters. Written as a character
     reference so the value does not depend on how this file is formatted. -->
<xsl:variable name="newline"><xsl:text>&#10;</xsl:text></xsl:variable>
<!-- pb (paragraph break): string written after the text of each region;
     defaults to two newlines -->
<xsl:param name="pb" select="concat($newline,$newline)"/>
<!-- lb (line break): string written between consecutive text lines of a region;
     defaults to one newline -->
<xsl:param name="lb" select="$newline"/>
<!-- order: a value starting with 'reading-order' (the default) outputs regions in the
     order given by the page's ReadingOrder, if it contains region references;
     any other value outputs the text regions in document order -->
<xsl:param name="order" select="'reading-order'"/>
<!-- Index TextRegion elements by their id attribute so that region references can be
     resolved with key('textRegion', ...). The XPath id() function is not used because
     it relies on DTD-declared ID attributes, which XSD-based PAGE files do not have. -->
<xsl:key name="textRegion" match="pc:TextRegion" use="@id"/>
<!-- Entry point: write the text of a Page.
     If $order starts with 'reading-order' and the page's ReadingOrder contains at
     least one element carrying a region reference, the regions are written in
     reading order (template getrefs). Otherwise every TextRegion of the document
     is written in document order, each followed by the paragraph break $pb. -->
<xsl:template match="pc:PcGts/pc:Page">
  <xsl:variable name="hasRegionRefs"
                select="boolean(pc:ReadingOrder//*[@regionRef|@regionRefIndexed])"/>
  <xsl:choose>
    <xsl:when test="starts-with($order, 'reading-order') and $hasRegionRefs">
      <!-- hand the top-level group(s) below ReadingOrder to the recursive walker -->
      <xsl:call-template name="getrefs">
        <xsl:with-param name="group" select="pc:ReadingOrder/*"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <!-- '//' starts at the document root, so nested TextRegions are visited as well -->
      <xsl:for-each select="//pc:TextRegion">
        <xsl:call-template name="getlines">
          <xsl:with-param name="region" select="."/>
        </xsl:call-template>
        <xsl:value-of select="$pb"/>
      </xsl:for-each>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- getlines: write the text of all TextLine children of $region.
     For each line, the Unicode content of its first TextEquiv is written;
     consecutive lines are separated by the line break $lb (no separator is
     written before the first or after the last line). -->
<xsl:template name="getlines">
  <xsl:param name="region"/>
  <xsl:for-each select="$region/pc:TextLine">
    <xsl:value-of select="pc:TextEquiv[1]/pc:Unicode" disable-output-escaping="yes"/>
    <!-- separator goes between lines only, hence not after the final one -->
    <xsl:if test="position() != last()">
      <xsl:value-of select="$lb"/>
    </xsl:if>
  </xsl:for-each>
</xsl:template>
<!-- getrefs: walk the child elements of $group (elements below ReadingOrder) and
     write the regions they reference.
     Children are visited sorted numerically by their index attribute. For each child,
     the TextRegion(s) whose id equals its regionRef (or regionRefIndexed) attribute
     are written via getlines, followed by the paragraph break $pb. A child whose
     element name contains 'Group' is afterwards processed recursively. -->
<xsl:template name="getrefs">
  <xsl:param name="group"/>
  <xsl:for-each select="$group/*">
    <xsl:sort select="@index" data-type="number"/>
    <!-- references are resolved through the textRegion key rather than id() -->
    <xsl:variable name="target" select="key('textRegion', @regionRef|@regionRefIndexed)"/>
    <xsl:if test="count($target) != 0">
      <xsl:call-template name="getlines">
        <xsl:with-param name="region" select="$target"/>
      </xsl:call-template>
      <xsl:value-of select="$pb"/>
    </xsl:if>
    <!-- nested UnorderedGroup(Indexed) / OrderedGroup(Indexed): descend into it -->
    <xsl:if test="contains(local-name(), 'Group')">
      <xsl:call-template name="getrefs">
        <xsl:with-param name="group" select="."/>
      </xsl:call-template>
    </xsl:if>
  </xsl:for-each>
</xsl:template>
<!-- Override the built-in template rule that would otherwise copy every text node
     to the output, so that only text written explicitly by the templates is emitted. -->
<xsl:template match="text()"/>
</xsl:stylesheet>