Skip to content

Commit

Permalink
Added format unit tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
haogatyp authored and oliver-stoehr committed Oct 4, 2023
1 parent 4cdd512 commit 8236ba9
Show file tree
Hide file tree
Showing 13 changed files with 1,268 additions and 0 deletions.
66 changes: 66 additions & 0 deletions Tests/Fixtures/Format/alto.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  ALTO v2 fixture (namespace http://www.loc.gov/standards/alto/ns-v2#) with text content.

  Describes a single page "Page1" of 1801 x 2546 (WIDTH x HEIGHT); the declared
  MeasurementUnit is "pixel". The PrintSpace contains:

    * TextBlock Page1_Block2 with two TextLines whose String CONTENT values read
      "Bürgertum", "und" and "Bürgerlichkeit", "in", "Dresden"
    * TextBlock Page1_Block3 with two TextLines whose String CONTENT values read
      "DRESDNER" and "HEFTE"
    * Illustration blocks Page1_Block1, Page1_Block10 and Page1_Block11
    * GraphicalElement blocks Page1_Block4 to Page1_Block9

  Inside a TextLine, String elements carry the recognised word (CONTENT) and a WC
  value; SP elements sit between consecutive String elements. HPOS/VPOS/WIDTH/HEIGHT
  give each element's position and size.

  NOTE(review): presumably the "complete" input for the format unit tests added in
  this commit; the test code is not part of this file, so confirm which values the
  tests assert before changing any CONTENT or coordinate.
-->
<alto:alto xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:alto="http://www.loc.gov/standards/alto/ns-v2#">
<alto:Description>
<alto:MeasurementUnit>pixel</alto:MeasurementUnit>
<alto:OCRProcessing ID="IdOcr">
<alto:ocrProcessingStep>
<alto:processingDateTime>2020-05-14</alto:processingDateTime>
<alto:processingSoftware>
<alto:softwareCreator>ABBYY</alto:softwareCreator>
<alto:softwareName>ABBYY FineReader Engine</alto:softwareName>
<alto:softwareVersion>12</alto:softwareVersion>
</alto:processingSoftware>
</alto:ocrProcessingStep>
</alto:OCRProcessing>
</alto:Description>
<alto:Styles> </alto:Styles>
<alto:Layout>
<alto:Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="2546" WIDTH="1801">
<alto:PrintSpace HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0">
<alto:Illustration ID="Page1_Block1" HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0"/>
<alto:TextBlock ID="Page1_Block2" HEIGHT="241" WIDTH="1064" VPOS="2068" HPOS="470" language="de">
<alto:Shape>
<alto:Polygon POINTS="1506,2068 1533,2068 1533,2283 1534,2283 1534,2306 1509,2306 1509,2307 1104,2307 1104,2308 700,2308 700,2309 471,2309 471,2286 470,2286 470,2071 697,2071 697,2070 1101,2070 1101,2069 1506,2069 1506,2068"/>
</alto:Shape>
<alto:TextLine HEIGHT="102" WIDTH="628" VPOS="2076" HPOS="477">
<alto:String WC="0.79777777194976807" CONTENT="Bürgertum" HEIGHT="95" WIDTH="437" VPOS="2083" HPOS="477"/>
<alto:SP WIDTH="34" VPOS="2107" HPOS="915"/>
<alto:String WC="0.66333335638046265" CONTENT="und" HEIGHT="76" WIDTH="155" VPOS="2076" HPOS="950"/>
</alto:TextLine>
<alto:TextLine HEIGHT="104" WIDTH="1051" VPOS="2199" HPOS="477">
<alto:String WC="0.83142858743667603" CONTENT="Bürgerlichkeit" HEIGHT="102" WIDTH="574" VPOS="2201" HPOS="477"/>
<alto:SP WIDTH="32" VPOS="2206" HPOS="1051"/>
<alto:String WC="1." CONTENT="in" HEIGHT="68" WIDTH="74" VPOS="2205" HPOS="1084"/>
<alto:SP WIDTH="34" VPOS="2204" HPOS="1159"/>
<alto:String WC="0.8028571605682373" CONTENT="Dresden" HEIGHT="75" WIDTH="333" VPOS="2199" HPOS="1194"/>
</alto:TextLine>
</alto:TextBlock>
<alto:TextBlock ID="Page1_Block3" HEIGHT="290" WIDTH="775" VPOS="307" HPOS="466" language="de">
<alto:Shape>
<alto:Polygon POINTS="1101,307 1241,307 1241,595 1104,595 1104,596 700,596 700,597 466,597 466,309 697,309 697,308 1101,308 1101,307"/>
</alto:Shape>
<alto:TextLine HEIGHT="98" WIDTH="752" VPOS="315" HPOS="473">
<alto:String WC="0.75625002384185791" CONTENT="DRESDNER" HEIGHT="98" WIDTH="752" VPOS="315" HPOS="473"/>
</alto:TextLine>
<alto:TextLine HEIGHT="97" WIDTH="448" VPOS="492" HPOS="473">
<alto:String WC="0.70399999618530273" CONTENT="HEFTE" HEIGHT="97" WIDTH="448" VPOS="492" HPOS="473"/>
</alto:TextLine>
</alto:TextBlock>
<alto:GraphicalElement ID="Page1_Block4" HEIGHT="14" WIDTH="1674" VPOS="266" HPOS="55"/>
<alto:GraphicalElement ID="Page1_Block5" HEIGHT="15" WIDTH="1674" VPOS="442" HPOS="55"/>
<alto:GraphicalElement ID="Page1_Block6" HEIGHT="30" WIDTH="629" VPOS="680" HPOS="477"/>
<alto:GraphicalElement ID="Page1_Block7" HEIGHT="8" WIDTH="170" VPOS="1963" HPOS="635"/>
<alto:GraphicalElement ID="Page1_Block8" HEIGHT="141" WIDTH="11" VPOS="1019" HPOS="1197"/>
<alto:GraphicalElement ID="Page1_Block9" HEIGHT="168" WIDTH="12" VPOS="948" HPOS="1411"/>
<alto:Illustration ID="Page1_Block10" HEIGHT="175" WIDTH="88" VPOS="1469" HPOS="544">
<alto:Shape>
<alto:Polygon POINTS="544,1469 631,1469 631,1474 632,1474 632,1644 545,1644 545,1477 544,1477 544,1469"/>
</alto:Shape>
</alto:Illustration>
<alto:Illustration ID="Page1_Block11" HEIGHT="207" WIDTH="61" VPOS="1657" HPOS="790"/>
</alto:PrintSpace>
</alto:Page>
</alto:Layout>
</alto:alto>
59 changes: 59 additions & 0 deletions Tests/Fixtures/Format/altoNoString.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  ALTO v2 fixture (namespace http://www.loc.gov/standards/alto/ns-v2#) in which the
  TextLines contain no String elements.

  Describes a single page "Page1" of 1801 x 2546 (WIDTH x HEIGHT); the declared
  MeasurementUnit is "pixel". The PrintSpace contains:

    * TextBlock Page1_Block2 with two TextLines that hold only SP elements
      (one SP in the first line, two in the second)
    * TextBlock Page1_Block3 with two TextLines that have no child elements at all
    * Illustration blocks Page1_Block1, Page1_Block10 and Page1_Block11
    * GraphicalElement blocks Page1_Block4 to Page1_Block9

  NOTE(review): presumably exercises how the format code behaves when a page has
  text lines but no recognised words; the test code is not part of this file, so
  confirm the expected result there. Do not add String elements to this file.
-->
<alto:alto xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:alto="http://www.loc.gov/standards/alto/ns-v2#">
<alto:Description>
<alto:MeasurementUnit>pixel</alto:MeasurementUnit>
<alto:OCRProcessing ID="IdOcr">
<alto:ocrProcessingStep>
<alto:processingDateTime>2020-05-14</alto:processingDateTime>
<alto:processingSoftware>
<alto:softwareCreator>ABBYY</alto:softwareCreator>
<alto:softwareName>ABBYY FineReader Engine</alto:softwareName>
<alto:softwareVersion>12</alto:softwareVersion>
</alto:processingSoftware>
</alto:ocrProcessingStep>
</alto:OCRProcessing>
</alto:Description>
<alto:Styles> </alto:Styles>
<alto:Layout>
<alto:Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="2546" WIDTH="1801">
<alto:PrintSpace HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0">
<alto:Illustration ID="Page1_Block1" HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0"/>
<alto:TextBlock ID="Page1_Block2" HEIGHT="241" WIDTH="1064" VPOS="2068" HPOS="470" language="de">
<alto:Shape>
<alto:Polygon POINTS="1506,2068 1533,2068 1533,2283 1534,2283 1534,2306 1509,2306 1509,2307 1104,2307 1104,2308 700,2308 700,2309 471,2309 471,2286 470,2286 470,2071 697,2071 697,2070 1101,2070 1101,2069 1506,2069 1506,2068"/>
</alto:Shape>
<alto:TextLine HEIGHT="102" WIDTH="628" VPOS="2076" HPOS="477">
<alto:SP WIDTH="34" VPOS="2107" HPOS="915"/>
</alto:TextLine>
<alto:TextLine HEIGHT="104" WIDTH="1051" VPOS="2199" HPOS="477">
<alto:SP WIDTH="32" VPOS="2206" HPOS="1051"/>
<alto:SP WIDTH="34" VPOS="2204" HPOS="1159"/>
</alto:TextLine>
</alto:TextBlock>
<alto:TextBlock ID="Page1_Block3" HEIGHT="290" WIDTH="775" VPOS="307" HPOS="466" language="de">
<alto:Shape>
<alto:Polygon POINTS="1101,307 1241,307 1241,595 1104,595 1104,596 700,596 700,597 466,597 466,309 697,309 697,308 1101,308 1101,307"/>
</alto:Shape>
<alto:TextLine HEIGHT="98" WIDTH="752" VPOS="315" HPOS="473">
</alto:TextLine>
<alto:TextLine HEIGHT="97" WIDTH="448" VPOS="492" HPOS="473">
</alto:TextLine>
</alto:TextBlock>
<alto:GraphicalElement ID="Page1_Block4" HEIGHT="14" WIDTH="1674" VPOS="266" HPOS="55"/>
<alto:GraphicalElement ID="Page1_Block5" HEIGHT="15" WIDTH="1674" VPOS="442" HPOS="55"/>
<alto:GraphicalElement ID="Page1_Block6" HEIGHT="30" WIDTH="629" VPOS="680" HPOS="477"/>
<alto:GraphicalElement ID="Page1_Block7" HEIGHT="8" WIDTH="170" VPOS="1963" HPOS="635"/>
<alto:GraphicalElement ID="Page1_Block8" HEIGHT="141" WIDTH="11" VPOS="1019" HPOS="1197"/>
<alto:GraphicalElement ID="Page1_Block9" HEIGHT="168" WIDTH="12" VPOS="948" HPOS="1411"/>
<alto:Illustration ID="Page1_Block10" HEIGHT="175" WIDTH="88" VPOS="1469" HPOS="544">
<alto:Shape>
<alto:Polygon POINTS="544,1469 631,1469 631,1474 632,1474 632,1644 545,1644 545,1477 544,1477 544,1469"/>
</alto:Shape>
</alto:Illustration>
<alto:Illustration ID="Page1_Block11" HEIGHT="207" WIDTH="61" VPOS="1657" HPOS="790"/>
</alto:PrintSpace>
</alto:Page>
</alto:Layout>
</alto:alto>
38 changes: 38 additions & 0 deletions Tests/Fixtures/Format/altoNoTextBlock.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  ALTO v2 fixture (namespace http://www.loc.gov/standards/alto/ns-v2#) in which the
  page contains no TextBlock elements.

  Describes a single page "Page1" of 1801 x 2546 (WIDTH x HEIGHT); the declared
  MeasurementUnit is "pixel". The PrintSpace contains only non-text blocks:

    * Illustration blocks Page1_Block1, Page1_Block10 and Page1_Block11
    * GraphicalElement blocks Page1_Block4 to Page1_Block9

  The block IDs jump from Page1_Block1 to Page1_Block4; no Page1_Block2 or
  Page1_Block3 exists in this file.

  NOTE(review): presumably exercises how the format code behaves for a page without
  any text blocks; the test code is not part of this file, so confirm the expected
  result there. Do not add TextBlock elements to this file.
-->
<alto:alto xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:alto="http://www.loc.gov/standards/alto/ns-v2#">
<alto:Description>
<alto:MeasurementUnit>pixel</alto:MeasurementUnit>
<alto:OCRProcessing ID="IdOcr">
<alto:ocrProcessingStep>
<alto:processingDateTime>2020-05-14</alto:processingDateTime>
<alto:processingSoftware>
<alto:softwareCreator>ABBYY</alto:softwareCreator>
<alto:softwareName>ABBYY FineReader Engine</alto:softwareName>
<alto:softwareVersion>12</alto:softwareVersion>
</alto:processingSoftware>
</alto:ocrProcessingStep>
</alto:OCRProcessing>
</alto:Description>
<alto:Styles> </alto:Styles>
<alto:Layout>
<alto:Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="2546" WIDTH="1801">
<alto:PrintSpace HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0">
<alto:Illustration ID="Page1_Block1" HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0"/>
<alto:GraphicalElement ID="Page1_Block4" HEIGHT="14" WIDTH="1674" VPOS="266" HPOS="55"/>
<alto:GraphicalElement ID="Page1_Block5" HEIGHT="15" WIDTH="1674" VPOS="442" HPOS="55"/>
<alto:GraphicalElement ID="Page1_Block6" HEIGHT="30" WIDTH="629" VPOS="680" HPOS="477"/>
<alto:GraphicalElement ID="Page1_Block7" HEIGHT="8" WIDTH="170" VPOS="1963" HPOS="635"/>
<alto:GraphicalElement ID="Page1_Block8" HEIGHT="141" WIDTH="11" VPOS="1019" HPOS="1197"/>
<alto:GraphicalElement ID="Page1_Block9" HEIGHT="168" WIDTH="12" VPOS="948" HPOS="1411"/>
<alto:Illustration ID="Page1_Block10" HEIGHT="175" WIDTH="88" VPOS="1469" HPOS="544">
<alto:Shape>
<alto:Polygon POINTS="544,1469 631,1469 631,1474 632,1474 632,1644 545,1644 545,1477 544,1477 544,1469"/>
</alto:Shape>
</alto:Illustration>
<alto:Illustration ID="Page1_Block11" HEIGHT="207" WIDTH="61" VPOS="1657" HPOS="790"/>
</alto:PrintSpace>
</alto:Page>
</alto:Layout>
</alto:alto>
48 changes: 48 additions & 0 deletions Tests/Fixtures/Format/altoNoTextLine.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  ALTO v2 fixture (namespace http://www.loc.gov/standards/alto/ns-v2#) in which the
  TextBlocks contain no TextLine elements.

  Describes a single page "Page1" of 1801 x 2546 (WIDTH x HEIGHT); the declared
  MeasurementUnit is "pixel". The PrintSpace contains:

    * TextBlock Page1_Block2 and TextBlock Page1_Block3, each holding only a
      Shape/Polygon outline and no TextLine
    * Illustration blocks Page1_Block1, Page1_Block10 and Page1_Block11
    * GraphicalElement blocks Page1_Block4 to Page1_Block9

  NOTE(review): presumably exercises how the format code behaves when text blocks
  are present but empty; the test code is not part of this file, so confirm the
  expected result there. Do not add TextLine elements to this file.
-->
<alto:alto xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:alto="http://www.loc.gov/standards/alto/ns-v2#">
<alto:Description>
<alto:MeasurementUnit>pixel</alto:MeasurementUnit>
<alto:OCRProcessing ID="IdOcr">
<alto:ocrProcessingStep>
<alto:processingDateTime>2020-05-14</alto:processingDateTime>
<alto:processingSoftware>
<alto:softwareCreator>ABBYY</alto:softwareCreator>
<alto:softwareName>ABBYY FineReader Engine</alto:softwareName>
<alto:softwareVersion>12</alto:softwareVersion>
</alto:processingSoftware>
</alto:ocrProcessingStep>
</alto:OCRProcessing>
</alto:Description>
<alto:Styles> </alto:Styles>
<alto:Layout>
<alto:Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="2546" WIDTH="1801">
<alto:PrintSpace HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0">
<alto:Illustration ID="Page1_Block1" HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0"/>
<alto:TextBlock ID="Page1_Block2" HEIGHT="241" WIDTH="1064" VPOS="2068" HPOS="470" language="de">
<alto:Shape>
<alto:Polygon POINTS="1506,2068 1533,2068 1533,2283 1534,2283 1534,2306 1509,2306 1509,2307 1104,2307 1104,2308 700,2308 700,2309 471,2309 471,2286 470,2286 470,2071 697,2071 697,2070 1101,2070 1101,2069 1506,2069 1506,2068"/>
</alto:Shape>
</alto:TextBlock>
<alto:TextBlock ID="Page1_Block3" HEIGHT="290" WIDTH="775" VPOS="307" HPOS="466" language="de">
<alto:Shape>
<alto:Polygon POINTS="1101,307 1241,307 1241,595 1104,595 1104,596 700,596 700,597 466,597 466,309 697,309 697,308 1101,308 1101,307"/>
</alto:Shape>
</alto:TextBlock>
<alto:GraphicalElement ID="Page1_Block4" HEIGHT="14" WIDTH="1674" VPOS="266" HPOS="55"/>
<alto:GraphicalElement ID="Page1_Block5" HEIGHT="15" WIDTH="1674" VPOS="442" HPOS="55"/>
<alto:GraphicalElement ID="Page1_Block6" HEIGHT="30" WIDTH="629" VPOS="680" HPOS="477"/>
<alto:GraphicalElement ID="Page1_Block7" HEIGHT="8" WIDTH="170" VPOS="1963" HPOS="635"/>
<alto:GraphicalElement ID="Page1_Block8" HEIGHT="141" WIDTH="11" VPOS="1019" HPOS="1197"/>
<alto:GraphicalElement ID="Page1_Block9" HEIGHT="168" WIDTH="12" VPOS="948" HPOS="1411"/>
<alto:Illustration ID="Page1_Block10" HEIGHT="175" WIDTH="88" VPOS="1469" HPOS="544">
<alto:Shape>
<alto:Polygon POINTS="544,1469 631,1469 631,1474 632,1474 632,1644 545,1644 545,1477 544,1477 544,1469"/>
</alto:Shape>
</alto:Illustration>
<alto:Illustration ID="Page1_Block11" HEIGHT="207" WIDTH="61" VPOS="1657" HPOS="790"/>
</alto:PrintSpace>
</alto:Page>
</alto:Layout>
</alto:alto>
59 changes: 59 additions & 0 deletions Tests/Fixtures/Format/audioVideo.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  METS fixture with embedded videoMD and audioMD technical metadata.

  The single amdSec "AMD" holds three techMD sections, in this document order:

    1. ID "VIDEOMD":       videomd:VIDEOMD with fileData (color, compression,
                           frameRate 24, sound) and videoInfo (aspectRatio,
                           dimensions, duration 00:01:30.07, frame/frameRate 24)
    2. ID "AUDIOMD":       audiomd:AUDIOMD whose audioInfo/duration is 01:10:35.08
    3. ID "AUDIOMD_EMPTY": audiomd:AUDIOMD whose audioInfo/duration is present but
                           empty

  The third section previously reused ID "AUDIOMD". METS declares this attribute
  as xs:ID, so every value must be unique within the document; the duplicate made
  the fixture schema-invalid and ID-based lookups ambiguous. The order of the
  three sections is unchanged, so positional access still resolves the same nodes.

  NOTE(review): the empty duration element in the third section looks deliberate
  (an "empty value" case for the format unit tests); keep it as an explicit
  start/end tag pair with no content.
-->
<mets:mets xmlns:mets="http://www.loc.gov/METS/"
           xmlns:mods="http://www.loc.gov/mods/v3"
           xmlns:videomd="http://www.loc.gov/videoMD/"
           xmlns:audiomd="http://www.loc.gov/audioMD/"
           xmlns:dv="http://dfg-viewer.de/"
           xmlns:xlink="http://www.w3.org/1999/xlink"
           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
           xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/mods.xsd http://www.loc.gov/videoMD/ https://www.loc.gov/standards/amdvmd/videoMD.xsd">
    <mets:amdSec ID="AMD">
        <mets:techMD ID="VIDEOMD">
            <mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="VIDEOMD">
                <mets:xmlData>
                    <videomd:VIDEOMD ANALOGDIGITALFLAG="FileDigital">
                        <videomd:fileData>
                            <videomd:color>Color</videomd:color>
                            <videomd:compression>
                                <videomd:codecCreatorApp>Phoenix Finish</videomd:codecCreatorApp>
                                <videomd:codecName>Apple ProRes 4444</videomd:codecName>
                            </videomd:compression>
                            <videomd:frameRate mode="Fixed">24</videomd:frameRate>
                            <videomd:sound>Yes</videomd:sound>
                        </videomd:fileData>
                        <videomd:videoInfo>
                            <videomd:aspectRatio>1.375:1</videomd:aspectRatio>
                            <videomd:dimensions LENGTH="1808" UNITS="frames" />
                            <videomd:duration>00:01:30.07</videomd:duration>
                            <videomd:frame>
                                <videomd:frameRate>24</videomd:frameRate>
                            </videomd:frame>
                        </videomd:videoInfo>
                    </videomd:VIDEOMD>
                </mets:xmlData>
            </mets:mdWrap>
        </mets:techMD>
        <mets:techMD ID="AUDIOMD">
            <mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="AUDIOMD">
                <mets:xmlData>
                    <audiomd:AUDIOMD ANALOGDIGITALFLAG="FileDigital">
                        <audiomd:audioInfo>
                            <audiomd:duration>01:10:35.08</audiomd:duration>
                        </audiomd:audioInfo>
                    </audiomd:AUDIOMD>
                </mets:xmlData>
            </mets:mdWrap>
        </mets:techMD>
        <mets:techMD ID="AUDIOMD_EMPTY">
            <mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="AUDIOMD">
                <mets:xmlData>
                    <audiomd:AUDIOMD ANALOGDIGITALFLAG="FileDigital">
                        <audiomd:audioInfo>
                            <audiomd:duration></audiomd:duration>
                        </audiomd:audioInfo>
                    </audiomd:AUDIOMD>
                </mets:xmlData>
            </mets:mdWrap>
        </mets:techMD>
    </mets:amdSec>
</mets:mets>
Loading

0 comments on commit 8236ba9

Please sign in to comment.