ecdye · ecdye · Oct 4, 2024 · Oct 1, 2024 · Oct 1, 2024 · Oct 2, 2024
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -28,7 +28,7 @@ jobs:
 
       - name: Test
         timeout-minutes: 10
-        run: xcrun swift test
+        run: xcrun swift test --parallel --skip "macSubtitleOCRTests.vobSubSUB()"
 
       - name: Periphery
         run: |

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
@@ -35,6 +35,7 @@ jobs:
       with:
         languages: swift
         build-mode: autobuild
+        debug: true
 
     - name: Perform CodeQL Analysis
       uses: github/codeql-action/analyze@v3

diff --git a/README.md b/README.md
@@ -7,18 +7,23 @@
 
 ## Overview
 
-**macSubtitleOCR** is a tool written entirely in Swift to convert bitmap subtitles into SubRip subtitle format (SRT) using Optical Character Recognition (OCR).
-Currently, it supports PGS bitmap subtitles, which can be extracted from `.mkv` or `.sup` files.
-The tool leverages the built-in macOS OCR engine, which provides highly accurate text recognition.
+**macSubtitleOCR** is a tool written entirely in Swift that converts bitmap subtitles into the SubRip subtitle format (SRT) using Optical Character Recognition (OCR).
+It currently supports both PGS and VobSub bitmap subtitles.
+The tool utilizes the built-in macOS OCR engine, offering highly accurate text recognition.
 
-For more details on performance, see the [Accuracy](#accuracy) section below.
+For more details on performance, refer to the [Accuracy](#accuracy) section below.
 
 ### Features
 
 - Export `.png` images of subtitles for manual correction of OCR output.
-- Use macOS OCR engine's language recognition feature to improve accuracy by validating character sequences as valid words.
+- Use the macOS OCR engine's language recognition feature to enhance accuracy by validating character sequences as real words.
 - Export raw JSON output from the OCR engine for further analysis.
 
+#### Supported Formats
+
+- PGS (`.mkv`, `.sup`)
+- VobSub (`.sub`, `.idx`)
+
 ### Building the Project
 
 > [!IMPORTANT]
@@ -58,3 +63,4 @@ If you're interested in working on specific features or improvements, check out
 
 - [Presentation Graphic Stream (PGS) Files](https://blog.thescorpius.com/index.php/2017/07/15/presentation-graphic-stream-sup-files-bluray-subtitle-format/)
 - [Matroska Technical Specifications](https://www.matroska.org/technical/elements.html)
+- [DVD Subtitle Stream (VobSub) Files](http://www.mpucoder.com/DVD/index.html)
diff --git a/Sources/macSubtitleOCR/Extensions/CollectionExtensions.swift b/Sources/macSubtitleOCR/Extensions/CollectionExtensions.swift
@@ -0,0 +1,18 @@
+//
+// CollectionExtensions.swift
+// macSubtitleOCR
+//
+// Created by Ethan Dye on 9/30/24.
+// Copyright © 2024 Ethan Dye. All rights reserved.
+//
+
+extension Collection {
+    /// Splits the collection into consecutive, non-overlapping slices of at most `maxLength` elements.
+    ///
+    /// Slices are produced lazily and in order. Every slice holds exactly `maxLength` elements except
+    /// possibly the last one, which holds whatever remains. An empty collection yields no slices.
+    ///
+    /// - Parameter maxLength: The maximum number of elements per slice. Must be greater than zero.
+    /// - Returns: A sequence of subsequences that together cover the whole collection.
+    func unfoldSubSequences(limitedTo maxLength: Int) -> UnfoldSequence<SubSequence, Index> {
+        // A zero length would never advance `start`, producing empty slices forever.
+        precondition(maxLength > 0, "maxLength must be greater than zero")
+        return sequence(state: startIndex) { start in
+            guard start < self.endIndex else { return nil }
+            // Clamp the final slice to the end of the collection.
+            let end = self.index(start, offsetBy: maxLength, limitedBy: self.endIndex) ?? self.endIndex
+            defer { start = end }
+            return self[start ..< end]
+        }
+    }
+}
diff --git a/Sources/macSubtitleOCR/Extensions/DataExtensions.swift b/Sources/macSubtitleOCR/Extensions/DataExtensions.swift
@@ -27,4 +27,10 @@ extension Data {
             return bytes.reduce(0) { T($0) << 8 + T($1) }
         }
     }
+
+    /* Useful for debugging purposes
+     func hexEncodedString() -> String {
+         map { String(format: "%02hhx", $0) }.joined()
+     }
+     */
 }
diff --git a/Sources/macSubtitleOCR/Extensions/StringProtocolExtensions.swift b/Sources/macSubtitleOCR/Extensions/StringProtocolExtensions.swift
@@ -0,0 +1,14 @@
+//
+// StringProtocolExtensions.swift
+// macSubtitleOCR
+//
+// Created by Ethan Dye on 9/30/24.
+// Copyright © 2024 Ethan Dye. All rights reserved.
+//
+
+import Foundation
+
+extension StringProtocol {
+    /// The value of this string parsed as a base-16 `UInt8`, or `nil` if it cannot be parsed as one.
+    var byte: UInt8? {
+        UInt8(self, radix: 16)
+    }
+
+    /// The bytes obtained by reading this string in chunks of at most two characters as hexadecimal.
+    ///
+    /// Chunks that do not parse as a `UInt8` are skipped rather than reported.
+    var hexToBytes: [UInt8] {
+        var bytes: [UInt8] = []
+        for chunk in unfoldSubSequences(limitedTo: 2) {
+            if let value = chunk.byte {
+                bytes.append(value)
+            }
+        }
+        return bytes
+    }
+}
diff --git a/Sources/macSubtitleOCR/Subtitles/PGS/PGS.swift b/Sources/macSubtitleOCR/Subtitles/PGS/PGS.swift
@@ -107,7 +107,8 @@ struct PGS {
                 imageWidth: ods.objectWidth,
                 imageHeight: ods.objectHeight,
                 imageData: ods.imageData,
-                imagePalette: pds.palette)
+                imagePalette: pds.palette,
+                numberOfColors: 256)
         }
     }
 }
diff --git a/Sources/macSubtitleOCR/Subtitles/PGS/Parsers/ODS.swift b/Sources/macSubtitleOCR/Subtitles/PGS/Parsers/ODS.swift
@@ -33,7 +33,8 @@ struct ODS {
     //   0x17: Segment Type; already checked by the caller
     //   2 bytes: Object ID (unused by us)
     //   1 byte: Version number (unused by us)
-    //   1 byte: Sequence flag (0x40: Last in sequence, 0x80: First in sequence, 0xC0: First and last in sequence (0x40 |
+    //   1 byte: Sequence flag (0x40: Last in sequence, 0x80: First in sequence,
+    //   0xC0: First and last in sequence, i.e. 0x40 |
     //   0x80)
     //   3 bytes: Object data length (unused by us)
     //   2 bytes: Object width
@@ -65,6 +66,6 @@ struct ODS {
 
     private func decodeRLEData() -> Data {
         let rleImageData = RLEData(data: rawImageData, width: objectWidth, height: objectHeight)
-        return rleImageData.decode()
+        return rleImageData.decodePGS()
     }
 }
diff --git a/Sources/macSubtitleOCR/Subtitles/RLE/RLEData.swift b/Sources/macSubtitleOCR/Subtitles/RLE/RLEData.swift
@@ -25,7 +25,7 @@ struct RLEData {
 
     // MARK: - Functions
 
-    func decode() -> Data {
+    func decodePGS() -> Data {
         var pixelCount = 0
         var lineCount = 0
         var iterator = data.makeIterator()
@@ -65,4 +65,90 @@ struct RLEData {
 
         return image
     }
+
+    /// Decodes VobSub run-length data into one color index (0-3) per pixel.
+    ///
+    /// Every byte of `data` is split into two 4-bit nibbles. A code is one to four nibbles long:
+    /// its low two bits are the color index and the remaining bits are the run length. A run
+    /// length of zero, or a run that reaches the right edge, fills the rest of the current line.
+    /// The decoded lines are finally reordered by `interleaveLines(_:)`.
+    ///
+    /// - Returns: The reordered pixel data, or an empty `Data` when there is nothing to decode
+    ///   (`data` is empty, or `width`/`height` is not positive).
+    func decodeVobSub() -> Data {
+        // Seeding the look-ahead window needs at least one byte (two nibbles), and the line
+        // arithmetic below divides by `width`; bail out instead of trapping on such input.
+        guard !data.isEmpty, width > 0, height > 0 else { return Data() }
+
+        var nibbles = Data()
+        var decodedLines = Data()
+        decodedLines.reserveCapacity(Int(width * height))
+        nibbles.reserveCapacity(data.count * 2)
+
+        // Convert RLE data to nibbles (high nibble first)
+        for byte in data {
+            nibbles.append(byte >> 4)
+            nibbles.append(byte & 0x0F)
+        }
+
+        var i = 0 // index of the next nibble to load into the look-ahead window
+        var y = 0 // number of completed lines
+        var x = 0 // pixels emitted so far on the current line
+        var currentNibbles: [UInt8?] = [nibbles[i], nibbles[i + 1]]
+        i += 2
+        while currentNibbles[1] != nil, y < height {
+            var nibble = getNibble(currentNibbles: &currentNibbles, nibbles: nibbles, i: &i)
+
+            // A code whose value is still below 4 is extended with further nibbles
+            if nibble < 0x04 {
+                if nibble == 0x00 {
+                    nibble = nibble << 4 | getNibble(currentNibbles: &currentNibbles, nibbles: nibbles, i: &i)
+                    if nibble < 0x04 {
+                        nibble = nibble << 4 | getNibble(currentNibbles: &currentNibbles, nibbles: nibbles, i: &i)
+                    }
+                }
+                nibble = nibble << 4 | getNibble(currentNibbles: &currentNibbles, nibbles: nibbles, i: &i)
+            }
+            let color = UInt8(nibble & 0x03)
+            var run = Int(nibble >> 2)
+
+            // NOTE(review): a 15-pixel run of a non-zero color at the start of a line is not emitted;
+            // the reader rewinds and re-parses instead. The reason is not evident from this code - confirm.
+            // `i >= 5` keeps the rewind inside `nibbles`: when this is the first code of the stream, i == 4.
+            if decodedLines.count % width == 0, color != 0, run == 15, i >= 5 {
+                i -= 5
+                currentNibbles = [nibbles[i], nibbles[i + 1]]
+                i += 2
+                continue
+            }
+            x += run
+
+            if run == 0 || x >= width {
+                // End of line: stretch or trim the run so the line is exactly `width` pixels wide
+                run += width - x
+                x = 0
+                y += 1
+                // Discard one nibble when the read index is odd (presumably byte alignment - confirm)
+                if i % 2 != 0 {
+                    _ = getNibble(currentNibbles: &currentNibbles, nibbles: nibbles, i: &i)
+                }
+            }
+
+            decodedLines.append(contentsOf: repeatElement(color, count: run))
+        }
+
+        return interleaveLines(decodedLines)
+    }
+
+    /// Rebuilds the picture by alternating lines from the two halves of `decodedLines`.
+    ///
+    /// The input is read as lines of `width` bytes. Output line pairs are line `n` of the first
+    /// half followed by line `n` of the second half. When `height` is odd the first half holds
+    /// one extra line, which is appended last.
+    ///
+    /// - Parameter decodedLines: The decoded lines, first half followed by second half.
+    /// - Returns: The lines in alternating order.
+    private func interleaveLines(_ decodedLines: Data) -> Data {
+        let pairCount = height / 2
+        let hasUnpairedLine = height % 2 != 0
+        // With an odd height the second half starts one line later, after the extra line.
+        let secondHalfFirstLine = hasUnpairedLine ? pairCount + 1 : pairCount
+
+        var interleaved = Data()
+        interleaved.reserveCapacity(Int(width * height))
+
+        for pair in stride(from: 0, to: pairCount, by: 1) {
+            let firstStart = pair * width
+            let secondStart = (secondHalfFirstLine + pair) * width
+            interleaved.append(decodedLines.subdata(in: firstStart ..< firstStart + width))
+            interleaved.append(decodedLines.subdata(in: secondStart ..< secondStart + width))
+        }
+        if hasUnpairedLine {
+            let lastStart = pairCount * width
+            interleaved.append(decodedLines.subdata(in: lastStart ..< lastStart + width))
+        }
+        return interleaved
+    }
+
+    /// Pops the next nibble from the look-ahead window and refills the window from `nibbles`.
+    ///
+    /// - Parameters:
+    ///   - currentNibbles: The look-ahead window. Its first element is removed and the nibble at
+    ///     index `i` is appended, or `nil` once `i` is outside `nibbles`.
+    ///   - nibbles: All nibbles of the RLE stream.
+    ///   - i: Index of the nibble to load into the window; incremented by one.
+    /// - Returns: The removed nibble widened to `UInt16`, or 0 if that slot was already `nil`.
+    private func getNibble(currentNibbles: inout [UInt8?], nibbles: Data, i: inout Int) -> UInt16 {
+        // A missing nibble (stream exhausted mid-code) reads as zero instead of trapping.
+        let nibble = UInt16(currentNibbles.removeFirst() ?? 0)
+        // The window runs ahead of the nibble being consumed, so near the end of the stream `i`
+        // is past the last nibble; append `nil` there so a `!= nil` check on the window can end the loop.
+        currentNibbles.append(nibbles.indices.contains(i) ? nibbles[i] : nil)
+        i += 1
+        return nibble
+    }
 }
diff --git a/Sources/macSubtitleOCR/Subtitles/Subtitle.swift b/Sources/macSubtitleOCR/Subtitles/Subtitle.swift
@@ -13,22 +13,31 @@ class Subtitle {
     var index: Int?
     var text: String?
     var startTimestamp: TimeInterval?
+    var imageXOffset: Int?
+    var imageYOffset: Int?
     var imageWidth: Int?
     var imageHeight: Int?
     var imageData: Data?
     var imagePalette: [UInt8]?
+    var imageAlpha: [UInt8]?
+    var numberOfColors: Int?
     var endTimestamp: TimeInterval?
 
     init(index: Int? = nil, text: String? = nil, startTimestamp: TimeInterval? = nil, endTimestamp: TimeInterval? = nil,
-         imageWidth: Int? = nil, imageHeight: Int? = nil, imageData: Data? = nil, imagePalette: [UInt8]? = nil) {
+         imageXOffset: Int? = nil, imageYOffset: Int? = nil, imageWidth: Int? = nil, imageHeight: Int? = nil,
+         imageData: Data? = nil, imagePalette: [UInt8]? = nil, imageAlpha: [UInt8]? = nil, numberOfColors: Int? = nil) {
         self.index = index
         self.text = text
         self.startTimestamp = startTimestamp
         self.endTimestamp = endTimestamp
+        self.imageXOffset = imageXOffset
+        self.imageYOffset = imageYOffset
         self.imageWidth = imageWidth
         self.imageHeight = imageHeight
         self.imageData = imageData
         self.imagePalette = imagePalette
+        self.imageAlpha = imageAlpha
+        self.numberOfColors = numberOfColors
     }
 
     // MARK: - Functions
@@ -66,15 +75,14 @@ class Subtitle {
     // Converts the image data to RGBA format using the palette
     private func imageDataToRGBA() -> Data {
         let bytesPerPixel = 4
-        let numColors = 256 // There are only 256 possible palette entries in a PGS Subtitle
         var rgbaData = Data(capacity: imageWidth! * imageHeight! * bytesPerPixel)
 
         for y in 0 ..< imageHeight! {
             for x in 0 ..< imageWidth! {
                 let index = Int(y) * imageWidth! + Int(x)
                 let colorIndex = Int(imageData![index])
 
-                guard colorIndex < numColors else {
+                guard colorIndex < numberOfColors! else {
                     continue
                 }
 

diff --git a/Sources/macSubtitleOCR/Subtitles/VobSub/MPEG2PacketType.swift b/Sources/macSubtitleOCR/Subtitles/VobSub/MPEG2PacketType.swift
@@ -0,0 +1,12 @@
+//
+// MPEG2PacketType.swift
+// macSubtitleOCR
+//
+// Created by Ethan Dye on 9/30/24.
+// Copyright © 2024 Ethan Dye. All rights reserved.
+//
+
+/// 32-bit start codes used to identify packets in an MPEG-2 stream.
+enum MPEG2PacketType {
+    /// `0x000001BA`, the MPEG-2 Program Stream pack start code.
+    static let psPacket = UInt32(0x0000_01BA)
+    /// `0x000001BD`, the start code of a PES packet on private stream 1 (stream id `0xBD`).
+    static let pesPacket = UInt32(0x0000_01BD)
+}
diff --git a/Sources/macSubtitleOCR/Subtitles/VobSub/VobSub.swift b/Sources/macSubtitleOCR/Subtitles/VobSub/VobSub.swift
@@ -0,0 +1,50 @@
+//
+// VobSub.swift
+// macSubtitleOCR
+//
+// Created by Ethan Dye on 9/30/24.
+// Copyright © 2024 Ethan Dye. All rights reserved.
+//
+
+import Foundation
+import os
+
+/// Reads the subtitles described by a VobSub `.sub`/`.idx` file pair.
+struct VobSub {
+    // MARK: - Properties
+
+    private let logger = Logger(subsystem: "github.ecdye.macSubtitleOCR", category: "VobSub")
+    /// Subtitles in the order their entries appear in the index.
+    private(set) var subtitles = [Subtitle]()
+
+    // MARK: - Lifecycle
+
+    /// Opens the subtitle file, parses the index and extracts every subtitle the index lists.
+    ///
+    /// - Parameters:
+    ///   - sub: Path of the `.sub` file.
+    ///   - idx: Path of the `.idx` file.
+    /// - Throws: Any error raised while opening `sub` for reading.
+    init(_ sub: String, _ idx: String) throws {
+        logger.debug("Extracting VobSub subtitles from \(sub) and \(idx)")
+        let subHandle = try FileHandle(forReadingFrom: URL(filePath: sub))
+        defer { subHandle.closeFile() }
+        let parsedIndex = VobSubIDX(URL(filePath: idx))
+        extractSubtitleImages(subFile: subHandle, idx: parsedIndex)
+    }
+
+    // MARK: - Methods
+
+    /// Builds one `Subtitle` per index entry and appends it to `subtitles`.
+    ///
+    /// - Parameters:
+    ///   - subFile: Open handle on the `.sub` file.
+    ///   - idx: Parsed index supplying offsets, timestamps and the palette.
+    private mutating func extractSubtitleImages(subFile: FileHandle, idx: VobSubIDX) {
+        for index in idx.offsets.indices {
+            let offset = idx.offsets[index]
+            let timestamp = idx.timestamps[index]
+            logger.debug("Index \(index), offset: \(offset), timestamp: \(timestamp)")
+
+            // `nextOffset` is the following entry's offset, or the end-of-file position for the last entry.
+            let isLastEntry = index + 1 >= idx.offsets.count
+            let nextOffset: UInt64 = isLastEntry ? subFile.seekToEndOfFile() : idx.offsets[index + 1]
+
+            let parser = VobSubParser(
+                subFile: subFile,
+                timestamp: timestamp,
+                offset: offset,
+                nextOffset: nextOffset,
+                idxPalette: idx.palette)
+            let subtitle = parser.subtitle
+            logger.debug("Found image at offset \(offset) with timestamp \(timestamp)")
+            subtitles.append(subtitle)
+        }
+    }
+}