Skip to content

Commit

Permalink
Improve uts46 file parsing
Browse files Browse the repository at this point in the history
- Make header a struct.
- Automatically load from the owning bundle if necessary (and make UTS46 a class so Bundle(for:) works).
  • Loading branch information
Wevah committed Apr 18, 2020
1 parent 5230a5d commit 6178ffc
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 14 deletions.
8 changes: 4 additions & 4 deletions Punycode.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
B27409C92443E45B00E567E7 /* Data+Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = B27409C82443E45B00E567E7 /* Data+Extensions.swift */; };
B27409CA2443F66900E567E7 /* Data+Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = B27409C82443E45B00E567E7 /* Data+Extensions.swift */; };
B2AF210724313283009F5EE7 /* UTS46.swift in Sources */ = {isa = PBXBuildFile; fileRef = B2AF210624313283009F5EE7 /* UTS46.swift */; };
B2AF21092431376E009F5EE7 /* uts46.xz in Resources */ = {isa = PBXBuildFile; fileRef = B2AF21082431376E009F5EE7 /* uts46.xz */; };
B2AF21092431376E009F5EE7 /* uts46 in Resources */ = {isa = PBXBuildFile; fileRef = B2AF21082431376E009F5EE7 /* uts46 */; };
B2B4F3D0242EB79E00CC752F /* main.swift in Sources */ = {isa = PBXBuildFile; fileRef = B2B4F3CF242EB79E00CC752F /* main.swift */; };
B2C3DD45241FECF200484850 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = B2C3DD44241FECF200484850 /* AppDelegate.swift */; };
B2C3DD47241FECF400484850 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = B2C3DD46241FECF400484850 /* Assets.xcassets */; };
Expand Down Expand Up @@ -140,7 +140,7 @@
B26FA03C2055425C00CD6AF1 /* libicucore.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libicucore.tbd; path = usr/lib/libicucore.tbd; sourceTree = SDKROOT; };
B27409C82443E45B00E567E7 /* Data+Extensions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Data+Extensions.swift"; sourceTree = "<group>"; };
B2AF210624313283009F5EE7 /* UTS46.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = UTS46.swift; sourceTree = "<group>"; };
B2AF21082431376E009F5EE7 /* uts46.xz */ = {isa = PBXFileReference; lastKnownFileType = file; path = uts46.xz; sourceTree = "<group>"; };
B2AF21082431376E009F5EE7 /* uts46 */ = {isa = PBXFileReference; lastKnownFileType = file; path = uts46; sourceTree = "<group>"; };
B2B4F3CD242EB79E00CC752F /* icumap2code */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = icumap2code; sourceTree = BUILT_PRODUCTS_DIR; };
B2B4F3CF242EB79E00CC752F /* main.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = main.swift; sourceTree = "<group>"; };
B2C3DCC1241FE95B00484850 /* String+Punycode.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "String+Punycode.swift"; sourceTree = "<group>"; };
Expand Down Expand Up @@ -347,7 +347,7 @@
children = (
B25753801F3CE8B2002606DD /* WebNSURLExtras.h */,
B2C59E7224278AF400183346 /* uidna-min.h */,
B2AF21082431376E009F5EE7 /* uts46.xz */,
B2AF21082431376E009F5EE7 /* uts46 */,
B2C3DCC0241FE94400484850 /* Swift */,
B2C3DCBF241FE92F00484850 /* Objective-C */,
);
Expand Down Expand Up @@ -644,7 +644,7 @@
files = (
B2C3DD47241FECF400484850 /* Assets.xcassets in Resources */,
B2C3DD56241FEE5A00484850 /* MainMenu.xib in Resources */,
B2AF21092431376E009F5EE7 /* uts46.xz in Resources */,
B2AF21092431376E009F5EE7 /* uts46 in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand Down
4 changes: 4 additions & 0 deletions Shared/Swift/String+Punycode.swift
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,8 @@ private extension String {
/// - Returns: The mapped string.
/// - Throws: `UTS46MapError`
func mapUTS46() throws -> String {
try! UTS46.loadIfNecessary()

var result = ""

for scalar in self.unicodeScalars {
Expand Down Expand Up @@ -478,6 +480,8 @@ private extension String {
///
/// See [RFC 5892, Appendix A.1 and A.2](https://tools.ietf.org/html/rfc5892#appendix-A).
var hasValidJoiners: Bool {
try! UTS46.loadIfNecessary()

let scalars = self.unicodeScalars

for index in scalars.indices {
Expand Down
105 changes: 95 additions & 10 deletions Shared/Swift/UTS46.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
import Foundation
import Compression

enum UTS46 {
class UTS46 {

private(set) static var characterMap: [UInt32: String] = [:]
private(set) static var ignoredCharacters: CharacterSet = []
private(set) static var disallowedCharacters: CharacterSet = []
private(set) static var joiningTypes = [UInt32: JoiningType]()

private static var isLoaded = false

private enum Marker {
static let characterMap = UInt8.max
static let ignoredCharacters = UInt8.max - 1
Expand All @@ -38,19 +40,95 @@ enum UTS46 {
case badSize
case decompressionError
case badMarker
case unknownDataVersion
}

/// Compression schemes a data payload may be stored with.
///
/// Identical values to `NSData.CompressionAlgorithm`.
enum CompressionAlgorithm: Int {
    case lzfse = 0
    case lz4 = 1
    case lzma = 2
    case zlib = 3
}

/// The fixed-size header found at the start of a UTS #46 data file.
///
/// Serialized layout (8 bytes):
/// - bytes 0–5: the ASCII signature `UTS#46`
/// - byte 6: format version
/// - byte 7: flags (see `Flags`)
struct Header: RawRepresentable, CustomDebugStringConvertible {
    /// Number of bytes in a serialized header.
    private static let length = 8

    // Kept for source compatibility; not referenced inside this type.
    private static let compressionMask: UInt8 = 0x07
    private static let signature: [UInt8] = Array("UTS#46".utf8)

    /// The serialized header: signature, then version, then flags.
    ///
    /// Always exactly 8 bytes, so the result can be fed straight back into
    /// `init?(rawValue:)`.
    var rawValue: [UInt8] {
        return Self.signature + [version, flags.rawValue]
    }

    /// The flags byte of the header.
    ///
    /// Bit 3 records whether a CRC is present. The low three bits hold the
    /// compression algorithm's raw value plus one; zero means "not compressed".
    struct Flags: RawRepresentable {
        private static let hasCRCMask: UInt8 = 1 << 3
        private static let compressionMask: UInt8 = 0x7

        var hasCRC: Bool
        var compression: CompressionAlgorithm?

        /// The flags packed into a single byte.
        var rawValue: UInt8 {
            let crcBit: UInt8 = hasCRC ? Self.hasCRCMask : 0
            // Stored off by one so that 0 can stand for "no compression".
            let compressionBits = compression.map { UInt8($0.rawValue + 1) } ?? 0
            return crcBit | compressionBits
        }

        /// Unpacks a flags byte. Unrecognized compression bits yield `compression == nil`.
        init(rawValue: UInt8) {
            hasCRC = rawValue & Self.hasCRCMask != 0

            let compressionBits = rawValue & Self.compressionMask

            if (1...4).contains(compressionBits) {
                compression = CompressionAlgorithm(rawValue: Int(compressionBits) - 1)
            } else {
                compression = nil
            }
        }

        init(compression: CompressionAlgorithm? = nil, hasCRC: Bool = false) {
            self.compression = compression
            self.hasCRC = hasCRC
        }
    }

    let version: UInt8
    var flags: Flags
    var hasCRC: Bool { flags.hasCRC }
    var compression: CompressionAlgorithm? { flags.compression }

    /// Offset of the payload from the start of the file: the header itself,
    /// plus 4 more bytes when `hasCRC` is set.
    var dataOffset: Int { Self.length + (flags.hasCRC ? 4 : 0) }

    /// Parses a serialized header.
    ///
    /// - Parameter rawValue: Exactly 8 bytes. May be a slice whose `startIndex` is not zero.
    /// - Returns: `nil` if the length is wrong or the signature does not match.
    init?<T: DataProtocol>(rawValue: T) where T.Index == Int {
        guard rawValue.count == Self.length else { return nil }
        guard rawValue.prefix(Self.signature.count).elementsEqual(Self.signature) else { return nil }

        // Index relative to startIndex: slices (e.g. of `Data`) keep their parent's indices.
        let versionIndex = rawValue.index(rawValue.startIndex, offsetBy: Self.signature.count)
        let flagsIndex = rawValue.index(after: versionIndex)

        version = rawValue[versionIndex]
        flags = Flags(rawValue: rawValue[flagsIndex])
    }

    /// Creates a version-1 header with the given options.
    init(compression: CompressionAlgorithm? = nil, hasCRC: Bool = false) {
        self.version = 1
        self.flags = Flags(compression: compression, hasCRC: hasCRC)
    }

    var debugDescription: String {
        let compressionName = compression.map { String(describing: $0) } ?? "none"
        return "has CRC: \(hasCRC); compression: \(compressionName)"
    }
}

enum CompressionAlgorithm {
case lzfse
case lz4
case lzma
case zlib
/// Reads the file header from the first 8 bytes of `data`.
///
/// - Parameter data: The complete contents of a data file.
/// - Returns: The parsed header, or `nil` if `Header` rejects those bytes.
/// - Throws: `UTS46Error.badSize` if `data` holds fewer than 8 bytes.
private static func parseHeader(from data: Data) throws -> Header? {
    let headerLength = 8

    if data.count < headerLength {
        throw UTS46Error.badSize
    }

    return Header(rawValue: data.prefix(headerLength))
}

static func load(from url: URL, compression: CompressionAlgorithm?) throws {
let compressedData = try Data(contentsOf: url)
static func load(from url: URL) throws {
let fileData = try Data(contentsOf: url)

guard let data = self.decompress(data: compressedData, algorithm: compression) else {
guard let header = try? parseHeader(from: fileData) else { return }

guard header.version == 1 else { throw UTS46Error.unknownDataVersion }

let offset = 8 + (header.hasCRC ? 4 : 0)

guard fileData.count > offset else { throw UTS46Error.badSize }

let compressedData = fileData[offset...]

guard let data = self.decompress(data: compressedData, algorithm: header.compression) else {
throw UTS46Error.decompressionError
}

Expand All @@ -75,6 +153,14 @@ enum UTS46 {
}
}

isLoaded = true
}

/// Loads the `uts46` resource from the bundle that owns this class, unless
/// `isLoaded` is already set.
///
/// - Throws: `CocoaError(.fileNoSuchFile)` if the bundle has no `uts46`
///   resource, or any error thrown by `load(from:)`.
static func loadIfNecessary() throws {
    if isLoaded { return }

    let owningBundle = Bundle(for: Self.self)

    guard let resourceURL = owningBundle.url(forResource: "uts46", withExtension: nil) else {
        throw CocoaError(.fileNoSuchFile)
    }

    try load(from: resourceURL)
}

private static func decompress(data: Data, algorithm: CompressionAlgorithm?) -> Data? {
Expand All @@ -92,7 +178,6 @@ enum UTS46 {
rawAlgorithm = COMPRESSION_ZLIB
default:
return data

}

let capacity = 100_000
Expand Down
Binary file renamed Shared/uts46.xz → Shared/uts46
Binary file not shown.

0 comments on commit 6178ffc

Please sign in to comment.