diff --git a/Punycode.xcodeproj/project.pbxproj b/Punycode.xcodeproj/project.pbxproj index 4aa6a88..7ab8060 100644 --- a/Punycode.xcodeproj/project.pbxproj +++ b/Punycode.xcodeproj/project.pbxproj @@ -34,7 +34,7 @@ B27409C92443E45B00E567E7 /* Data+Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = B27409C82443E45B00E567E7 /* Data+Extensions.swift */; }; B27409CA2443F66900E567E7 /* Data+Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = B27409C82443E45B00E567E7 /* Data+Extensions.swift */; }; B2AF210724313283009F5EE7 /* UTS46.swift in Sources */ = {isa = PBXBuildFile; fileRef = B2AF210624313283009F5EE7 /* UTS46.swift */; }; - B2AF21092431376E009F5EE7 /* uts46.xz in Resources */ = {isa = PBXBuildFile; fileRef = B2AF21082431376E009F5EE7 /* uts46.xz */; }; + B2AF21092431376E009F5EE7 /* uts46 in Resources */ = {isa = PBXBuildFile; fileRef = B2AF21082431376E009F5EE7 /* uts46 */; }; B2B4F3D0242EB79E00CC752F /* main.swift in Sources */ = {isa = PBXBuildFile; fileRef = B2B4F3CF242EB79E00CC752F /* main.swift */; }; B2C3DD45241FECF200484850 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = B2C3DD44241FECF200484850 /* AppDelegate.swift */; }; B2C3DD47241FECF400484850 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = B2C3DD46241FECF400484850 /* Assets.xcassets */; }; @@ -140,7 +140,7 @@ B26FA03C2055425C00CD6AF1 /* libicucore.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libicucore.tbd; path = usr/lib/libicucore.tbd; sourceTree = SDKROOT; }; B27409C82443E45B00E567E7 /* Data+Extensions.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Data+Extensions.swift"; sourceTree = ""; }; B2AF210624313283009F5EE7 /* UTS46.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = UTS46.swift; sourceTree = ""; }; - B2AF21082431376E009F5EE7 /* uts46.xz */ = {isa = PBXFileReference; lastKnownFileType = file; path = uts46.xz; sourceTree = ""; }; + B2AF21082431376E009F5EE7 /* uts46 */ = {isa = PBXFileReference; lastKnownFileType = file; path = uts46; sourceTree = ""; }; B2B4F3CD242EB79E00CC752F /* icumap2code */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = icumap2code; sourceTree = BUILT_PRODUCTS_DIR; }; B2B4F3CF242EB79E00CC752F /* main.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = main.swift; sourceTree = ""; }; B2C3DCC1241FE95B00484850 /* String+Punycode.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "String+Punycode.swift"; sourceTree = ""; }; @@ -347,7 +347,7 @@ children = ( B25753801F3CE8B2002606DD /* WebNSURLExtras.h */, B2C59E7224278AF400183346 /* uidna-min.h */, - B2AF21082431376E009F5EE7 /* uts46.xz */, + B2AF21082431376E009F5EE7 /* uts46 */, B2C3DCC0241FE94400484850 /* Swift */, B2C3DCBF241FE92F00484850 /* Objective-C */, ); @@ -644,7 +644,7 @@ files = ( B2C3DD47241FECF400484850 /* Assets.xcassets in Resources */, B2C3DD56241FEE5A00484850 /* MainMenu.xib in Resources */, - B2AF21092431376E009F5EE7 /* uts46.xz in Resources */, + B2AF21092431376E009F5EE7 /* uts46 in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/Shared/Swift/String+Punycode.swift b/Shared/Swift/String+Punycode.swift index e535c72..bbf78b6 100644 --- a/Shared/Swift/String+Punycode.swift +++ b/Shared/Swift/String+Punycode.swift @@ -441,6 +441,8 @@ private extension String { /// - Returns: The mapped string. /// - Throws: `UTS46MapError` func mapUTS46() throws -> String { + try! UTS46.loadIfNecessary() + var result = "" for scalar in self.unicodeScalars { @@ -478,6 +480,8 @@ private extension String { /// /// See [RFC 5892, Appendix A.1 and A.2](https://tools.ietf.org/html/rfc5892#appendix-A). var hasValidJoiners: Bool { + try! UTS46.loadIfNecessary() + let scalars = self.unicodeScalars for index in scalars.indices { diff --git a/Shared/Swift/UTS46.swift b/Shared/Swift/UTS46.swift index c84883a..9f85bb5 100644 --- a/Shared/Swift/UTS46.swift +++ b/Shared/Swift/UTS46.swift @@ -8,13 +8,15 @@ import Foundation import Compression -enum UTS46 { +class UTS46 { private(set) static var characterMap: [UInt32: String] = [:] private(set) static var ignoredCharacters: CharacterSet = [] private(set) static var disallowedCharacters: CharacterSet = [] private(set) static var joiningTypes = [UInt32: JoiningType]() + private static var isLoaded = false + private enum Marker { static let characterMap = UInt8.max static let ignoredCharacters = UInt8.max - 1 @@ -38,19 +40,95 @@ enum UTS46 { case badSize case decompressionError case badMarker + case unknownDataVersion + } + + /// Identical values to `NSData.CompressionAlgorithm`. + enum CompressionAlgorithm: Int { + case lzfse = 0 + case lz4 = 1 + case lzma = 2 + case zlib = 3 + } + + struct Header: RawRepresentable, CustomDebugStringConvertible { + var rawValue: [UInt8] { + return Self.signature + [UInt8(0), version, flags.rawValue] + } + + private static let compressionMask: UInt8 = 0x07 + private static let signature: [UInt8] = Array("UTS#46".utf8) + + struct Flags: RawRepresentable { + var rawValue: UInt8 { + return (hasCRC ? hasCRCMask : 0) | UInt8(compression != nil ? compression!.rawValue + 1 : 0) + } + + var hasCRC: Bool + var compression: CompressionAlgorithm? + + private let hasCRCMask: UInt8 = 1 << 3 + private let compressionMask: UInt8 = 0x7 + + init(rawValue: UInt8) { + hasCRC = rawValue & hasCRCMask != 0 + let compressionBits = rawValue & compressionMask + + if (1...4).contains(compressionBits) { + compression = CompressionAlgorithm(rawValue: Int(compressionBits) - 1) + } + } + + init(compression: CompressionAlgorithm? = nil, hasCRC: Bool = false) { + self.compression = compression + self.hasCRC = hasCRC + } + } + + let version: UInt8 + var flags: Flags + var hasCRC: Bool { flags.hasCRC } + var compression: CompressionAlgorithm? { flags.compression } + var dataOffset: Int { 8 + (flags.hasCRC ? 4 : 0) } + + init?(rawValue: T) where T.Index == Int { + guard rawValue.count == 8 else { return nil } + guard rawValue.prefix(Self.signature.count).elementsEqual(Self.signature) else { return nil } + + version = rawValue[6] + flags = Flags(rawValue: rawValue[7]) + } + + init(compression: CompressionAlgorithm? = nil, hasCRC: Bool = false) { + self.version = 1 + self.flags = Flags(compression: compression, hasCRC: hasCRC) + } + + var debugDescription: String { "has CRC: \(hasCRC); compression: \(compression != nil ? String(describing: compression!) : "none")" } } - enum CompressionAlgorithm { - case lzfse - case lz4 - case lzma - case zlib + private static func parseHeader(from data: Data) throws -> Header? { + let headerData = data.prefix(8) + + guard headerData.count == 8 else { throw UTS46Error.badSize } + + return Header(rawValue: headerData) } - static func load(from url: URL, compression: CompressionAlgorithm?) throws { - let compressedData = try Data(contentsOf: url) + static func load(from url: URL) throws { + let fileData = try Data(contentsOf: url) - guard let data = self.decompress(data: compressedData, algorithm: compression) else { + guard let header = try? parseHeader(from: fileData) else { return } + + guard header.version == 1 else { throw UTS46Error.unknownDataVersion } + + let offset = 8 + (header.hasCRC ? 4 : 0) + + guard fileData.count > offset else { throw UTS46Error.badSize } + + let compressedData = fileData[offset...] + + guard let data = self.decompress(data: compressedData, algorithm: header.compression) else { throw UTS46Error.decompressionError } @@ -75,6 +153,14 @@ enum UTS46 { } } + isLoaded = true + } + + static func loadIfNecessary() throws { + guard !isLoaded else { return } + guard let url = Bundle(for: Self.self).url(forResource: "uts46", withExtension: nil) else { throw CocoaError(.fileNoSuchFile) } + + try load(from: url) } private static func decompress(data: Data, algorithm: CompressionAlgorithm?) -> Data? { @@ -92,7 +178,6 @@ enum UTS46 { rawAlgorithm = COMPRESSION_ZLIB default: return data - } let capacity = 100_000 diff --git a/Shared/uts46.xz b/Shared/uts46 similarity index 98% rename from Shared/uts46.xz rename to Shared/uts46 index b6c3364..0ce8761 100644 Binary files a/Shared/uts46.xz and b/Shared/uts46 differ