From ff2b44d6227f204de8dbad7a266a057a5fd414bf Mon Sep 17 00:00:00 2001 From: fearlessfe <505380967@qq.com> Date: Fri, 25 Oct 2024 19:35:31 +0800 Subject: [PATCH 1/5] feat: add spec test cases --- .gitmodules | 3 + build.zig | 7 ++ build.zig.zon | 5 +- consensus-spec-tests | 1 + serialized.ssz_snappy | Bin 18 -> 0 bytes src/spec_tests/ssz_static/root.zig | 121 ++++++++++++++++++++++++++--- 6 files changed, 121 insertions(+), 16 deletions(-) create mode 160000 consensus-spec-tests delete mode 100644 serialized.ssz_snappy diff --git a/.gitmodules b/.gitmodules index 639fd42..fc0d547 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "bls"] path = bls url = https://github.com/herumi/bls.git +[submodule "consensus-spec-tests"] + path = consensus-spec-tests + url = https://github.com/ethereum/consensus-spec-tests.git diff --git a/build.zig b/build.zig index c6ef9f8..f72e612 100644 --- a/build.zig +++ b/build.zig @@ -94,6 +94,13 @@ pub fn build(b: *std.Build) void { lib_unit_tests.root_module.addImport("bls", bls); lib_unit_tests.addIncludePath(b.path("bls/include/")); lib_unit_tests.addIncludePath(b.path("bls/mcl/include/")); + const yaml = b.dependency("yaml", .{ + .target = target, + .optimize = optimize, + }); + + lib_unit_tests.root_module.addImport("yaml", yaml.module("yaml")); + const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests); const exe_unit_tests = b.addTest(.{ diff --git a/build.zig.zon b/build.zig.zon index 8a5b488..cce63cd 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -22,10 +22,7 @@ // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. // Once all dependencies are fetched, `zig build` no longer requires // internet connectivity. 
- // .dependencies = .{ .zabi = .{ - // .url = "https://github.com/Raiden1411/zabi/archive/79c77aa4a39d41ed50033fd922f4d37c0f0638bb.tar.gz", - // .hash = "122095aaf2d09e3286bf8b00b773b65c1632639de6c5005de881754df7c04efc7e98", - // } }, + .dependencies = .{ .yaml = .{ .path = "../zig-yaml" } }, .paths = .{ "build.zig", "build.zig.zon", diff --git a/consensus-spec-tests b/consensus-spec-tests new file mode 160000 index 0000000..09c1e41 --- /dev/null +++ b/consensus-spec-tests @@ -0,0 +1 @@ +Subproject commit 09c1e41b722216efa9b1c6390169b984f0870052 diff --git a/serialized.ssz_snappy b/serialized.ssz_snappy deleted file mode 100644 index 0de2290acdf50a13502872d2272ec1195104b0ce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 18 ZcmWfzDSch9DsbgdJwvfLf80WYY5+oQ2H5}r diff --git a/src/spec_tests/ssz_static/root.zig b/src/spec_tests/ssz_static/root.zig index ed6460a..afc5390 100644 --- a/src/spec_tests/ssz_static/root.zig +++ b/src/spec_tests/ssz_static/root.zig @@ -4,26 +4,123 @@ const ssz = @import("../../ssz/ssz.zig"); const types = @import("../../consensus/types.zig"); const snappy = @import("../../snappy/snappy.zig"); -test "hash tree root" { - const fork = types.Fork{ - .previous_version = [4]u8{ 0x75, 0xeb, 0x7f, 0x25 }, - .current_version = [4]u8{ 0x10, 0xd4, 0xe2, 0x7f }, - .epoch = 8876772290899440384, - }; +const Yaml = @import("yaml").Yaml; + +const gpa = testing.allocator; + +fn loadFromFile(file_path: []const u8) !Yaml { + const file = try std.fs.cwd().openFile(file_path, .{}); + defer file.close(); + + const source = try file.readToEndAlloc(gpa, std.math.maxInt(u32)); + defer gpa.free(source); + + return Yaml.load(gpa, source); +} + +const Roots = struct { + root: [32]u8, +}; + +const TestCasesUnion = union { + Fork: types.Fork, +}; + +fn getLeafDirs(allocator: std.mem.Allocator, path: []const u8) !std.ArrayList([]const u8) { + var leafDirs = std.ArrayList([]const u8).init(allocator); + // defer leafDirs.deinit(); + 
var list = std.ArrayList([]const u8).init(allocator); + defer { + for (list.items) |item| { + allocator.free(item); + } + list.deinit(); + } + try list.append(path); + + var index: u32 = 0; + + while (index < list.items.len) { + var hasSubDir = false; + const currentPath = list.items[index]; + var dir = try std.fs.cwd().openDir(currentPath, .{}); + defer dir.close(); + + var iter = dir.iterate(); + + while (try iter.next()) |entry| { + if (entry.kind == .directory) { + hasSubDir = true; + const fullPath = try std.fs.path.join(allocator, &[_][]const u8{ currentPath, entry.name }); + try list.append(fullPath); + } + } + if (!hasSubDir) { + // std.debug.print("currentPath: {s}\n", .{currentPath}); + try leafDirs.append(try allocator.dupe(u8, currentPath)); + // try leafDirs.append(currentPath); + // defer allocator.free(currentPath); + } + index += 1; + } + + return leafDirs; +} + +fn testSSZStatic(path: []const u8, t: type) !void { + // parse from yaml + const valueFile = try std.fmt.allocPrint(testing.allocator, "{s}/value.yaml", .{path}); + defer testing.allocator.free(valueFile); + var parsed = try loadFromFile(valueFile); + defer parsed.deinit(); + const fork = try parsed.parse(t); + // test hash tree root var out: [32]u8 = [_]u8{0} ** 32; try ssz.hashTreeRoot(fork, &out, testing.allocator); - const expect: [32]u8 = [_]u8{ 0x98, 0x2a, 0x69, 0x96, 0xc9, 0x2f, 0x86, 0xf6, 0x37, 0x68, 0x3c, 0x72, 0xd9, 0x09, 0xc7, 0xa8, 0x68, 0x11, 0x0e, 0x3b, 0x05, 0xf7, 0xb4, 0x48, 0x44, 0xbc, 0x53, 0x96, 0x0d, 0x89, 0x56, 0xf5 }; + const rootFile = try std.fmt.allocPrint(testing.allocator, "{s}/roots.yaml", .{path}); + defer testing.allocator.free(rootFile); + var rootData = try loadFromFile(rootFile); + defer rootData.deinit(); + const root = try rootData.parse(Roots); + const expect: [32]u8 = root.root; try std.testing.expect(std.mem.eql(u8, out[0..], expect[0..])); - const file_path = "serialized.ssz_snappy"; + // test ssz encode + const file_path = try 
std.fmt.allocPrint(testing.allocator, "{s}/serialized.ssz_snappy", .{path}); + defer testing.allocator.free(file_path); const file_contents = try std.fs.cwd().readFileAlloc(testing.allocator, file_path, std.math.maxInt(usize)); defer testing.allocator.free(file_contents); - // std.debug.print("Hex: {any}\n", .{std.fmt.fmtSliceHexLower(file_contents)}); - const decoded_data = try snappy.decode(testing.allocator, file_contents); defer testing.allocator.free(decoded_data); - const encode = try ssz.encodeSSZ(testing.allocator, fork); defer testing.allocator.free(encode); - try std.testing.expect(std.mem.eql(u8, encode, decoded_data)); + + // test ssz decode + const decode = try ssz.decodeSSZ(t, decoded_data); + try std.testing.expectEqualDeep(decode, fork); +} + +test "ssz static" { + const testPath = "consensus-spec-tests/tests/mainnet"; + const gpa1 = testing.allocator; + const fields = @typeInfo(TestCasesUnion).@"union".fields; + inline for (fields) |field| { + const fieldType = field.type; + const fieldName = field.name; + const ssz_type_path = try std.fmt.allocPrint(gpa1, "{s}/phase0/ssz_static/{s}", .{ testPath, fieldName }); + + var dirs = try getLeafDirs(gpa1, ssz_type_path); + + // deinit the dirs array + defer { + for (dirs.items) |item| { + gpa1.free(item); + } + dirs.deinit(); + } + + for (dirs.items) |dir| { + try testSSZStatic(dir, fieldType); + } + } } From fba130fd4697210b2aea9c56b7bd53f52053332c Mon Sep 17 00:00:00 2001 From: fearlessfe <505380967@qq.com> Date: Tue, 29 Oct 2024 03:36:28 +0100 Subject: [PATCH 2/5] feat: add dep of yaml --- build.zig.zon | 2 +- src/snappy/snappy.zig | 2 ++ src/spec_tests/ssz_static/root.zig | 2 +- src/ssz/ssz.zig | 2 +- src/ssz/zeros.zig | 3 ++- 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index cce63cd..23b0e08 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -22,7 +22,7 @@ // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. 
// Once all dependencies are fetched, `zig build` no longer requires // internet connectivity. - .dependencies = .{ .yaml = .{ .path = "../zig-yaml" } }, + .dependencies = .{ .yaml = .{ .url = "git+https://github.com/optimism-java/zig-yaml.git#7a7c5162230bb08f73ec22a063010ed7c76ccded", .hash = "1220a6d7148f7777d997ee663e0efe0fb9c57a38658ea164bcd267e8391c3ce0aa87" } }, .paths = .{ "build.zig", "build.zig.zon", diff --git a/src/snappy/snappy.zig b/src/snappy/snappy.zig index a7f5fba..1638967 100644 --- a/src/snappy/snappy.zig +++ b/src/snappy/snappy.zig @@ -1,3 +1,5 @@ +//! The code bellow is essentially a port of https://github.com/gsquire/zig-snappy + const std = @import("std"); const Allocator = std.mem.Allocator; const crc32 = std.hash.crc.Crc32Iscsi; diff --git a/src/spec_tests/ssz_static/root.zig b/src/spec_tests/ssz_static/root.zig index afc5390..63d805c 100644 --- a/src/spec_tests/ssz_static/root.zig +++ b/src/spec_tests/ssz_static/root.zig @@ -43,7 +43,7 @@ fn getLeafDirs(allocator: std.mem.Allocator, path: []const u8) !std.ArrayList([] while (index < list.items.len) { var hasSubDir = false; const currentPath = list.items[index]; - var dir = try std.fs.cwd().openDir(currentPath, .{}); + var dir = try std.fs.cwd().openDir(currentPath, .{ .iterate = true }); defer dir.close(); var iter = dir.iterate(); diff --git a/src/ssz/ssz.zig b/src/ssz/ssz.zig index bd895cf..0d8b356 100644 --- a/src/ssz/ssz.zig +++ b/src/ssz/ssz.zig @@ -1,4 +1,4 @@ -//! The code bellow is essentially a port of https://github.com/Raiden1411/zabi +//! The code bellow is essentially a combination of https://github.com/Raiden1411/zabi and https://github.com/gballet/ssz.zig //! to the most recent version of zig with a couple of stylistic changes and support for //! other zig types. diff --git a/src/ssz/zeros.zig b/src/ssz/zeros.zig index a5d359f..36c41d5 100644 --- a/src/ssz/zeros.zig +++ b/src/ssz/zeros.zig @@ -1,4 +1,5 @@ -// List of root hashes of zero-subtries, up to depth 255. +//! 
The code bellow is essentially a port of https://github.com/gballet/ssz.zig +//! // List of root hashes of zero-subtries, up to depth 255. const std = @import("std"); const strs = [_][]const u8{ From 20f0254b63f037f91e406213da414c75d5e00ba2 Mon Sep 17 00:00:00 2001 From: fearlessfe <505380967@qq.com> Date: Tue, 29 Oct 2024 04:23:51 +0100 Subject: [PATCH 3/5] feat: download spec test cases --- .gitignore | 3 ++- .gitmodules | 3 --- Makefile | 7 +++++++ consensus-spec-tests | 1 - 4 files changed, 9 insertions(+), 5 deletions(-) delete mode 160000 consensus-spec-tests diff --git a/.gitignore b/.gitignore index b96e2c3..9e4983f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ .zig-cache zig-out -.vscode \ No newline at end of file +.vscode +consensus-spec-tests/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index fc0d547..639fd42 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "bls"] path = bls url = https://github.com/herumi/bls.git -[submodule "consensus-spec-tests"] - path = consensus-spec-tests - url = https://github.com/ethereum/consensus-spec-tests.git diff --git a/Makefile b/Makefile index 87e2cc2..2d6eca0 100644 --- a/Makefile +++ b/Makefile @@ -9,8 +9,15 @@ check_nasm: fi \ fi +SPEC_VERSION ?= v1.5.0-alpha.8 + # Update deps target to include nasm check deps: check_nasm @echo "Installing dependencies" @git submodule update --init --recursive @cd bls && make -f Makefile.onelib ETH_CFLAGS=-DBLS_ETH LIB_DIR=lib + + mkdir -p consensus-spec-tests + wget https://github.com/ethereum/consensus-spec-tests/releases/download/$(SPEC_VERSION)/general.tar.gz -O - | tar -xz -C consensus-spec-tests + wget https://github.com/ethereum/consensus-spec-tests/releases/download/$(SPEC_VERSION)/minimal.tar.gz -O - | tar -xz -C consensus-spec-tests + wget https://github.com/ethereum/consensus-spec-tests/releases/download/$(SPEC_VERSION)/mainnet.tar.gz -O - | tar -xz -C consensus-spec-tests diff --git a/consensus-spec-tests 
b/consensus-spec-tests deleted file mode 160000 index 09c1e41..0000000 --- a/consensus-spec-tests +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 09c1e41b722216efa9b1c6390169b984f0870052 From 1cb9afa929ca773c06c9110dc482c913d7927052 Mon Sep 17 00:00:00 2001 From: PengZhen Date: Tue, 29 Oct 2024 14:05:58 +0800 Subject: [PATCH 4/5] feat: add yaml lib --- .github/workflows/check.yml | 5 + Makefile | 1 + build.zig | 6 - build.zig.zon | 1 - clib/libyaml | 1 + src/spec_tests/ssz_static/root.zig | 79 +-- src/yaml/Tokenizer.zig | 575 +++++++++++++++++++++ src/yaml/parse.zig | 794 +++++++++++++++++++++++++++++ src/yaml/parse/test.zig | 764 +++++++++++++++++++++++++++ src/yaml/yaml.zig | 526 +++++++++++++++++++ src/yaml/yaml/test.zig | 539 ++++++++++++++++++++ 11 files changed, 3252 insertions(+), 39 deletions(-) create mode 160000 clib/libyaml create mode 100644 src/yaml/Tokenizer.zig create mode 100644 src/yaml/parse.zig create mode 100644 src/yaml/parse/test.zig create mode 100644 src/yaml/yaml.zig create mode 100644 src/yaml/yaml/test.zig diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 02e029b..43cab8f 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -47,6 +47,11 @@ jobs: - name: Installing dependencies run: | make deps + + - name: Installing spec test + run: | + make deps_test + - name: Unit testing run: zig build test --summary all diff --git a/Makefile b/Makefile index 2d6eca0..5644b9d 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,7 @@ deps: check_nasm @git submodule update --init --recursive @cd bls && make -f Makefile.onelib ETH_CFLAGS=-DBLS_ETH LIB_DIR=lib +deps_test: mkdir -p consensus-spec-tests wget https://github.com/ethereum/consensus-spec-tests/releases/download/$(SPEC_VERSION)/general.tar.gz -O - | tar -xz -C consensus-spec-tests wget https://github.com/ethereum/consensus-spec-tests/releases/download/$(SPEC_VERSION)/minimal.tar.gz -O - | tar -xz -C consensus-spec-tests diff --git a/build.zig 
b/build.zig index f72e612..66e85ed 100644 --- a/build.zig +++ b/build.zig @@ -94,12 +94,6 @@ pub fn build(b: *std.Build) void { lib_unit_tests.root_module.addImport("bls", bls); lib_unit_tests.addIncludePath(b.path("bls/include/")); lib_unit_tests.addIncludePath(b.path("bls/mcl/include/")); - const yaml = b.dependency("yaml", .{ - .target = target, - .optimize = optimize, - }); - - lib_unit_tests.root_module.addImport("yaml", yaml.module("yaml")); const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests); diff --git a/build.zig.zon b/build.zig.zon index 23b0e08..7cda98d 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -22,7 +22,6 @@ // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. // Once all dependencies are fetched, `zig build` no longer requires // internet connectivity. - .dependencies = .{ .yaml = .{ .url = "git+https://github.com/optimism-java/zig-yaml.git#7a7c5162230bb08f73ec22a063010ed7c76ccded", .hash = "1220a6d7148f7777d997ee663e0efe0fb9c57a38658ea164bcd267e8391c3ce0aa87" } }, .paths = .{ "build.zig", "build.zig.zon", diff --git a/clib/libyaml b/clib/libyaml new file mode 160000 index 0000000..840b65c --- /dev/null +++ b/clib/libyaml @@ -0,0 +1 @@ +Subproject commit 840b65c40675e2d06bf40405ad3f12dec7f35923 diff --git a/src/spec_tests/ssz_static/root.zig b/src/spec_tests/ssz_static/root.zig index 63d805c..b74f5ef 100644 --- a/src/spec_tests/ssz_static/root.zig +++ b/src/spec_tests/ssz_static/root.zig @@ -4,10 +4,15 @@ const ssz = @import("../../ssz/ssz.zig"); const types = @import("../../consensus/types.zig"); const snappy = @import("../../snappy/snappy.zig"); -const Yaml = @import("yaml").Yaml; +const Yaml = @import("../../yaml/yaml.zig").Yaml; const gpa = testing.allocator; +/// Loads and parses a YAML file into a Yaml object +/// Parameters: +/// file_path: Path to the YAML file to load +/// Returns: +/// Parsed Yaml object or error fn loadFromFile(file_path: []const u8) !Yaml { const file = try 
std.fs.cwd().openFile(file_path, .{}); defer file.close(); @@ -17,15 +22,46 @@ fn loadFromFile(file_path: []const u8) !Yaml { return Yaml.load(gpa, source); } - +// load root.yml in spec test const Roots = struct { root: [32]u8, }; - -const TestCasesUnion = union { +// test cases for all phases +const CommonUnion = union { Fork: types.Fork, }; +test "ssz static" { + const testPath = "consensus-spec-tests/tests/mainnet"; + const gpa1 = testing.allocator; + const fields = @typeInfo(CommonUnion).@"union".fields; + inline for (fields) |field| { + const fieldType = field.type; + const fieldName = field.name; + const ssz_type_path = try std.fmt.allocPrint(gpa1, "{s}/phase0/ssz_static/{s}", .{ testPath, fieldName }); + + var dirs = try getLeafDirs(gpa1, ssz_type_path); + + // deinit the dirs array + defer { + for (dirs.items) |item| { + gpa1.free(item); + } + dirs.deinit(); + } + + for (dirs.items) |dir| { + try testSSZStatic(dir, fieldType); + } + } +} + +/// Recursively finds all leaf directories (directories with no subdirectories) starting from the given path +/// Parameters: +/// allocator: Memory allocator for dynamic allocations +/// path: Starting directory path to search from +/// Returns: +/// ArrayList containing paths to all leaf directories fn getLeafDirs(allocator: std.mem.Allocator, path: []const u8) !std.ArrayList([]const u8) { var leafDirs = std.ArrayList([]const u8).init(allocator); // defer leafDirs.deinit(); @@ -56,10 +92,7 @@ fn getLeafDirs(allocator: std.mem.Allocator, path: []const u8) !std.ArrayList([] } } if (!hasSubDir) { - // std.debug.print("currentPath: {s}\n", .{currentPath}); try leafDirs.append(try allocator.dupe(u8, currentPath)); - // try leafDirs.append(currentPath); - // defer allocator.free(currentPath); } index += 1; } @@ -67,6 +100,13 @@ fn getLeafDirs(allocator: std.mem.Allocator, path: []const u8) !std.ArrayList([] return leafDirs; } +/// Tests SSZ (Simple Serialize) static functionality by performing: +/// 1. YAML parsing +/// 2. 
Hash tree root verification +/// 3. SSZ encoding/decoding with snappy compression +/// Parameters: +/// path: Directory path containing test files +/// t: Type to test SSZ operations against fn testSSZStatic(path: []const u8, t: type) !void { // parse from yaml const valueFile = try std.fmt.allocPrint(testing.allocator, "{s}/value.yaml", .{path}); @@ -99,28 +139,3 @@ fn testSSZStatic(path: []const u8, t: type) !void { const decode = try ssz.decodeSSZ(t, decoded_data); try std.testing.expectEqualDeep(decode, fork); } - -test "ssz static" { - const testPath = "consensus-spec-tests/tests/mainnet"; - const gpa1 = testing.allocator; - const fields = @typeInfo(TestCasesUnion).@"union".fields; - inline for (fields) |field| { - const fieldType = field.type; - const fieldName = field.name; - const ssz_type_path = try std.fmt.allocPrint(gpa1, "{s}/phase0/ssz_static/{s}", .{ testPath, fieldName }); - - var dirs = try getLeafDirs(gpa1, ssz_type_path); - - // deinit the dirs array - defer { - for (dirs.items) |item| { - gpa1.free(item); - } - dirs.deinit(); - } - - for (dirs.items) |dir| { - try testSSZStatic(dir, fieldType); - } - } -} diff --git a/src/yaml/Tokenizer.zig b/src/yaml/Tokenizer.zig new file mode 100644 index 0000000..47e106e --- /dev/null +++ b/src/yaml/Tokenizer.zig @@ -0,0 +1,575 @@ +const Tokenizer = @This(); + +const std = @import("std"); +const log = std.log.scoped(.tokenizer); +const testing = std.testing; + +buffer: []const u8, +index: usize = 0, + +pub const Token = struct { + id: Id, + start: usize, + end: usize, + + pub const Id = enum { + // zig fmt: off + eof, + + new_line, + doc_start, // --- + doc_end, // ... + seq_item_ind, // - + map_value_ind, // : + flow_map_start, // { + flow_map_end, // } + flow_seq_start, // [ + flow_seq_end, // ] + + comma, + space, + tab, + comment, // # + alias, // * + anchor, // & + tag, // ! + + single_quoted, // '...' + double_quoted, // "..." 
+ literal, + // zig fmt: on + }; +}; + +pub const TokenIndex = usize; + +pub const TokenIterator = struct { + buffer: []const Token, + pos: TokenIndex = 0, + + pub fn next(self: *TokenIterator) ?Token { + const token = self.peek() orelse return null; + self.pos += 1; + return token; + } + + pub fn peek(self: TokenIterator) ?Token { + if (self.pos >= self.buffer.len) return null; + return self.buffer[self.pos]; + } + + pub fn reset(self: *TokenIterator) void { + self.pos = 0; + } + + pub fn seekTo(self: *TokenIterator, pos: TokenIndex) void { + self.pos = pos; + } + + pub fn seekBy(self: *TokenIterator, offset: isize) void { + const new_pos = @as(isize, @bitCast(self.pos)) + offset; + if (new_pos < 0) { + self.pos = 0; + } else { + self.pos = @as(usize, @intCast(new_pos)); + } + } +}; + +fn stringMatchesPattern(comptime pattern: []const u8, slice: []const u8) bool { + comptime var count: usize = 0; + inline while (count < pattern.len) : (count += 1) { + if (count >= slice.len) return false; + const c = slice[count]; + if (pattern[count] != c) return false; + } + return true; +} + +fn matchesPattern(self: Tokenizer, comptime pattern: []const u8) bool { + return stringMatchesPattern(pattern, self.buffer[self.index..]); +} + +pub fn next(self: *Tokenizer) Token { + var result = Token{ + .id = .eof, + .start = self.index, + .end = undefined, + }; + + var state: enum { + start, + new_line, + space, + tab, + comment, + single_quoted, + double_quoted, + literal, + } = .start; + + while (self.index < self.buffer.len) : (self.index += 1) { + const c = self.buffer[self.index]; + switch (state) { + .start => switch (c) { + ' ' => { + state = .space; + }, + '\t' => { + state = .tab; + }, + '\n' => { + result.id = .new_line; + self.index += 1; + break; + }, + '\r' => { + state = .new_line; + }, + + '-' => if (self.matchesPattern("---")) { + result.id = .doc_start; + self.index += "---".len; + break; + } else if (self.matchesPattern("- ")) { + result.id = .seq_item_ind; + 
self.index += "- ".len; + break; + } else { + state = .literal; + }, + + '.' => if (self.matchesPattern("...")) { + result.id = .doc_end; + self.index += "...".len; + break; + } else { + state = .literal; + }, + + ',' => { + result.id = .comma; + self.index += 1; + break; + }, + '#' => { + state = .comment; + }, + '*' => { + result.id = .alias; + self.index += 1; + break; + }, + '&' => { + result.id = .anchor; + self.index += 1; + break; + }, + '!' => { + result.id = .tag; + self.index += 1; + break; + }, + '[' => { + result.id = .flow_seq_start; + self.index += 1; + break; + }, + ']' => { + result.id = .flow_seq_end; + self.index += 1; + break; + }, + ':' => { + result.id = .map_value_ind; + self.index += 1; + break; + }, + '{' => { + result.id = .flow_map_start; + self.index += 1; + break; + }, + '}' => { + result.id = .flow_map_end; + self.index += 1; + break; + }, + '\'' => { + state = .single_quoted; + }, + '"' => { + state = .double_quoted; + }, + else => { + state = .literal; + }, + }, + + .comment => switch (c) { + '\r', '\n' => { + result.id = .comment; + break; + }, + else => {}, + }, + + .space => switch (c) { + ' ' => {}, + else => { + result.id = .space; + break; + }, + }, + + .tab => switch (c) { + '\t' => {}, + else => { + result.id = .tab; + break; + }, + }, + + .new_line => switch (c) { + '\n' => { + result.id = .new_line; + self.index += 1; + break; + }, + else => {}, // TODO this should be an error condition + }, + + .single_quoted => switch (c) { + '\'' => if (!self.matchesPattern("''")) { + result.id = .single_quoted; + self.index += 1; + break; + } else { + self.index += "''".len - 1; + }, + else => {}, + }, + + .double_quoted => switch (c) { + '"' => { + if (stringMatchesPattern("\\", self.buffer[self.index - 1 ..])) { + self.index += 1; + } else { + result.id = .double_quoted; + self.index += 1; + break; + } + }, + else => {}, + }, + + .literal => switch (c) { + '\r', '\n', ' ', '\'', '"', ',', ':', ']', '}' => { + result.id = .literal; + 
break; + }, + else => { + result.id = .literal; + }, + }, + } + } + + if (self.index >= self.buffer.len) { + switch (state) { + .literal => { + result.id = .literal; + }, + else => {}, + } + } + + result.end = self.index; + + log.debug("{any}", .{result}); + log.debug(" | {s}", .{self.buffer[result.start..result.end]}); + + return result; +} + +fn testExpected(source: []const u8, expected: []const Token.Id) !void { + var tokenizer = Tokenizer{ + .buffer = source, + }; + + var given = std.ArrayList(Token.Id).init(testing.allocator); + defer given.deinit(); + + while (true) { + const token = tokenizer.next(); + try given.append(token.id); + if (token.id == .eof) break; + } + + try testing.expectEqualSlices(Token.Id, expected, given.items); +} + +test { + std.testing.refAllDecls(@This()); +} + +test "empty doc" { + try testExpected("", &[_]Token.Id{.eof}); +} + +test "empty doc with explicit markers" { + try testExpected( + \\--- + \\... + , &[_]Token.Id{ + .doc_start, .new_line, .doc_end, .eof, + }); +} + +test "empty doc with explicit markers and a directive" { + try testExpected( + \\--- !tbd-v1 + \\... 
+ , &[_]Token.Id{ + .doc_start, + .space, + .tag, + .literal, + .new_line, + .doc_end, + .eof, + }); +} + +test "sequence of values" { + try testExpected( + \\- 0 + \\- 1 + \\- 2 + , &[_]Token.Id{ + .seq_item_ind, + .literal, + .new_line, + .seq_item_ind, + .literal, + .new_line, + .seq_item_ind, + .literal, + .eof, + }); +} + +test "sequence of sequences" { + try testExpected( + \\- [ val1, val2] + \\- [val3, val4 ] + , &[_]Token.Id{ + .seq_item_ind, + .flow_seq_start, + .space, + .literal, + .comma, + .space, + .literal, + .flow_seq_end, + .new_line, + .seq_item_ind, + .flow_seq_start, + .literal, + .comma, + .space, + .literal, + .space, + .flow_seq_end, + .eof, + }); +} + +test "mappings" { + try testExpected( + \\key1: value1 + \\key2: value2 + , &[_]Token.Id{ + .literal, + .map_value_ind, + .space, + .literal, + .new_line, + .literal, + .map_value_ind, + .space, + .literal, + .eof, + }); +} + +test "inline mapped sequence of values" { + try testExpected( + \\key : [ val1, + \\ val2 ] + , &[_]Token.Id{ + .literal, + .space, + .map_value_ind, + .space, + .flow_seq_start, + .space, + .literal, + .comma, + .space, + .new_line, + .space, + .literal, + .space, + .flow_seq_end, + .eof, + }); +} + +test "part of tbd" { + try testExpected( + \\--- !tapi-tbd + \\tbd-version: 4 + \\targets: [ x86_64-macos ] + \\ + \\uuids: + \\ - target: x86_64-macos + \\ value: F86CC732-D5E4-30B5-AA7D-167DF5EC2708 + \\ + \\install-name: '/usr/lib/libSystem.B.dylib' + \\... 
+ , &[_]Token.Id{ + .doc_start, + .space, + .tag, + .literal, + .new_line, + .literal, + .map_value_ind, + .space, + .literal, + .new_line, + .literal, + .map_value_ind, + .space, + .flow_seq_start, + .space, + .literal, + .space, + .flow_seq_end, + .new_line, + .new_line, + .literal, + .map_value_ind, + .new_line, + .space, + .seq_item_ind, + .literal, + .map_value_ind, + .space, + .literal, + .new_line, + .space, + .literal, + .map_value_ind, + .space, + .literal, + .new_line, + .new_line, + .literal, + .map_value_ind, + .space, + .single_quoted, + .new_line, + .doc_end, + .eof, + }); +} + +test "Unindented list" { + try testExpected( + \\b: + \\- foo: 1 + \\c: 1 + , &[_]Token.Id{ + .literal, + .map_value_ind, + .new_line, + .seq_item_ind, + .literal, + .map_value_ind, + .space, + .literal, + .new_line, + .literal, + .map_value_ind, + .space, + .literal, + .eof, + }); +} + +test "escape sequences" { + try testExpected( + \\a: 'here''s an apostrophe' + \\b: "a newline\nand a\ttab" + \\c: "\"here\" and there" + , &[_]Token.Id{ + .literal, + .map_value_ind, + .space, + .single_quoted, + .new_line, + .literal, + .map_value_ind, + .space, + .double_quoted, + .new_line, + .literal, + .map_value_ind, + .space, + .double_quoted, + .eof, + }); +} + +test "comments" { + try testExpected( + \\key: # some comment about the key + \\# first value + \\- val1 + \\# second value + \\- val2 + , &[_]Token.Id{ + .literal, + .map_value_ind, + .space, + .comment, + .new_line, + .comment, + .new_line, + .seq_item_ind, + .literal, + .new_line, + .comment, + .new_line, + .seq_item_ind, + .literal, + .eof, + }); +} + +test "quoted literals" { + try testExpected( + \\'#000000' + \\'[000000' + \\"&someString" + , &[_]Token.Id{ + .single_quoted, + .new_line, + .single_quoted, + .new_line, + .double_quoted, + .eof, + }); +} diff --git a/src/yaml/parse.zig b/src/yaml/parse.zig new file mode 100644 index 0000000..9648f8e --- /dev/null +++ b/src/yaml/parse.zig @@ -0,0 +1,794 @@ +const std = 
@import("std"); +const assert = std.debug.assert; +const log = std.log.scoped(.parse); +const mem = std.mem; + +const Allocator = mem.Allocator; +const Tokenizer = @import("Tokenizer.zig"); +const Token = Tokenizer.Token; +const TokenIndex = Tokenizer.TokenIndex; +const TokenIterator = Tokenizer.TokenIterator; + +pub const ParseError = error{ + InvalidEscapeSequence, + MalformedYaml, + NestedDocuments, + UnexpectedEof, + UnexpectedToken, + Unhandled, +} || Allocator.Error; + +pub const Node = struct { + tag: Tag, + tree: *const Tree, + start: TokenIndex, + end: TokenIndex, + + pub const Tag = enum { + doc, + map, + list, + value, + }; + + pub fn cast(self: *const Node, comptime T: type) ?*const T { + if (self.tag != T.base_tag) { + return null; + } + return @fieldParentPtr("base", self); + } + + pub fn deinit(self: *Node, allocator: Allocator) void { + switch (self.tag) { + .doc => { + const parent: *Node.Doc = @fieldParentPtr("base", self); + parent.deinit(allocator); + allocator.destroy(parent); + }, + .map => { + const parent: *Node.Map = @fieldParentPtr("base", self); + parent.deinit(allocator); + allocator.destroy(parent); + }, + .list => { + const parent: *Node.List = @fieldParentPtr("base", self); + parent.deinit(allocator); + allocator.destroy(parent); + }, + .value => { + const parent: *Node.Value = @fieldParentPtr("base", self); + parent.deinit(allocator); + allocator.destroy(parent); + }, + } + } + + pub fn format( + self: *const Node, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + return switch (self.tag) { + .doc => @as(*const Node.Doc, @fieldParentPtr("base", self)).format(fmt, options, writer), + .map => @as(*const Node.Map, @fieldParentPtr("base", self)).format(fmt, options, writer), + .list => @as(*const Node.List, @fieldParentPtr("base", self)).format(fmt, options, writer), + .value => @as(*const Node.Value, @fieldParentPtr("base", self)).format(fmt, options, writer), + }; + } + + pub const Doc = 
struct { + base: Node = Node{ + .tag = Tag.doc, + .tree = undefined, + .start = undefined, + .end = undefined, + }, + directive: ?TokenIndex = null, + value: ?*Node = null, + + pub const base_tag: Node.Tag = .doc; + + pub fn deinit(self: *Doc, allocator: Allocator) void { + if (self.value) |node| { + node.deinit(allocator); + } + } + + pub fn format( + self: *const Doc, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = fmt; + if (self.directive) |id| { + try std.fmt.format(writer, "{{ ", .{}); + const directive = self.base.tree.getRaw(id, id); + try std.fmt.format(writer, ".directive = {s}, ", .{directive}); + } + if (self.value) |node| { + try std.fmt.format(writer, "{}", .{node}); + } + if (self.directive != null) { + try std.fmt.format(writer, " }}", .{}); + } + } + }; + + pub const Map = struct { + base: Node = Node{ + .tag = Tag.map, + .tree = undefined, + .start = undefined, + .end = undefined, + }, + values: std.ArrayListUnmanaged(Entry) = .{}, + + pub const base_tag: Node.Tag = .map; + + pub const Entry = struct { + key: TokenIndex, + value: ?*Node, + }; + + pub fn deinit(self: *Map, allocator: Allocator) void { + for (self.values.items) |entry| { + if (entry.value) |value| { + value.deinit(allocator); + } + } + self.values.deinit(allocator); + } + + pub fn format( + self: *const Map, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = fmt; + try std.fmt.format(writer, "{{ ", .{}); + for (self.values.items) |entry| { + const key = self.base.tree.getRaw(entry.key, entry.key); + if (entry.value) |value| { + try std.fmt.format(writer, "{s} => {}, ", .{ key, value }); + } else { + try std.fmt.format(writer, "{s} => null, ", .{key}); + } + } + return std.fmt.format(writer, " }}", .{}); + } + }; + + pub const List = struct { + base: Node = Node{ + .tag = Tag.list, + .tree = undefined, + .start = undefined, + .end = undefined, + }, + 
values: std.ArrayListUnmanaged(*Node) = .{}, + + pub const base_tag: Node.Tag = .list; + + pub fn deinit(self: *List, allocator: Allocator) void { + for (self.values.items) |node| { + node.deinit(allocator); + } + self.values.deinit(allocator); + } + + pub fn format( + self: *const List, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = fmt; + try std.fmt.format(writer, "[ ", .{}); + for (self.values.items) |node| { + try std.fmt.format(writer, "{}, ", .{node}); + } + return std.fmt.format(writer, " ]", .{}); + } + }; + + pub const Value = struct { + base: Node = Node{ + .tag = Tag.value, + .tree = undefined, + .start = undefined, + .end = undefined, + }, + string_value: std.ArrayListUnmanaged(u8) = .{}, + + pub const base_tag: Node.Tag = .value; + + pub fn deinit(self: *Value, allocator: Allocator) void { + self.string_value.deinit(allocator); + } + + pub fn format( + self: *const Value, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = fmt; + const raw = self.base.tree.getRaw(self.base.start, self.base.end); + return std.fmt.format(writer, "{s}", .{raw}); + } + }; +}; + +pub const LineCol = struct { + line: usize, + col: usize, +}; + +pub const Tree = struct { + allocator: Allocator, + source: []const u8, + tokens: []Token, + line_cols: std.AutoHashMap(TokenIndex, LineCol), + docs: std.ArrayListUnmanaged(*Node) = .{}, + + pub fn init(allocator: Allocator) Tree { + return .{ + .allocator = allocator, + .source = undefined, + .tokens = undefined, + .line_cols = std.AutoHashMap(TokenIndex, LineCol).init(allocator), + }; + } + + pub fn deinit(self: *Tree) void { + self.allocator.free(self.tokens); + self.line_cols.deinit(); + for (self.docs.items) |doc| { + doc.deinit(self.allocator); + } + self.docs.deinit(self.allocator); + } + + pub fn getDirective(self: Tree, doc_index: usize) ?[]const u8 { + assert(doc_index < self.docs.items.len); + 
const doc = self.docs.items[doc_index].cast(Node.Doc) orelse return null; + const id = doc.directive orelse return null; + return self.getRaw(id, id); + } + + pub fn getRaw(self: Tree, start: TokenIndex, end: TokenIndex) []const u8 { + assert(start <= end); + assert(start < self.tokens.len and end < self.tokens.len); + const start_token = self.tokens[start]; + const end_token = self.tokens[end]; + return self.source[start_token.start..end_token.end]; + } + + pub fn parse(self: *Tree, source: []const u8) !void { + var tokenizer = Tokenizer{ .buffer = source }; + var tokens = std.ArrayList(Token).init(self.allocator); + defer tokens.deinit(); + + var line: usize = 0; + var prev_line_last_col: usize = 0; + + while (true) { + const token = tokenizer.next(); + const tok_id = tokens.items.len; + try tokens.append(token); + + try self.line_cols.putNoClobber(tok_id, .{ + .line = line, + .col = token.start - prev_line_last_col, + }); + + switch (token.id) { + .eof => break, + .new_line => { + line += 1; + prev_line_last_col = token.end; + }, + else => {}, + } + } + + self.source = source; + self.tokens = try tokens.toOwnedSlice(); + + var it = TokenIterator{ .buffer = self.tokens }; + var parser = Parser{ + .allocator = self.allocator, + .tree = self, + .token_it = &it, + .line_cols = &self.line_cols, + }; + + parser.eatCommentsAndSpace(&.{}); + + while (true) { + parser.eatCommentsAndSpace(&.{}); + const token = parser.token_it.next() orelse break; + + log.debug("(main) next {s}@{d}", .{ @tagName(token.id), parser.token_it.pos - 1 }); + + switch (token.id) { + .eof => break, + else => { + parser.token_it.seekBy(-1); + const doc = try parser.doc(); + try self.docs.append(self.allocator, doc); + }, + } + } + } +}; + +const Parser = struct { + allocator: Allocator, + tree: *Tree, + token_it: *TokenIterator, + line_cols: *const std.AutoHashMap(TokenIndex, LineCol), + + fn value(self: *Parser) ParseError!?*Node { + self.eatCommentsAndSpace(&.{}); + + const pos = 
self.token_it.pos; + const token = self.token_it.next() orelse return error.UnexpectedEof; + + log.debug(" next {s}@{d}", .{ @tagName(token.id), pos }); + + switch (token.id) { + .literal => if (self.eatToken(.map_value_ind, &.{ .new_line, .comment })) |_| { + // map + self.token_it.seekTo(pos); + return self.map(); + } else { + // leaf value + self.token_it.seekTo(pos); + return self.leaf_value(); + }, + .single_quoted, .double_quoted => { + // leaf value + self.token_it.seekBy(-1); + return self.leaf_value(); + }, + .seq_item_ind => { + // list + self.token_it.seekBy(-1); + return self.list(); + }, + .flow_seq_start => { + // list + self.token_it.seekBy(-1); + return self.list_bracketed(); + }, + else => return null, + } + } + + fn doc(self: *Parser) ParseError!*Node { + const node = try self.allocator.create(Node.Doc); + errdefer self.allocator.destroy(node); + node.* = .{}; + node.base.tree = self.tree; + node.base.start = self.token_it.pos; + + log.debug("(doc) begin {s}@{d}", .{ @tagName(self.tree.tokens[node.base.start].id), node.base.start }); + // json format yaml + const is_one_line: bool = if (self.eatToken(.flow_map_start, &.{})) |doc_pos| is_one_line: { + if (self.getCol(doc_pos) > 0) return error.MalformedYaml; + if (self.eatToken(.tag, &.{ .new_line, .comment })) |_| { + node.directive = try self.expectToken(.literal, &.{ .new_line, .comment }); + } + break :is_one_line true; + } else false; + + if (is_one_line) { + return self.one_line_doc(node); + } + + // Parse header + const explicit_doc: bool = if (self.eatToken(.doc_start, &.{})) |doc_pos| explicit_doc: { + if (self.getCol(doc_pos) > 0) return error.MalformedYaml; + if (self.eatToken(.tag, &.{ .new_line, .comment })) |_| { + node.directive = try self.expectToken(.literal, &.{ .new_line, .comment }); + } + break :explicit_doc true; + } else false; + + // Parse value + node.value = try self.value(); + if (node.value == null) { + self.token_it.seekBy(-1); + } + errdefer if (node.value) |val| { + 
val.deinit(self.allocator); + }; + + // Parse footer + footer: { + if (self.eatToken(.doc_end, &.{})) |pos| { + if (!explicit_doc) return error.UnexpectedToken; + if (self.getCol(pos) > 0) return error.MalformedYaml; + node.base.end = pos; + break :footer; + } + if (self.eatToken(.doc_start, &.{})) |pos| { + if (!explicit_doc) return error.UnexpectedToken; + if (self.getCol(pos) > 0) return error.MalformedYaml; + self.token_it.seekBy(-1); + node.base.end = pos - 1; + break :footer; + } + if (self.eatToken(.eof, &.{})) |pos| { + node.base.end = pos - 1; + break :footer; + } + return error.UnexpectedToken; + } + + log.debug("(doc) end {s}@{d}", .{ @tagName(self.tree.tokens[node.base.end].id), node.base.end }); + + return &node.base; + } + + fn one_line_doc(self: *Parser, node: *Node.Doc) ParseError!*Node { + // Parse value + node.value = try self.value(); + if (node.value == null) { + self.token_it.seekBy(-1); + } + errdefer if (node.value) |val| { + val.deinit(self.allocator); + }; + + // Parse footer + footer: { + if (self.eatToken(.flow_seq_end, &.{})) |pos| { + if (self.getCol(pos) > 0) return error.MalformedYaml; + node.base.end = pos; + break :footer; + } + if (self.eatToken(.eof, &.{})) |pos| { + node.base.end = pos - 1; + break :footer; + } + return error.UnexpectedToken; + } + + log.debug("(doc) end {s}@{d}", .{ @tagName(self.tree.tokens[node.base.end].id), node.base.end }); + + return &node.base; + } + + fn map(self: *Parser) ParseError!*Node { + const node = try self.allocator.create(Node.Map); + errdefer self.allocator.destroy(node); + node.* = .{}; + node.base.tree = self.tree; + node.base.start = self.token_it.pos; + errdefer { + for (node.values.items) |entry| { + if (entry.value) |val| { + val.deinit(self.allocator); + } + } + node.values.deinit(self.allocator); + } + + log.debug("(map) begin {s}@{d}", .{ @tagName(self.tree.tokens[node.base.start].id), node.base.start }); + + const col = self.getCol(node.base.start); + + while (true) { + 
self.eatCommentsAndSpace(&.{}); + + // Parse key + const key_pos = self.token_it.pos; + if (self.getCol(key_pos) < col) { + break; + } + + const key = self.token_it.next() orelse return error.UnexpectedEof; + switch (key.id) { + .literal => {}, + .doc_start, .doc_end, .eof => { + self.token_it.seekBy(-1); + break; + }, + else => { + // TODO key not being a literal + // return error.Unhandled; + continue; + }, + } + + log.debug("(map) key {s}@{d}", .{ self.tree.getRaw(key_pos, key_pos), key_pos }); + + // Separator + _ = try self.expectToken(.map_value_ind, &.{ .new_line, .comment }); + + // Parse value + const val = try self.value(); + errdefer if (val) |v| { + v.deinit(self.allocator); + }; + + if (val) |v| { + if (self.getCol(v.start) < self.getCol(key_pos)) { + return error.MalformedYaml; + } + if (v.cast(Node.Value)) |_| { + if (self.getCol(v.start) == self.getCol(key_pos)) { + return error.MalformedYaml; + } + } + } + + try node.values.append(self.allocator, .{ + .key = key_pos, + .value = val, + }); + } + + node.base.end = self.token_it.pos - 1; + + log.debug("(map) end {s}@{d}", .{ @tagName(self.tree.tokens[node.base.end].id), node.base.end }); + + return &node.base; + } + + fn list(self: *Parser) ParseError!*Node { + const node = try self.allocator.create(Node.List); + errdefer self.allocator.destroy(node); + node.* = .{}; + node.base.tree = self.tree; + node.base.start = self.token_it.pos; + errdefer { + for (node.values.items) |val| { + val.deinit(self.allocator); + } + node.values.deinit(self.allocator); + } + + log.debug("(list) begin {s}@{d}", .{ @tagName(self.tree.tokens[node.base.start].id), node.base.start }); + + while (true) { + self.eatCommentsAndSpace(&.{}); + + _ = self.eatToken(.seq_item_ind, &.{}) orelse break; + + const val = (try self.value()) orelse return error.MalformedYaml; + try node.values.append(self.allocator, val); + } + + node.base.end = self.token_it.pos - 1; + + log.debug("(list) end {s}@{d}", .{ 
@tagName(self.tree.tokens[node.base.end].id), node.base.end }); + + return &node.base; + } + + fn list_bracketed(self: *Parser) ParseError!*Node { + const node = try self.allocator.create(Node.List); + errdefer self.allocator.destroy(node); + node.* = .{}; + node.base.tree = self.tree; + node.base.start = self.token_it.pos; + errdefer { + for (node.values.items) |val| { + val.deinit(self.allocator); + } + node.values.deinit(self.allocator); + } + + log.debug("(list) begin {s}@{d}", .{ @tagName(self.tree.tokens[node.base.start].id), node.base.start }); + + _ = try self.expectToken(.flow_seq_start, &.{}); + + while (true) { + self.eatCommentsAndSpace(&.{.comment}); + + if (self.eatToken(.flow_seq_end, &.{.comment})) |pos| { + node.base.end = pos; + break; + } + _ = self.eatToken(.comma, &.{.comment}); + + const val = (try self.value()) orelse return error.MalformedYaml; + try node.values.append(self.allocator, val); + } + + log.debug("(list) end {s}@{d}", .{ @tagName(self.tree.tokens[node.base.end].id), node.base.end }); + + return &node.base; + } + + fn leaf_value(self: *Parser) ParseError!*Node { + const node = try self.allocator.create(Node.Value); + errdefer self.allocator.destroy(node); + node.* = .{ .string_value = .{} }; + node.base.tree = self.tree; + node.base.start = self.token_it.pos; + errdefer node.string_value.deinit(self.allocator); + + // TODO handle multiline strings in new block scope + while (self.token_it.next()) |tok| { + switch (tok.id) { + .single_quoted => { + node.base.end = self.token_it.pos - 1; + const raw = self.tree.getRaw(node.base.start, node.base.end); + try self.parseSingleQuoted(node, raw); + break; + }, + .double_quoted => { + node.base.end = self.token_it.pos - 1; + const raw = self.tree.getRaw(node.base.start, node.base.end); + try self.parseDoubleQuoted(node, raw); + break; + }, + .literal => {}, + .space => { + const trailing = self.token_it.pos - 2; + self.eatCommentsAndSpace(&.{}); + if (self.token_it.peek()) |peek| { + if 
(peek.id != .literal) { + node.base.end = trailing; + const raw = self.tree.getRaw(node.base.start, node.base.end); + try node.string_value.appendSlice(self.allocator, raw); + break; + } + } + }, + else => { + self.token_it.seekBy(-1); + node.base.end = self.token_it.pos - 1; + const raw = self.tree.getRaw(node.base.start, node.base.end); + try node.string_value.appendSlice(self.allocator, raw); + break; + }, + } + } + + log.debug("(leaf) {s}", .{self.tree.getRaw(node.base.start, node.base.end)}); + + return &node.base; + } + + fn eatCommentsAndSpace(self: *Parser, comptime exclusions: []const Token.Id) void { + log.debug("eatCommentsAndSpace", .{}); + outer: while (self.token_it.next()) |token| { + log.debug(" (token '{s}')", .{@tagName(token.id)}); + switch (token.id) { + .comment, .space, .new_line => |space| { + inline for (exclusions) |excl| { + if (excl == space) { + self.token_it.seekBy(-1); + break :outer; + } + } else continue; + }, + else => { + self.token_it.seekBy(-1); + break; + }, + } + } + } + + fn eatToken(self: *Parser, id: Token.Id, comptime exclusions: []const Token.Id) ?TokenIndex { + log.debug("eatToken('{s}')", .{@tagName(id)}); + self.eatCommentsAndSpace(exclusions); + const pos = self.token_it.pos; + const token = self.token_it.next() orelse return null; + if (token.id == id) { + log.debug(" (found at {d})", .{pos}); + return pos; + } else { + log.debug(" (not found)", .{}); + self.token_it.seekBy(-1); + return null; + } + } + + fn expectToken(self: *Parser, id: Token.Id, comptime exclusions: []const Token.Id) ParseError!TokenIndex { + log.debug("expectToken('{s}')", .{@tagName(id)}); + return self.eatToken(id, exclusions) orelse error.UnexpectedToken; + } + + fn getLine(self: *Parser, index: TokenIndex) usize { + return self.line_cols.get(index).?.line; + } + + fn getCol(self: *Parser, index: TokenIndex) usize { + return self.line_cols.get(index).?.col; + } + + fn parseSingleQuoted(self: *Parser, node: *Node.Value, raw: []const u8) 
ParseError!void { + assert(raw[0] == '\'' and raw[raw.len - 1] == '\''); + + const raw_no_quotes = raw[1 .. raw.len - 1]; + try node.string_value.ensureTotalCapacity(self.allocator, raw_no_quotes.len); + + var state: enum { + start, + escape, + } = .start; + var index: usize = 0; + + while (index < raw_no_quotes.len) : (index += 1) { + const c = raw_no_quotes[index]; + switch (state) { + .start => switch (c) { + '\'' => { + state = .escape; + }, + else => { + node.string_value.appendAssumeCapacity(c); + }, + }, + .escape => switch (c) { + '\'' => { + state = .start; + node.string_value.appendAssumeCapacity(c); + }, + else => return error.InvalidEscapeSequence, + }, + } + } + } + + fn parseDoubleQuoted(self: *Parser, node: *Node.Value, raw: []const u8) ParseError!void { + assert(raw[0] == '"' and raw[raw.len - 1] == '"'); + + const raw_no_quotes = raw[1 .. raw.len - 1]; + try node.string_value.ensureTotalCapacity(self.allocator, raw_no_quotes.len); + + var state: enum { + start, + escape, + } = .start; + + var index: usize = 0; + while (index < raw_no_quotes.len) : (index += 1) { + const c = raw_no_quotes[index]; + switch (state) { + .start => switch (c) { + '\\' => { + state = .escape; + }, + else => { + node.string_value.appendAssumeCapacity(c); + }, + }, + .escape => switch (c) { + 'n' => { + state = .start; + node.string_value.appendAssumeCapacity('\n'); + }, + 't' => { + state = .start; + node.string_value.appendAssumeCapacity('\t'); + }, + '"' => { + state = .start; + node.string_value.appendAssumeCapacity('"'); + }, + else => return error.InvalidEscapeSequence, + }, + } + } + } +}; + +test { + std.testing.refAllDecls(@This()); + _ = @import("parse/test.zig"); +} diff --git a/src/yaml/parse/test.zig b/src/yaml/parse/test.zig new file mode 100644 index 0000000..2906801 --- /dev/null +++ b/src/yaml/parse/test.zig @@ -0,0 +1,764 @@ +const std = @import("std"); +const mem = std.mem; +const testing = std.testing; +const parse = @import("../parse.zig"); + +const 
Node = parse.Node; +const Tree = parse.Tree; + +test "explicit doc" { + const source = + \\--- !tapi-tbd + \\tbd-version: 4 + \\abc-version: 5 + \\... + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.base.start, 0); + try testing.expectEqual(doc.base.end, tree.tokens.len - 2); + + const directive = tree.tokens[doc.directive.?]; + try testing.expectEqual(directive.id, .literal); + try testing.expectEqualStrings("tapi-tbd", tree.source[directive.start..directive.end]); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.base.start, 5); + try testing.expectEqual(map.base.end, 14); + try testing.expectEqual(map.values.items.len, 2); + + { + const entry = map.values.items[0]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .literal); + try testing.expectEqualStrings("tbd-version", tree.source[key.start..key.end]); + + const value = entry.value.?.cast(Node.Value).?; + const value_tok = tree.tokens[value.base.start]; + try testing.expectEqual(value_tok.id, .literal); + try testing.expectEqualStrings("4", tree.source[value_tok.start..value_tok.end]); + } + + { + const entry = map.values.items[1]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .literal); + try testing.expectEqualStrings("abc-version", tree.source[key.start..key.end]); + + const value = entry.value.?.cast(Node.Value).?; + const value_tok = tree.tokens[value.base.start]; + try testing.expectEqual(value_tok.id, .literal); + try testing.expectEqualStrings("5", tree.source[value_tok.start..value_tok.end]); + } +} + +test "leaf in quotes" { + const source = + \\key1: no quotes + \\key2: 'single quoted' + \\key3: "double quoted" + ; + + var tree = 
Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.base.start, 0); + try testing.expectEqual(doc.base.end, tree.tokens.len - 2); + try testing.expect(doc.directive == null); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.base.start, 0); + try testing.expectEqual(map.base.end, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 3); + + { + const entry = map.values.items[0]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .literal); + try testing.expectEqualStrings("key1", tree.source[key.start..key.end]); + + const value = entry.value.?.cast(Node.Value).?; + const start = tree.tokens[value.base.start]; + const end = tree.tokens[value.base.end]; + try testing.expectEqual(start.id, .literal); + try testing.expectEqual(end.id, .literal); + try testing.expectEqualStrings("no quotes", tree.source[start.start..end.end]); + } +} + +test "nested maps" { + const source = + \\key1: + \\ key1_1 : value1_1 + \\ key1_2 : value1_2 + \\key2 : value2 + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.base.start, 0); + try testing.expectEqual(doc.base.end, tree.tokens.len - 2); + try testing.expect(doc.directive == null); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.base.start, 0); + try testing.expectEqual(map.base.end, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 2); + + { + const entry = map.values.items[0]; + + const key = 
tree.tokens[entry.key]; + try testing.expectEqual(key.id, .literal); + try testing.expectEqualStrings("key1", tree.source[key.start..key.end]); + + const nested_map = entry.value.?.cast(Node.Map).?; + try testing.expectEqual(nested_map.base.start, 4); + try testing.expectEqual(nested_map.base.end, 16); + try testing.expectEqual(nested_map.values.items.len, 2); + + { + const nested_entry = nested_map.values.items[0]; + + const nested_key = tree.tokens[nested_entry.key]; + try testing.expectEqual(nested_key.id, .literal); + try testing.expectEqualStrings("key1_1", tree.source[nested_key.start..nested_key.end]); + + const nested_value = nested_entry.value.?.cast(Node.Value).?; + const nested_value_tok = tree.tokens[nested_value.base.start]; + try testing.expectEqual(nested_value_tok.id, .literal); + try testing.expectEqualStrings( + "value1_1", + tree.source[nested_value_tok.start..nested_value_tok.end], + ); + } + + { + const nested_entry = nested_map.values.items[1]; + + const nested_key = tree.tokens[nested_entry.key]; + try testing.expectEqual(nested_key.id, .literal); + try testing.expectEqualStrings("key1_2", tree.source[nested_key.start..nested_key.end]); + + const nested_value = nested_entry.value.?.cast(Node.Value).?; + const nested_value_tok = tree.tokens[nested_value.base.start]; + try testing.expectEqual(nested_value_tok.id, .literal); + try testing.expectEqualStrings( + "value1_2", + tree.source[nested_value_tok.start..nested_value_tok.end], + ); + } + } + + { + const entry = map.values.items[1]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .literal); + try testing.expectEqualStrings("key2", tree.source[key.start..key.end]); + + const value = entry.value.?.cast(Node.Value).?; + const value_tok = tree.tokens[value.base.start]; + try testing.expectEqual(value_tok.id, .literal); + try testing.expectEqualStrings("value2", tree.source[value_tok.start..value_tok.end]); + } +} + +test "map of list of values" { + const source = + 
\\ints: + \\ - 0 + \\ - 1 + \\ - 2 + ; + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.base.start, 0); + try testing.expectEqual(doc.base.end, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.base.start, 0); + try testing.expectEqual(map.base.end, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 1); + + const entry = map.values.items[0]; + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .literal); + try testing.expectEqualStrings("ints", tree.source[key.start..key.end]); + + const value = entry.value.?.cast(Node.List).?; + try testing.expectEqual(value.base.start, 4); + try testing.expectEqual(value.base.end, tree.tokens.len - 2); + try testing.expectEqual(value.values.items.len, 3); + + { + const elem = value.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[elem.base.start]; + try testing.expectEqual(leaf.id, .literal); + try testing.expectEqualStrings("0", tree.source[leaf.start..leaf.end]); + } + + { + const elem = value.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[elem.base.start]; + try testing.expectEqual(leaf.id, .literal); + try testing.expectEqualStrings("1", tree.source[leaf.start..leaf.end]); + } + + { + const elem = value.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[elem.base.start]; + try testing.expectEqual(leaf.id, .literal); + try testing.expectEqualStrings("2", tree.source[leaf.start..leaf.end]); + } +} + +test "map of list of maps" { + const source = + \\key1: + \\- key2 : value2 + \\- key3 : value3 + \\- key4 : value4 + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try 
testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.base.start, 0); + try testing.expectEqual(doc.base.end, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.base.start, 0); + try testing.expectEqual(map.base.end, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 1); + + const entry = map.values.items[0]; + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .literal); + try testing.expectEqualStrings("key1", tree.source[key.start..key.end]); + + const value = entry.value.?.cast(Node.List).?; + try testing.expectEqual(value.base.start, 3); + try testing.expectEqual(value.base.end, tree.tokens.len - 2); + try testing.expectEqual(value.values.items.len, 3); + + { + const elem = value.values.items[0].cast(Node.Map).?; + const nested = elem.values.items[0]; + const nested_key = tree.tokens[nested.key]; + try testing.expectEqual(nested_key.id, .literal); + try testing.expectEqualStrings("key2", tree.source[nested_key.start..nested_key.end]); + + const nested_v = nested.value.?.cast(Node.Value).?; + const leaf = tree.tokens[nested_v.base.start]; + try testing.expectEqual(leaf.id, .literal); + try testing.expectEqualStrings("value2", tree.source[leaf.start..leaf.end]); + } + + { + const elem = value.values.items[1].cast(Node.Map).?; + const nested = elem.values.items[0]; + const nested_key = tree.tokens[nested.key]; + try testing.expectEqual(nested_key.id, .literal); + try testing.expectEqualStrings("key3", tree.source[nested_key.start..nested_key.end]); + + const nested_v = nested.value.?.cast(Node.Value).?; + const leaf = tree.tokens[nested_v.base.start]; + try testing.expectEqual(leaf.id, .literal); + try testing.expectEqualStrings("value3", tree.source[leaf.start..leaf.end]); + } + + { + const elem = 
value.values.items[2].cast(Node.Map).?; + const nested = elem.values.items[0]; + const nested_key = tree.tokens[nested.key]; + try testing.expectEqual(nested_key.id, .literal); + try testing.expectEqualStrings("key4", tree.source[nested_key.start..nested_key.end]); + + const nested_v = nested.value.?.cast(Node.Value).?; + const leaf = tree.tokens[nested_v.base.start]; + try testing.expectEqual(leaf.id, .literal); + try testing.expectEqualStrings("value4", tree.source[leaf.start..leaf.end]); + } +} + +test "list of lists" { + const source = + \\- [name , hr, avg ] + \\- [Mark McGwire , 65, 0.278] + \\- [Sammy Sosa , 63, 0.288] + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.base.start, 0); + try testing.expectEqual(doc.base.end, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .list); + + const list = doc.value.?.cast(Node.List).?; + try testing.expectEqual(list.base.start, 0); + try testing.expectEqual(list.base.end, tree.tokens.len - 2); + try testing.expectEqual(list.values.items.len, 3); + + { + try testing.expectEqual(list.values.items[0].tag, .list); + const nested = list.values.items[0].cast(Node.List).?; + try testing.expectEqual(nested.values.items.len, 3); + + { + try testing.expectEqual(nested.values.items[0].tag, .value); + const value = nested.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("name", tree.source[leaf.start..leaf.end]); + } + + { + try testing.expectEqual(nested.values.items[1].tag, .value); + const value = nested.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("hr", tree.source[leaf.start..leaf.end]); + } + + { + try testing.expectEqual(nested.values.items[2].tag, 
.value); + const value = nested.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("avg", tree.source[leaf.start..leaf.end]); + } + } + + { + try testing.expectEqual(list.values.items[1].tag, .list); + const nested = list.values.items[1].cast(Node.List).?; + try testing.expectEqual(nested.values.items.len, 3); + + { + try testing.expectEqual(nested.values.items[0].tag, .value); + const value = nested.values.items[0].cast(Node.Value).?; + const start = tree.tokens[value.base.start]; + const end = tree.tokens[value.base.end]; + try testing.expectEqualStrings("Mark McGwire", tree.source[start.start..end.end]); + } + + { + try testing.expectEqual(nested.values.items[1].tag, .value); + const value = nested.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("65", tree.source[leaf.start..leaf.end]); + } + + { + try testing.expectEqual(nested.values.items[2].tag, .value); + const value = nested.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("0.278", tree.source[leaf.start..leaf.end]); + } + } + + { + try testing.expectEqual(list.values.items[2].tag, .list); + const nested = list.values.items[2].cast(Node.List).?; + try testing.expectEqual(nested.values.items.len, 3); + + { + try testing.expectEqual(nested.values.items[0].tag, .value); + const value = nested.values.items[0].cast(Node.Value).?; + const start = tree.tokens[value.base.start]; + const end = tree.tokens[value.base.end]; + try testing.expectEqualStrings("Sammy Sosa", tree.source[start.start..end.end]); + } + + { + try testing.expectEqual(nested.values.items[1].tag, .value); + const value = nested.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("63", tree.source[leaf.start..leaf.end]); + } + + { + try testing.expectEqual(nested.values.items[2].tag, 
.value); + const value = nested.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("0.288", tree.source[leaf.start..leaf.end]); + } + } +} + +test "inline list" { + const source = + \\[name , hr, avg ] + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.base.start, 0); + try testing.expectEqual(doc.base.end, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .list); + + const list = doc.value.?.cast(Node.List).?; + try testing.expectEqual(list.base.start, 0); + try testing.expectEqual(list.base.end, tree.tokens.len - 2); + try testing.expectEqual(list.values.items.len, 3); + + { + try testing.expectEqual(list.values.items[0].tag, .value); + const value = list.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("name", tree.source[leaf.start..leaf.end]); + } + + { + try testing.expectEqual(list.values.items[1].tag, .value); + const value = list.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("hr", tree.source[leaf.start..leaf.end]); + } + + { + try testing.expectEqual(list.values.items[2].tag, .value); + const value = list.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("avg", tree.source[leaf.start..leaf.end]); + } +} + +test "inline list as mapping value" { + const source = + \\key : [ + \\ name , + \\ hr, avg ] + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.base.start, 0); + try 
testing.expectEqual(doc.base.end, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.base.start, 0); + try testing.expectEqual(map.base.end, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 1); + + const entry = map.values.items[0]; + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .literal); + try testing.expectEqualStrings("key", tree.source[key.start..key.end]); + + const list = entry.value.?.cast(Node.List).?; + try testing.expectEqual(list.base.start, 4); + try testing.expectEqual(list.base.end, tree.tokens.len - 2); + try testing.expectEqual(list.values.items.len, 3); + + { + try testing.expectEqual(list.values.items[0].tag, .value); + const value = list.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("name", tree.source[leaf.start..leaf.end]); + } + + { + try testing.expectEqual(list.values.items[1].tag, .value); + const value = list.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("hr", tree.source[leaf.start..leaf.end]); + } + + { + try testing.expectEqual(list.values.items[2].tag, .value); + const value = list.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.base.start]; + try testing.expectEqualStrings("avg", tree.source[leaf.start..leaf.end]); + } +} + +fn parseSuccess(comptime source: []const u8) !void { + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); +} + +fn parseError(comptime source: []const u8, err: parse.ParseError) !void { + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try testing.expectError(err, tree.parse(source)); +} + +test "empty doc with spaces and comments" { + try parseSuccess( + \\ + \\ + \\ # this is a comment in a weird place + \\# and this 
one is too + ); +} + +test "comment between --- and ! in document start" { + try parseError( + \\--- # what is it? + \\! + , error.UnexpectedToken); +} + +test "correct doc start with tag" { + try parseSuccess( + \\--- !some-tag + \\ + ); +} + +test "doc close without explicit doc open" { + try parseError( + \\ + \\ + \\# something cool + \\... + , error.UnexpectedToken); +} + +test "doc open and close are ok" { + try parseSuccess( + \\--- + \\# first doc + \\ + \\ + \\--- + \\# second doc + \\ + \\ + \\... + ); +} + +test "doc with a single string is ok" { + try parseSuccess( + \\a string of some sort + \\ + ); +} + +test "explicit doc with a single string is ok" { + try parseSuccess( + \\--- !anchor + \\# nothing to see here except one string + \\ # not a lot to go on with + \\a single string + \\... + ); +} + +test "doc with two string is bad" { + try parseError( + \\first + \\second + \\# this should fail already + , error.UnexpectedToken); +} + +test "single quote string can have new lines" { + try parseSuccess( + \\'what is this + \\ thing?' + ); +} + +test "single quote string on one line is fine" { + try parseSuccess( + \\'here''s an apostrophe' + ); +} + +test "double quote string can have new lines" { + try parseSuccess( + \\"what is this + \\ thing?" 
+ ); +} + +test "double quote string on one line is fine" { + try parseSuccess( + \\"a newline\nand a\ttab" + ); +} + +test "map with key and value literals" { + try parseSuccess( + \\key1: val1 + \\key2 : val2 + ); +} + +test "map of maps" { + try parseSuccess( + \\ + \\# the first key + \\key1: + \\ # the first subkey + \\ key1_1: 0 + \\ key1_2: 1 + \\# the second key + \\key2: + \\ key2_1: -1 + \\ key2_2: -2 + \\# the end of map + ); +} + +test "map value indicator needs to be on the same line" { + try parseError( + \\a + \\ : b + , error.UnexpectedToken); +} + +test "value needs to be indented" { + try parseError( + \\a: + \\b + , error.MalformedYaml); +} + +test "comment between a key and a value is fine" { + try parseSuccess( + \\a: + \\ # this is a value + \\ b + ); +} + +test "simple list" { + try parseSuccess( + \\# first el + \\- a + \\# second el + \\- b + \\# third el + \\- c + ); +} + +test "list indentation matters" { + try parseSuccess( + \\ - a + \\- b + ); + + try parseSuccess( + \\- a + \\ - b + ); +} + +test "unindented list is fine too" { + try parseSuccess( + \\a: + \\- 0 + \\- 1 + ); +} + +test "empty values in a map" { + try parseSuccess( + \\a: + \\b: + \\- 0 + ); +} + +test "weirdly nested map of maps of lists" { + try parseSuccess( + \\a: + \\ b: + \\ - 0 + \\ - 1 + ); +} + +test "square brackets denote a list" { + try parseSuccess( + \\[ a, + \\ b, c ] + ); +} + +test "empty list" { + try parseSuccess( + \\[ ] + ); +} + +test "comment within a bracketed list is an error" { + try parseError( + \\[ # something + \\] + , error.MalformedYaml); +} + +test "mixed ints with floats in a list" { + try parseSuccess( + \\[0, 1.0] + ); +} diff --git a/src/yaml/yaml.zig b/src/yaml/yaml.zig new file mode 100644 index 0000000..e39ca90 --- /dev/null +++ b/src/yaml/yaml.zig @@ -0,0 +1,526 @@ +const std = @import("std"); +const assert = std.debug.assert; +const math = std.math; +const mem = std.mem; +const log = std.log.scoped(.yaml); + +const Allocator = 
mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; + +pub const Tokenizer = @import("Tokenizer.zig"); +pub const parse = @import("parse.zig"); + +const Node = parse.Node; +const Tree = parse.Tree; +const ParseError = parse.ParseError; + +pub const YamlError = error{ + UnexpectedNodeType, + DuplicateMapKey, + OutOfMemory, + CannotEncodeValue, +} || ParseError || std.fmt.ParseIntError; + +pub const List = []Value; +pub const Map = std.StringArrayHashMap(Value); + +pub const Value = union(enum) { + empty, + int: i64, + float: f64, + string: []const u8, + list: List, + map: Map, + + pub fn asInt(self: Value) !i64 { + if (self != .int) return error.TypeMismatch; + return self.int; + } + + pub fn asFloat(self: Value) !f64 { + if (self != .float) return error.TypeMismatch; + return self.float; + } + + pub fn asString(self: Value) ![]const u8 { + if (self != .string) return error.TypeMismatch; + return self.string; + } + + pub fn asList(self: Value) !List { + if (self == .list) { + return self.list; + } else if (self == .string) { + const str = self.string; + if (!(std.mem.startsWith(u8, self.string, "0x") and str.len % 2 == 0 and str.len > 2)) { + return error.TypeMismatch; + } + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + // defer arena.deinit(); + + var list = try arena.allocator().alloc(Value, str.len / 2 - 1); + // var list = std.ArrayList(Value).init(arena.allocator()); + // defer list.deinit(); + var i: usize = 2; + while (i < str.len) : (i += 2) { + if (i + 1 >= str.len) break; + const hex_str = str[i .. 
i + 2]; + const num = std.fmt.parseInt(u8, hex_str, 16) catch continue; + list[i / 2 - 1] = Value{ .int = num }; + } + return list; + } else { + return error.TypeMismatch; + } + } + + pub fn asMap(self: Value) !Map { + if (self != .map) return error.TypeMismatch; + return self.map; + } + + const StringifyArgs = struct { + indentation: usize = 0, + should_inline_first_key: bool = false, + }; + + pub fn stringify(self: Value, writer: anytype, args: StringifyArgs) anyerror!void { + switch (self) { + .empty => return, + .int => |int| return writer.print("{}", .{int}), + .float => |float| return writer.print("{d}", .{float}), + .string => |string| return writer.print("{s}", .{string}), + .list => |list| { + const len = list.len; + if (len == 0) return; + + const first = list[0]; + if (first.isCompound()) { + for (list, 0..) |elem, i| { + try writer.writeByteNTimes(' ', args.indentation); + try writer.writeAll("- "); + try elem.stringify(writer, .{ + .indentation = args.indentation + 2, + .should_inline_first_key = true, + }); + if (i < len - 1) { + try writer.writeByte('\n'); + } + } + return; + } + + try writer.writeAll("[ "); + for (list, 0..) 
|elem, i| { + try elem.stringify(writer, args); + if (i < len - 1) { + try writer.writeAll(", "); + } + } + try writer.writeAll(" ]"); + }, + .map => |map| { + const len = map.count(); + if (len == 0) return; + + var i: usize = 0; + for (map.keys(), map.values()) |key, value| { + if (!args.should_inline_first_key or i != 0) { + try writer.writeByteNTimes(' ', args.indentation); + } + try writer.print("{s}: ", .{key}); + + const should_inline = blk: { + if (!value.isCompound()) break :blk true; + if (value == .list and value.list.len > 0 and !value.list[0].isCompound()) break :blk true; + break :blk false; + }; + + if (should_inline) { + try value.stringify(writer, args); + } else { + try writer.writeByte('\n'); + try value.stringify(writer, .{ + .indentation = args.indentation + 4, + }); + } + + if (i < len - 1) { + try writer.writeByte('\n'); + } + + i += 1; + } + }, + } + } + + fn isCompound(self: Value) bool { + return switch (self) { + .list, .map => true, + else => false, + }; + } + + fn fromNode(arena: Allocator, tree: *const Tree, node: *const Node) YamlError!Value { + if (node.cast(Node.Doc)) |doc| { + const inner = doc.value orelse { + // empty doc + return Value{ .empty = {} }; + }; + return Value.fromNode(arena, tree, inner); + } else if (node.cast(Node.Map)) |map| { + // TODO use ContextAdapted HashMap and do not duplicate keys, intern + // in a contiguous string buffer. 
+ var out_map = std.StringArrayHashMap(Value).init(arena); + try out_map.ensureUnusedCapacity(math.cast(u32, map.values.items.len) orelse return error.Overflow); + + for (map.values.items) |entry| { + const key = try arena.dupe(u8, tree.getRaw(entry.key, entry.key)); + const gop = out_map.getOrPutAssumeCapacity(key); + if (gop.found_existing) { + return error.DuplicateMapKey; + } + const value = if (entry.value) |value| + try Value.fromNode(arena, tree, value) + else + .empty; + gop.value_ptr.* = value; + } + + return Value{ .map = out_map }; + } else if (node.cast(Node.List)) |list| { + var out_list = std.ArrayList(Value).init(arena); + try out_list.ensureUnusedCapacity(list.values.items.len); + + for (list.values.items) |elem| { + const value = try Value.fromNode(arena, tree, elem); + out_list.appendAssumeCapacity(value); + } + + return Value{ .list = try out_list.toOwnedSlice() }; + } else if (node.cast(Node.Value)) |value| { + const raw = tree.getRaw(node.start, node.end); + + try_int: { + const int = std.fmt.parseInt(i64, raw, 0) catch break :try_int; + return Value{ .int = int }; + } + + try_float: { + const float = std.fmt.parseFloat(f64, raw) catch break :try_float; + return Value{ .float = float }; + } + + return Value{ .string = try arena.dupe(u8, value.string_value.items) }; + } else { + log.debug("Unexpected node type: {}", .{node.tag}); + return error.UnexpectedNodeType; + } + } + + fn encode(arena: Allocator, input: anytype) YamlError!?Value { + switch (@typeInfo(@TypeOf(input))) { + .comptime_int, + .int, + => return Value{ .int = math.cast(i64, input) orelse return error.Overflow }, + + .float => return Value{ .float = math.lossyCast(f64, input) }, + + .@"struct" => |info| if (info.is_tuple) { + var list = std.ArrayList(Value).init(arena); + errdefer list.deinit(); + try list.ensureTotalCapacityPrecise(info.fields.len); + + inline for (info.fields) |field| { + if (try encode(arena, @field(input, field.name))) |value| { + 
list.appendAssumeCapacity(value); + } + } + + return Value{ .list = try list.toOwnedSlice() }; + } else { + var map = Map.init(arena); + errdefer map.deinit(); + try map.ensureTotalCapacity(info.fields.len); + + inline for (info.fields) |field| { + if (try encode(arena, @field(input, field.name))) |value| { + const key = try arena.dupe(u8, field.name); + map.putAssumeCapacityNoClobber(key, value); + } + } + + return Value{ .map = map }; + }, + + .@"union" => |info| if (info.tag_type) |tag_type| { + inline for (info.fields) |field| { + if (@field(tag_type, field.name) == input) { + return try encode(arena, @field(input, field.name)); + } + } else unreachable; + } else return error.UntaggedUnion, + + .array => return encode(arena, &input), + + .pointer => |info| switch (info.size) { + .One => switch (@typeInfo(info.child)) { + .array => |child_info| { + const Slice = []const child_info.child; + return encode(arena, @as(Slice, input)); + }, + else => { + @compileError("Unhandled type: {s}" ++ @typeName(info.child)); + }, + }, + .Slice => { + if (info.child == u8) { + return Value{ .string = try arena.dupe(u8, input) }; + } + + var list = std.ArrayList(Value).init(arena); + errdefer list.deinit(); + try list.ensureTotalCapacityPrecise(input.len); + + for (input) |elem| { + if (try encode(arena, elem)) |value| { + list.appendAssumeCapacity(value); + } else { + log.debug("Could not encode value in a list: {any}", .{elem}); + return error.CannotEncodeValue; + } + } + + return Value{ .list = try list.toOwnedSlice() }; + }, + else => { + @compileError("Unhandled type: {s}" ++ @typeName(@TypeOf(input))); + }, + }, + + // TODO we should probably have an option to encode `null` and also + // allow for some default value too. 
+ .optional => return if (input) |val| encode(arena, val) else null, + + .null => return null, + + else => { + @compileError("Unhandled type: {s}" ++ @typeName(@TypeOf(input))); + }, + } + } +}; + +pub const Yaml = struct { + arena: ArenaAllocator, + tree: ?Tree = null, + docs: std.ArrayList(Value), + + pub fn deinit(self: *Yaml) void { + self.arena.deinit(); + } + + pub fn load(allocator: Allocator, source: []const u8) !Yaml { + var arena = ArenaAllocator.init(allocator); + errdefer arena.deinit(); + + var tree = Tree.init(arena.allocator()); + try tree.parse(source); + + var docs = std.ArrayList(Value).init(arena.allocator()); + try docs.ensureTotalCapacityPrecise(tree.docs.items.len); + + for (tree.docs.items) |node| { + const value = try Value.fromNode(arena.allocator(), &tree, node); + docs.appendAssumeCapacity(value); + } + + return Yaml{ + .arena = arena, + .tree = tree, + .docs = docs, + }; + } + + pub const Error = error{ + Unimplemented, + TypeMismatch, + StructFieldMissing, + ArraySizeMismatch, + UntaggedUnion, + UnionTagMissing, + Overflow, + OutOfMemory, + }; + + pub fn parse(self: *Yaml, comptime T: type) Error!T { + if (self.docs.items.len == 0) { + if (@typeInfo(T) == .void) return {}; + return error.TypeMismatch; + } + + if (self.docs.items.len == 1) { + return self.parseValue(T, self.docs.items[0]); + } + + switch (@typeInfo(T)) { + .array => |info| { + var parsed: T = undefined; + for (self.docs.items, 0..) |doc, i| { + parsed[i] = try self.parseValue(info.child, doc); + } + return parsed; + }, + .pointer => |info| { + switch (info.size) { + .Slice => { + var parsed = try self.arena.allocator().alloc(info.child, self.docs.items.len); + for (self.docs.items, 0..) 
|doc, i| { + parsed[i] = try self.parseValue(info.child, doc); + } + return parsed; + }, + else => return error.TypeMismatch, + } + }, + .@"union" => return error.Unimplemented, + else => return error.TypeMismatch, + } + } + + fn parseValue(self: *Yaml, comptime T: type, value: Value) Error!T { + return switch (@typeInfo(T)) { + .int => math.cast(T, try value.asInt()) orelse return error.Overflow, + .float => if (value.asFloat()) |float| { + return math.lossyCast(T, float); + } else |_| { + return math.lossyCast(T, try value.asInt()); + }, + .@"struct" => self.parseStruct(T, try value.asMap()), + .@"union" => self.parseUnion(T, value), + .array => self.parseArray(T, try value.asList()), + .pointer => if (value.asList()) |list| { + return self.parsePointer(T, .{ .list = list }); + } else |_| { + return self.parsePointer(T, .{ .string = try value.asString() }); + }, + .void => error.TypeMismatch, + .optional => unreachable, + else => error.Unimplemented, + }; + } + + fn parseUnion(self: *Yaml, comptime T: type, value: Value) Error!T { + const union_info = @typeInfo(T).@"union"; + + if (union_info.tag_type) |_| { + inline for (union_info.fields) |field| { + if (self.parseValue(field.type, value)) |u_value| { + return @unionInit(T, field.name, u_value); + } else |err| switch (err) { + error.TypeMismatch => {}, + error.StructFieldMissing => {}, + else => return err, + } + } + } else return error.UntaggedUnion; + + return error.UnionTagMissing; + } + + fn parseOptional(self: *Yaml, comptime T: type, value: ?Value) Error!T { + const unwrapped = value orelse return null; + const opt_info = @typeInfo(T).optional; + return @as(T, try self.parseValue(opt_info.child, unwrapped)); + } + + fn parseStruct(self: *Yaml, comptime T: type, map: Map) Error!T { + const struct_info = @typeInfo(T).@"struct"; + var parsed: T = undefined; + + inline for (struct_info.fields) |field| { + const value: ?Value = map.get(field.name) orelse blk: { + const field_name = try mem.replaceOwned(u8, 
self.arena.allocator(), field.name, "_", "-"); + break :blk map.get(field_name); + }; + + if (@typeInfo(field.type) == .optional) { + @field(parsed, field.name) = try self.parseOptional(field.type, value); + continue; + } + + const unwrapped = value orelse { + log.debug("missing struct field: {s}: {s}", .{ field.name, @typeName(field.type) }); + return error.StructFieldMissing; + }; + @field(parsed, field.name) = try self.parseValue(field.type, unwrapped); + } + + return parsed; + } + + fn parsePointer(self: *Yaml, comptime T: type, value: Value) Error!T { + const ptr_info = @typeInfo(T).pointer; + const arena = self.arena.allocator(); + + switch (ptr_info.size) { + .Slice => { + if (ptr_info.child == u8) { + return value.asString(); + } + + var parsed = try arena.alloc(ptr_info.child, value.list.len); + for (value.list, 0..) |elem, i| { + parsed[i] = try self.parseValue(ptr_info.child, elem); + } + return parsed; + }, + else => return error.Unimplemented, + } + } + + fn parseArray(self: *Yaml, comptime T: type, list: List) Error!T { + const array_info = @typeInfo(T).array; + if (array_info.len != list.len) return error.ArraySizeMismatch; + + var parsed: T = undefined; + for (list, 0..) |elem, i| { + parsed[i] = try self.parseValue(array_info.child, elem); + } + + return parsed; + } + + pub fn stringify(self: Yaml, writer: anytype) !void { + for (self.docs.items, 0..) |doc, i| { + try writer.writeAll("---"); + if (self.tree.?.getDirective(i)) |directive| { + try writer.print(" !{s}", .{directive}); + } + try writer.writeByte('\n'); + try doc.stringify(writer, .{}); + try writer.writeByte('\n'); + } + try writer.writeAll("...\n"); + } +}; + +pub fn stringify(allocator: Allocator, input: anytype, writer: anytype) !void { + var arena = ArenaAllocator.init(allocator); + defer arena.deinit(); + + const maybe_value = try Value.encode(arena.allocator(), input); + + if (maybe_value) |value| { + // TODO should we output as an explicit doc? 
+ // How can allow the user to specify? + try value.stringify(writer, .{}); + } +} + +test { + std.testing.refAllDecls(Tokenizer); + std.testing.refAllDecls(parse); + _ = @import("yaml/test.zig"); +} diff --git a/src/yaml/yaml/test.zig b/src/yaml/yaml/test.zig new file mode 100644 index 0000000..2d418ab --- /dev/null +++ b/src/yaml/yaml/test.zig @@ -0,0 +1,539 @@ +const std = @import("std"); +const mem = std.mem; +const testing = std.testing; + +const yaml_mod = @import("../yaml.zig"); +const Yaml = yaml_mod.Yaml; + +test "simple list" { + const source = + \\- a + \\- b + \\- c + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const list = yaml.docs.items[0].list; + try testing.expectEqual(list.len, 3); + + try testing.expectEqualStrings("a", list[0].string); + try testing.expectEqualStrings("b", list[1].string); + try testing.expectEqualStrings("c", list[2].string); +} + +test "simple list typed as array of strings" { + const source = + \\- a + \\- b + \\- c + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const arr = try yaml.parse([3][]const u8); + try testing.expectEqual(3, arr.len); + try testing.expectEqualStrings("a", arr[0]); + try testing.expectEqualStrings("b", arr[1]); + try testing.expectEqualStrings("c", arr[2]); +} + +test "simple list typed as array of ints" { + const source = + \\- 0 + \\- 1 + \\- 2 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const arr = try yaml.parse([3]u8); + try testing.expectEqualSlices(u8, &[_]u8{ 0, 1, 2 }, &arr); +} + +test "list of mixed sign integer" { + const source = + \\- 0 + \\- -1 + \\- 2 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const arr = try 
yaml.parse([3]i8); + try testing.expectEqualSlices(i8, &[_]i8{ 0, -1, 2 }, &arr); +} + +test "several integer bases" { + const source = + \\- 10 + \\- -10 + \\- 0x10 + \\- -0X10 + \\- 0o10 + \\- -0O10 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const arr = try yaml.parse([6]i8); + try testing.expectEqualSlices(i8, &[_]i8{ 10, -10, 16, -16, 8, -8 }, &arr); +} + +test "simple map untyped" { + const source = + \\a: 0 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const map = yaml.docs.items[0].map; + try testing.expect(map.contains("a")); + try testing.expectEqual(@as(i64, 0), map.get("a").?.int); +} + +test "simple map untyped with a list of maps" { + const source = + \\a: 0 + \\b: + \\ - foo: 1 + \\ bar: 2 + \\ - foo: 3 + \\ bar: 4 + \\c: 1 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const map = yaml.docs.items[0].map; + try testing.expect(map.contains("a")); + try testing.expect(map.contains("b")); + try testing.expect(map.contains("c")); + try testing.expectEqual(@as(i64, 0), map.get("a").?.int); + try testing.expectEqual(@as(i64, 1), map.get("c").?.int); + try testing.expectEqual(@as(i64, 1), map.get("b").?.list[0].map.get("foo").?.int); + try testing.expectEqual(@as(i64, 2), map.get("b").?.list[0].map.get("bar").?.int); + try testing.expectEqual(@as(i64, 3), map.get("b").?.list[1].map.get("foo").?.int); + try testing.expectEqual(@as(i64, 4), map.get("b").?.list[1].map.get("bar").?.int); +} + +test "simple map untyped with a list of maps. 
no indent" { + const source = + \\b: + \\- foo: 1 + \\c: 1 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const map = yaml.docs.items[0].map; + try testing.expect(map.contains("b")); + try testing.expect(map.contains("c")); + try testing.expectEqual(@as(i64, 1), map.get("c").?.int); + try testing.expectEqual(@as(i64, 1), map.get("b").?.list[0].map.get("foo").?.int); +} + +test "simple map untyped with a list of maps. no indent 2" { + const source = + \\a: 0 + \\b: + \\- foo: 1 + \\ bar: 2 + \\- foo: 3 + \\ bar: 4 + \\c: 1 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const map = yaml.docs.items[0].map; + try testing.expect(map.contains("a")); + try testing.expect(map.contains("b")); + try testing.expect(map.contains("c")); + try testing.expectEqual(@as(i64, 0), map.get("a").?.int); + try testing.expectEqual(@as(i64, 1), map.get("c").?.int); + try testing.expectEqual(@as(i64, 1), map.get("b").?.list[0].map.get("foo").?.int); + try testing.expectEqual(@as(i64, 2), map.get("b").?.list[0].map.get("bar").?.int); + try testing.expectEqual(@as(i64, 3), map.get("b").?.list[1].map.get("foo").?.int); + try testing.expectEqual(@as(i64, 4), map.get("b").?.list[1].map.get("bar").?.int); +} + +test "simple map typed" { + const source = + \\a: 0 + \\b: hello there + \\c: 'wait, what?' + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const simple = try yaml.parse(struct { a: usize, b: []const u8, c: []const u8 }); + try testing.expectEqual(@as(usize, 0), simple.a); + try testing.expectEqualStrings("hello there", simple.b); + try testing.expectEqualStrings("wait, what?", simple.c); +} + +test "typed nested structs" { + const source = + \\a: + \\ b: hello there + \\ c: 'wait, what?' 
+ ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const simple = try yaml.parse(struct { + a: struct { + b: []const u8, + c: []const u8, + }, + }); + try testing.expectEqualStrings("hello there", simple.a.b); + try testing.expectEqualStrings("wait, what?", simple.a.c); +} + +test "typed union with nested struct" { + const source = + \\a: + \\ b: hello there + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const simple = try yaml.parse(union(enum) { + tag_a: struct { + a: struct { + b: []const u8, + }, + }, + tag_c: struct { + c: struct { + d: []const u8, + }, + }, + }); + try testing.expectEqualStrings("hello there", simple.tag_a.a.b); +} + +test "typed union with nested struct 2" { + const source = + \\c: + \\ d: hello there + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const simple = try yaml.parse(union(enum) { + tag_a: struct { + a: struct { + b: []const u8, + }, + }, + tag_c: struct { + c: struct { + d: []const u8, + }, + }, + }); + try testing.expectEqualStrings("hello there", simple.tag_c.c.d); +} + +test "single quoted string" { + const source = + \\- 'hello' + \\- 'here''s an escaped quote' + \\- 'newlines and tabs\nare not\tsupported' + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const arr = try yaml.parse([3][]const u8); + try testing.expectEqual(arr.len, 3); + try testing.expectEqualStrings("hello", arr[0]); + try testing.expectEqualStrings("here's an escaped quote", arr[1]); + try testing.expectEqualStrings("newlines and tabs\\nare not\\tsupported", arr[2]); +} + +test "double quoted string" { + const source = + \\- "hello" + \\- "\"here\" are some escaped quotes" + \\- "newlines and tabs\nare\tsupported" + \\- "let's have + \\some fun!" 
+ ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const arr = try yaml.parse([4][]const u8); + try testing.expectEqual(arr.len, 4); + try testing.expectEqualStrings("hello", arr[0]); + try testing.expectEqualStrings( + \\"here" are some escaped quotes + , arr[1]); + try testing.expectEqualStrings("newlines and tabs\nare\tsupported", arr[2]); + try testing.expectEqualStrings( + \\let's have + \\some fun! + , arr[3]); +} + +test "multidoc typed as a slice of structs" { + const source = + \\--- + \\a: 0 + \\--- + \\a: 1 + \\... + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + { + const result = try yaml.parse([2]struct { a: usize }); + try testing.expectEqual(result.len, 2); + try testing.expectEqual(result[0].a, 0); + try testing.expectEqual(result[1].a, 1); + } + + { + const result = try yaml.parse([]struct { a: usize }); + try testing.expectEqual(result.len, 2); + try testing.expectEqual(result[0].a, 0); + try testing.expectEqual(result[1].a, 1); + } +} + +test "multidoc typed as a struct is an error" { + const source = + \\--- + \\a: 0 + \\--- + \\b: 1 + \\... + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { a: usize })); + try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { b: usize })); + try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { a: usize, b: usize })); +} + +test "multidoc typed as a slice of structs with optionals" { + const source = + \\--- + \\a: 0 + \\c: 1.0 + \\--- + \\a: 1 + \\b: different field + \\... 
+ ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const result = try yaml.parse([]struct { a: usize, b: ?[]const u8, c: ?f16 }); + try testing.expectEqual(result.len, 2); + + try testing.expectEqual(result[0].a, 0); + try testing.expect(result[0].b == null); + try testing.expect(result[0].c != null); + try testing.expectEqual(result[0].c.?, 1.0); + + try testing.expectEqual(result[1].a, 1); + try testing.expect(result[1].b != null); + try testing.expectEqualStrings("different field", result[1].b.?); + try testing.expect(result[1].c == null); +} + +test "empty yaml can be represented as void" { + const source = ""; + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + const result = try yaml.parse(void); + try testing.expect(@TypeOf(result) == void); +} + +test "nonempty yaml cannot be represented as void" { + const source = + \\a: b + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(void)); +} + +test "typed array size mismatch" { + const source = + \\- 0 + \\- 0 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectError(Yaml.Error.ArraySizeMismatch, yaml.parse([1]usize)); + try testing.expectError(Yaml.Error.ArraySizeMismatch, yaml.parse([5]usize)); +} + +test "comments" { + const source = + \\ + \\key: # this is the key + \\# first value + \\ + \\- val1 + \\ + \\# second value + \\- val2 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const simple = try yaml.parse(struct { + key: []const []const u8, + }); + try testing.expect(simple.key.len == 2); + try testing.expectEqualStrings("val1", simple.key[0]); + try testing.expectEqualStrings("val2", simple.key[1]); +} + +test "promote ints to floats in a list mixed numeric types" { + const source = + \\a_list: [0, 1.0] + ; + + var yaml = try Yaml.load(testing.allocator, 
source); + defer yaml.deinit(); + + const simple = try yaml.parse(struct { + a_list: []const f64, + }); + try testing.expectEqualSlices(f64, &[_]f64{ 0.0, 1.0 }, simple.a_list); +} + +test "demoting floats to ints in a list is an error" { + const source = + \\a_list: [0, 1.0] + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectError(error.TypeMismatch, yaml.parse(struct { + a_list: []const u64, + })); +} + +test "duplicate map keys" { + const source = + \\a: b + \\a: c + ; + try testing.expectError(error.DuplicateMapKey, Yaml.load(testing.allocator, source)); +} + +fn testStringify(expected: []const u8, input: anytype) !void { + var output = std.ArrayList(u8).init(testing.allocator); + defer output.deinit(); + + try yaml_mod.stringify(testing.allocator, input, output.writer()); + try testing.expectEqualStrings(expected, output.items); +} + +test "stringify an int" { + try testStringify("128", @as(u32, 128)); +} + +test "stringify a simple struct" { + try testStringify( + \\a: 1 + \\b: 2 + \\c: 2.5 + , struct { a: i64, b: f64, c: f64 }{ .a = 1, .b = 2.0, .c = 2.5 }); +} + +test "stringify a struct with an optional" { + try testStringify( + \\a: 1 + \\b: 2 + \\c: 2.5 + , struct { a: i64, b: ?f64, c: f64 }{ .a = 1, .b = 2.0, .c = 2.5 }); + + try testStringify( + \\a: 1 + \\c: 2.5 + , struct { a: i64, b: ?f64, c: f64 }{ .a = 1, .b = null, .c = 2.5 }); +} + +test "stringify a struct with all optionals" { + try testStringify("", struct { a: ?i64, b: ?f64 }{ .a = null, .b = null }); +} + +test "stringify an optional" { + try testStringify("", null); + try testStringify("", @as(?u64, null)); +} + +test "stringify a union" { + const Dummy = union(enum) { + x: u64, + y: f64, + }; + try testStringify("a: 1", struct { a: Dummy }{ .a = .{ .x = 1 } }); + try testStringify("a: 2.1", struct { a: Dummy }{ .a = .{ .y = 2.1 } }); +} + +test "stringify a string" { + try testStringify("a: name", struct { a: []const u8 }{ .a = "name" 
}); + try testStringify("name", "name"); +} + +test "stringify a list" { + try testStringify("[ 1, 2, 3 ]", @as([]const u64, &.{ 1, 2, 3 })); + try testStringify("[ 1, 2, 3 ]", .{ @as(i64, 1), 2, 3 }); + try testStringify("[ 1, name, 3 ]", .{ 1, "name", 3 }); + + const arr: [3]i64 = .{ 1, 2, 3 }; + try testStringify("[ 1, 2, 3 ]", arr); +} From 791572dab82800136dbcd2eb09890f6226d7b7c8 Mon Sep 17 00:00:00 2001 From: fearlessfe <505380967@qq.com> Date: Tue, 29 Oct 2024 07:15:11 +0100 Subject: [PATCH 5/5] feat: add source for yaml parser lib --- clib/libyaml | 1 - src/yaml/Tokenizer.zig | 2 ++ src/yaml/parse.zig | 4 ++++ src/yaml/parse/test.zig | 2 ++ src/yaml/yaml.zig | 2 ++ src/yaml/yaml/test.zig | 2 ++ 6 files changed, 12 insertions(+), 1 deletion(-) delete mode 160000 clib/libyaml diff --git a/clib/libyaml b/clib/libyaml deleted file mode 160000 index 840b65c..0000000 --- a/clib/libyaml +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 840b65c40675e2d06bf40405ad3f12dec7f35923 diff --git a/src/yaml/Tokenizer.zig b/src/yaml/Tokenizer.zig index 47e106e..7ef83b5 100644 --- a/src/yaml/Tokenizer.zig +++ b/src/yaml/Tokenizer.zig @@ -1,3 +1,5 @@ +//! The code below is essentially a port of https://github.com/kubkon/zig-yaml + const Tokenizer = @This(); const std = @import("std"); diff --git a/src/yaml/parse.zig b/src/yaml/parse.zig index 9648f8e..fa711a4 100644 --- a/src/yaml/parse.zig +++ b/src/yaml/parse.zig @@ -1,3 +1,7 @@ +//! The code below is essentially a port of https://github.com/kubkon/zig-yaml +//! to the most recent version of zig with a couple of stylistic changes and support for +//! json yaml. + const std = @import("std"); const assert = std.debug.assert; const log = std.log.scoped(.parse); diff --git a/src/yaml/parse/test.zig b/src/yaml/parse/test.zig index 2906801..284f06a 100644 --- a/src/yaml/parse/test.zig +++ b/src/yaml/parse/test.zig @@ -1,3 +1,5 @@ +//! 
The code below is essentially a port of https://github.com/kubkon/zig-yaml + const std = @import("std"); const mem = std.mem; const testing = std.testing; diff --git a/src/yaml/yaml.zig b/src/yaml/yaml.zig index e39ca90..ea2934c 100644 --- a/src/yaml/yaml.zig +++ b/src/yaml/yaml.zig @@ -1,3 +1,5 @@ +//! The code below is essentially a port of https://github.com/kubkon/zig-yaml + const std = @import("std"); const assert = std.debug.assert; const math = std.math; diff --git a/src/yaml/yaml/test.zig b/src/yaml/yaml/test.zig index 2d418ab..0e11edf 100644 --- a/src/yaml/yaml/test.zig +++ b/src/yaml/yaml/test.zig @@ -1,3 +1,5 @@ +//! The code below is essentially a port of https://github.com/kubkon/zig-yaml + const std = @import("std"); const mem = std.mem; const testing = std.testing;