From 2b96a077b44ab510ec8e2ca6588c8071a1d1f623 Mon Sep 17 00:00:00 2001 From: LakshanWeerasinghe Date: Wed, 12 Jun 2024 15:38:59 +0530 Subject: [PATCH] Fix block header issue and add tests --- ballerina/tests/parse_string.bal | 8 ++++ ballerina/tests/parse_string_negative.bal | 5 ++- .../resources/negative/negative_test_14.yaml | 1 + ballerina/tests/resources/nested_19.yaml | 5 +++ ballerina/tests/resources/simple_yaml_1a.yaml | 2 +- .../lib/data/yaml/lexer/LexerState.java | 39 ++++++++----------- .../lib/data/yaml/lexer/Scanner.java | 14 +++++++ 7 files changed, 48 insertions(+), 26 deletions(-) create mode 100644 ballerina/tests/resources/negative/negative_test_14.yaml create mode 100644 ballerina/tests/resources/nested_19.yaml diff --git a/ballerina/tests/parse_string.bal b/ballerina/tests/parse_string.bal index cacff13..3203952 100644 --- a/ballerina/tests/parse_string.bal +++ b/ballerina/tests/parse_string.bal @@ -737,6 +737,14 @@ isolated function testTupleNestedMappingWithProjection1() returns error? { test:assertEquals(value, expectedResult); } +@test:Config +isolated function testBlockScalarValue() returns error? { + string content = check io:fileReadString(FILE_PATH + "nested_19.yaml"); + string[] value = check parseString(content); + string[] expectedResult = ["\n", "\n", "block scalar\nvalue\n"]; + test:assertEquals(value, expectedResult); +} + @test:Config isolated function testByteAsExpectedTypeForParseString() returns error? { byte result = check parseString("1"); diff --git a/ballerina/tests/parse_string_negative.bal b/ballerina/tests/parse_string_negative.bal index 41088c1..5b81fc9 100644 --- a/ballerina/tests/parse_string_negative.bal +++ b/ballerina/tests/parse_string_negative.bal @@ -32,7 +32,7 @@ isolated function negtiveTests(string path, string expectedErrMsg) returns io:Er function negativeDataProvider() returns [string, string][] => [ ["negative_test_1.yaml", "'non printable character found' at line: '2' column: '13'"], ["negative_test_2.yaml", "'invalid indentation' at line: '5' column: '5'"], - ["negative_test_3.yaml", "'invalid block header' at line: '1' column: '2'"], + ["negative_test_3.yaml", "'insufficient indentation for a scalar' at line: '2' column: '4'"], ["negative_test_4.yaml", "'insufficient indentation for a scalar' at line: '3' column: '4'"], ["negative_test_5.yaml", "'insufficient indentation for a scalar' at line: '4' column: '4'"], [ @@ -45,7 +45,8 @@ function negativeDataProvider() returns [string, string][] => [ ["negative_test_10.yaml", "'unexpected event' at line: '1' column: '10'"], ["negative_test_11.yaml", "'unexpected event' at line: '1' column: '8'"], ["negative_test_12.yaml", "'unexpected event error' at line: '1' column: '5'"], - ["negative_test_13.yaml", "'cannot have block sequence under flow collection' at line: '2' column: '3'"] + ["negative_test_13.yaml", "'cannot have block sequence under flow collection' at line: '2' column: '3'"], + ["negative_test_14.yaml", "''-' cannot be defined after tag properties' at line: '1' column: '7'"] ]; @test:Config { diff --git a/ballerina/tests/resources/negative/negative_test_14.yaml b/ballerina/tests/resources/negative/negative_test_14.yaml new file mode 100644 index 0000000..de5cc3b --- /dev/null +++ b/ballerina/tests/resources/negative/negative_test_14.yaml @@ -0,0 +1 @@ +!!str - entry diff --git a/ballerina/tests/resources/nested_19.yaml b/ballerina/tests/resources/nested_19.yaml new file mode 100644 index 0000000..f3c481f --- /dev/null +++ b/ballerina/tests/resources/nested_19.yaml @@ -0,0 +1,5 @@ +- | # comment +- | # another comment +- | + block scalar + value \ No newline at end of file diff --git a/ballerina/tests/resources/simple_yaml_1a.yaml b/ballerina/tests/resources/simple_yaml_1a.yaml index d055bee..456395a 100644 --- a/ballerina/tests/resources/simple_yaml_1a.yaml +++ b/ballerina/tests/resources/simple_yaml_1a.yaml @@ -1,5 +1,5 @@ name: "Jhon" -description: | +description: | # comment This is a multiline string in YAML. It preserves line breaks. diff --git a/native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java b/native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java index 1b08970..8a3677d 100644 --- a/native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java +++ b/native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java @@ -26,6 +26,7 @@ import static io.ballerina.lib.data.yaml.lexer.Scanner.COMMENT_SCANNER; import static io.ballerina.lib.data.yaml.lexer.Scanner.VERBATIM_URI_SCANNER; +import static io.ballerina.lib.data.yaml.lexer.Scanner.WHITE_SPACE_SCANNER; import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.ALIAS; import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.ANCHOR; import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.CHOMPING_INDICATOR; @@ -333,7 +334,7 @@ public State transition(LexerState lexerState) throws Error.YamlParserException boolean startsWithWhiteSpace = false; if (WHITE_SPACE_PATTERN.pattern(lexerState.peek())) { - Scanner.iterate(lexerState, Scanner.WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); + Scanner.iterate(lexerState, WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); startsWithWhiteSpace = true; } @@ -630,7 +631,7 @@ public State transition(LexerState lexerState) throws Error.YamlParserException // Check for separation-in-space before the tag prefix if (WHITE_SPACE_PATTERN.pattern(lexerState.peek())) { - Scanner.iterate(lexerState, Scanner.WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); + Scanner.iterate(lexerState, WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); return this; } @@ -656,7 +657,7 @@ public State transition(LexerState lexerState) throws Error.YamlParserException // Check for tail separation-in-line if (WHITE_SPACE_PATTERN.pattern(lexerState.peek())) { - Scanner.iterate(lexerState, Scanner.WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); + Scanner.iterate(lexerState, WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); return this; } @@ -689,7 +690,7 @@ public State transition(LexerState lexerState) throws Error.YamlParserException // Check for tail separation-in-line if (WHITE_SPACE_PATTERN.pattern(lexerState.peek())) { - Scanner.iterate(lexerState, Scanner.WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); + Scanner.iterate(lexerState, WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); return this; } @@ -721,7 +722,7 @@ public State transition(LexerState lexerState) throws Error.YamlParserException // Check for tail separation-in-line if (WHITE_SPACE_PATTERN.pattern(lexerState.peek())) { - Scanner.iterate(lexerState, Scanner.WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); + Scanner.iterate(lexerState, WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); return this; } @@ -839,13 +840,15 @@ private static class BlockHeaderState implements State { */ @Override public State transition(LexerState lexerState) throws Error.YamlParserException { + boolean hasWhiteSpace = false; if (lexerState.peek() == ' ') { - Scanner.iterate(lexerState, Scanner.WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); + hasWhiteSpace = true; + Scanner.iterate(lexerState, WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); } // Ignore any comments - if (lexerState.peek() == '#' && WHITE_SPACE_PATTERN.pattern(lexerState.peek())) { - lexerState.tokenize(EOL); + if (lexerState.peek() == '#' && hasWhiteSpace) { + Scanner.iterate(lexerState, COMMENT_SCANNER, EOL); return this; } @@ -853,19 +856,13 @@ public State transition(LexerState lexerState) throws Error.YamlParserException if (Utils.matchPattern(lexerState, List.of(DECIMAL_PATTERN), List.of(new Utils.CharPattern('0')))) { lexerState.captureIndent = false; int numericValue = Character.getNumericValue(lexerState.peek()); - if (numericValue == -1 || numericValue == -2) { - throw new Error.YamlParserException("invalid numeric value", - lexerState.getLine(), lexerState.getColumn()); - } lexerState.addIndent += numericValue; lexerState.forward(); return this.transition(lexerState); } // If the indentation indicator is at the tail - if (lexerState.getColumn() >= lexerState.getRemainingBufferedSize()) { - lexerState.forward(); - lexerState.tokenize(EOL); + if (Scanner.scanAndTokenizeEOL(lexerState, EOL)) { return this; } @@ -894,7 +891,7 @@ public State transition(LexerState lexerState) throws Error.YamlParserException int limit = lexerState.indent + lexerState.addIndent; // Check if the line has sufficient indent to be process as a block scalar. - for (int i = 0; i < limit - 1; i++) { + for (int i = 0; i < limit; i++) { if (lexerState.peek() != ' ') { hasSufficientIndent = false; break; @@ -967,9 +964,7 @@ public State transition(LexerState lexerState) throws Error.YamlParserException } // Generate an empty lines that have less index. - if (lexerState.getColumn() >= lexerState.getRemainingBufferedSize()) { - lexerState.forward(); - lexerState.tokenize(EMPTY_LINE); + if (Scanner.scanAndTokenizeEOL(lexerState, EMPTY_LINE)) { return this; } @@ -988,9 +983,7 @@ public State transition(LexerState lexerState) throws Error.YamlParserException } } - if (lexerState.getColumn() >= lexerState.getRemainingBufferedSize()) { - lexerState.forward(); - lexerState.tokenize(EMPTY_LINE); + if (Scanner.scanAndTokenizeEOL(lexerState, EMPTY_LINE)) { return this; } @@ -1023,7 +1016,7 @@ public State transition(LexerState lexerState) throws Error.YamlParserException // Check for separation-in-line if (WHITE_SPACE_PATTERN.pattern(lexerState.peek())) { - Scanner.iterate(lexerState, Scanner.WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); + Scanner.iterate(lexerState, WHITE_SPACE_SCANNER, SEPARATION_IN_LINE); return this; } diff --git a/native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java b/native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java index b8d828d..d226209 100644 --- a/native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java +++ b/native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java @@ -549,4 +549,18 @@ private static void processEscapedWhiteSpaces(LexerState sm) { sm.forward(); } } + + public static boolean scanAndTokenizeEOL(LexerState state, Token.TokenType token) { + if (state.peek() == '\n' || state.peek() == -1) { + state.forward(); + state.tokenize(token); + return true; + } + if (state.peek() == '\r' && state.peek(1) == '\n') { + state.forward(2); + state.tokenize(token); + return true; + } + return false; + } }