Skip to content

Commit

Permalink
Add tests for handling empty lines
Browse files Browse the repository at this point in the history
  • Loading branch information
LakshanWeerasinghe committed Jun 12, 2024
1 parent 2b96a07 commit 4248895
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 29 deletions.
12 changes: 12 additions & 0 deletions ballerina/tests/parse_string.bal
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,18 @@ isolated function testBlockScalarValue() returns error? {
test:assertEquals(value, expectedResult);
}

// Parses the nested_20.yaml fixture and checks how empty lines are reported:
// both quoted scalars are expected to contain a single line feed where the
// empty line was, and the "Chomping" block scalar is expected to end with
// exactly one line feed.
@test:Config
isolated function testParsingEmptyLines() returns error? {
    // Expected mapping, built first so the assertion reads actual-vs-expected.
    map<string> expected = {
        "FoldingDoubleQuote": "Empty line\nas a line feed",
        "FoldingSingleQuote": "Empty line\nas a line feed",
        "Chomping": "Clipped empty lines\n"
    };

    // Any file-read or parse error is propagated by `check` and fails the test.
    string yamlText = check io:fileReadString(FILE_PATH + "nested_20.yaml");
    map<string> actual = check parseString(yamlText);

    test:assertEquals(actual, expected);
}

@test:Config
isolated function testByteAsExpectedTypeForParseString() returns error? {
byte result = check parseString("1");
Expand Down
10 changes: 10 additions & 0 deletions ballerina/tests/resources/nested_20.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
FoldingDoubleQuote:
  "Empty line

  as a line feed"
FoldingSingleQuote:
  'Empty line

  as a line feed'
Chomping: |
  Clipped empty lines
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public class LexerState {
public static final State LEXER_DOUBLE_QUOTE = new DoubleQuoteState();
public static final State LEXER_SINGLE_QUOTE = new SingleQuoteState();
public static final State LEXER_BLOCK_HEADER = new BlockHeaderState();
public static final State LEXER_LITERAL = new LiteralState();
public static final State LEXER_BLOCK_SCALAR = new BlockScalarState();
public static final State LEXER_RESERVED_DIRECTIVE = new ReservedDirectiveState();
private State state = LEXER_START_STATE;
private final CharacterReader characterReader;
Expand Down Expand Up @@ -764,6 +764,10 @@ public State transition(LexerState lexerState) throws Error.YamlParserException
}
}

if (Scanner.scanAndTokenizeEOL(lexerState, EMPTY_LINE)) {
return this;
}

// Terminating delimiter
if (lexerState.peek() == '\"') {
IndentUtils.handleMappingValueIndent(lexerState, DOUBLE_QUOTE_DELIMITER);
Expand Down Expand Up @@ -809,6 +813,10 @@ public State transition(LexerState lexerState) throws Error.YamlParserException
}
}

if (Scanner.scanAndTokenizeEOL(lexerState, EMPTY_LINE)) {
return this;
}

// Escaped single quote
if (lexerState.peek() == '\'' && lexerState.peek(1) == '\'') {
lexerState.lexeme += "'";
Expand Down Expand Up @@ -878,7 +886,7 @@ public State transition(LexerState lexerState) throws Error.YamlParserException
}
}

private static class LiteralState implements State {
private static class BlockScalarState implements State {

/**
* Scan the lexemes for block scalar.
Expand Down Expand Up @@ -906,6 +914,10 @@ public State transition(LexerState lexerState) throws Error.YamlParserException
return LexerState.LEXER_START_STATE.transition(lexerState);
}

if (Scanner.scanAndTokenizeEOL(lexerState, EMPTY_LINE)) {
return this;

Check warning on line 918 in native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java

View check run for this annotation

Codecov / codecov/patch

native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java#L918

Added line #L918 was not covered by tests
}

switch (lexerState.peek()) {
case '#' -> { // Generate beginning of the trailing comment
if (!lexerState.trailingComment && lexerState.captureIndent) {
Expand All @@ -928,11 +940,6 @@ public State transition(LexerState lexerState) throws Error.YamlParserException
case ':', '-' -> {
return LexerState.LEXER_START_STATE.transition(lexerState);
}
case -1 -> { // Empty lines are allowed in trailing comments
lexerState.forward();
lexerState.tokenize(EMPTY_LINE);
return this;
}
default -> { // Other characters are not allowed when the indentation is less
throw new Error.YamlParserException("insufficient indent to process literal characters",
lexerState.getLine(), lexerState.getColumn());

Check warning on line 945 in native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java

View check run for this annotation

Codecov / codecov/patch

native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java#L944-L945

Added lines #L944 - L945 were not covered by tests
Expand All @@ -942,6 +949,10 @@ public State transition(LexerState lexerState) throws Error.YamlParserException

if (lexerState.trailingComment) {
while (true) {
if (Scanner.scanAndTokenizeEOL(lexerState, EMPTY_LINE)) {
return this;

Check warning on line 953 in native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java

View check run for this annotation

Codecov / codecov/patch

native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java#L953

Added line #L953 was not covered by tests
}

switch (lexerState.peek()) {
case ' ' -> { // Ignore whitespace
lexerState.forward();
Expand All @@ -950,11 +961,6 @@ public State transition(LexerState lexerState) throws Error.YamlParserException
lexerState.tokenize(EOL);
return this;

Check warning on line 962 in native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java

View check run for this annotation

Codecov / codecov/patch

native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java#L961-L962

Added lines #L961 - L962 were not covered by tests
}
case -1 -> { // Empty lines are allowed in trailing comments
lexerState.forward();
lexerState.tokenize(EMPTY_LINE);
return this;
}
default -> {
throw new Error.YamlParserException("invalid trailing comment",
lexerState.getLine(), lexerState.getColumn());

Check warning on line 966 in native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java

View check run for this annotation

Codecov / codecov/patch

native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java#L965-L966

Added lines #L965 - L966 were not covered by tests
Expand Down
19 changes: 16 additions & 3 deletions native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ public static class SingleQuoteCharScanner implements Scan {
* Process single quoted scalar values.
*/
@Override
public boolean scan(LexerState sm) {
public boolean scan(LexerState sm) throws Error.YamlParserException {
// Process nb-json characters
if (matchPattern(sm, List.of(JSON_PATTERN), List.of(new Utils.CharPattern('\'')))) {
sm.appendToLexeme(Character.toString(sm.peek()));
Expand All @@ -155,9 +155,14 @@ public boolean scan(LexerState sm) {
sm.forward();
return false;
}
return true;
}

return true;
if (Utils.isNewLine(sm)) {
return true;
}

throw new Error.YamlParserException("invalid character", sm.getLine(), sm.getColumn());

Check warning on line 165 in native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java

View check run for this annotation

Codecov / codecov/patch

native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java#L165

Added line #L165 was not covered by tests
}
}

Expand Down Expand Up @@ -185,7 +190,15 @@ public boolean scan(LexerState sm) throws Error.YamlParserException {
return false;
}

return true;
if (sm.peek() == '\"') {
return true;
}

if (Utils.isNewLine(sm)) {
return true;
}

throw new Error.YamlParserException("invalid character", sm.getLine(), sm.getColumn());

Check warning on line 201 in native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java

View check run for this annotation

Codecov / codecov/patch

native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java#L201

Added line #L201 was not covered by tests
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,4 +253,8 @@ public static String getWhitespace(LexerState sm) {
}
return whitespace.toString();
}

/**
 * Checks whether the lexer is currently positioned at a line break.
 *
 * <p>A line break is either a single {@code '\n'} or the two-character
 * sequence {@code "\r\n"}. A lone {@code '\r'} is not treated as a line break.
 * Only {@code peek} is called, so no input is consumed here.
 *
 * @param sm lexer state whose upcoming characters are inspected
 * @return {@code true} if the next character(s) form a line break
 */
public static boolean isNewLine(LexerState sm) {
    // Plain line feed.
    if (sm.peek() == '\n') {
        return true;
    }
    // Carriage return immediately followed by a line feed.
    return sm.peek() == '\r' && sm.peek(1) == '\n';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,12 @@ public static LexerState.State scanTokens(LexerState state) throws Error.YamlPar
return state.getState();
}

// Check for line breaks when reading from string
if (state.peek() == '\n' && state.getState() != LEXER_DOUBLE_QUOTE) {
state.forward();
state.tokenize(EOL);
return state.getState();
}

// Check for line breaks when reading from string
if (state.peek() == '\r' && state.peek(1) == '\n' && state.getState() != LEXER_DOUBLE_QUOTE) {
state.forward();
state.tokenize(EOL);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,11 @@
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.EMPTY_LINE;
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.EOL;
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.FOLDED;
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.MAPPING_END;
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.MAPPING_KEY;
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.MAPPING_VALUE;
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.PLANAR_CHAR;
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.SEPARATION_IN_LINE;
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.SEPARATOR;
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.SEQUENCE_END;
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.SEQUENCE_ENTRY;
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.SINGLE_QUOTE_DELIMITER;
import static io.ballerina.lib.data.yaml.lexer.Token.TokenType.TAG;
Expand Down Expand Up @@ -1345,7 +1343,7 @@ private static YamlEvent parse(ParserState state, ParserUtils.ParserOption optio
return new YamlEvent.EndEvent(Collection.MAPPING);
}
case LITERAL, FOLDED -> {
state.updateLexerState(LexerState.LEXER_LITERAL);
state.updateLexerState(LexerState.LEXER_BLOCK_SCALAR);
return appendData(state, option, true);
}
}
Expand Down Expand Up @@ -1909,7 +1907,7 @@ private static String blockScalar(ParserState state, boolean isFolded) throws Er
}
}

state.getLexerState().updateLexerState(LexerState.LEXER_LITERAL);
state.getLexerState().updateLexerState(LexerState.LEXER_BLOCK_SCALAR);
StringBuilder lexemeBuffer = new StringBuilder();
StringBuilder newLineBuffer = new StringBuilder();
boolean isFirstLine = true;
Expand Down Expand Up @@ -2185,10 +2183,8 @@ private static String doubleQuoteScalar(ParserState state) throws Error.YamlPars
}
state.getLexerState().setFirstLine(false);
}
default -> {
throw new Error.YamlParserException("invalid double quote scalar",
state.getLine(), state.getColumn());
}
default -> throw new Error.YamlParserException("invalid double quote scalar",
state.getLine(), state.getColumn());
}
getNextToken(state);
}
Expand Down Expand Up @@ -2283,10 +2279,8 @@ private static String singleQuoteScalar(ParserState state) throws Error.YamlPars
}
state.getLexerState().setFirstLine(false);
}
default -> {
throw new Error.YamlParserException("invalid single quote character",
state.getLine(), state.getColumn());
}
default -> throw new Error.YamlParserException("invalid single quote character",
state.getLine(), state.getColumn());
}
getNextToken(state);
}
Expand Down

0 comments on commit 4248895

Please sign in to comment.