Skip to content

Commit

Permalink
Poprawki do etapu 3 (Analizator leksykalny)
Browse files Browse the repository at this point in the history
  • Loading branch information
Dawid Sygocki committed May 6, 2022
1 parent 6a0aa33 commit 4088367
Show file tree
Hide file tree
Showing 32 changed files with 371 additions and 293 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Podstawowe typy danych będą przekazywane do funkcji przez kopię, natomiast ł

## Formalna specyfikacja i składnia

Gramatyka realizowanego języka opisana jest w pliku [gramatyka.md](gramatyka.md). Reguły dotyczące operatorów są zgodne z tabelami z pliku [operatory.md](operatory.md).
Gramatyka realizowanego języka opisana jest w pliku [gramatyka.md](docs/gramatyka.md). Reguły dotyczące operatorów są zgodne z tabelami z pliku [operatory.md](docs/operatory.md).

Nie przewiduje się na razie konfiguracji zachowania interpretera poprzez specjalne pliki.

Expand Down Expand Up @@ -70,7 +70,7 @@ Możliwe jest importowanie zawartości innych skryptów za pomocą instrukcji `p
* słowo kluczowe `break` pozwala na bezwarunkowe przerwanie wykonania obu typów pętli
* słowo kluczowe `break_if` pozwala na warunkowe przerwanie wykonania obu typów pętli - warunek należy podać w nawiasach
8. funkcje
* definiowanie funkcji anonimowych z użyciem słowa kluczowego `functi`, po którym następuje lista parametrów i ciało funkcji
* definiowanie funkcji anonimowych z użyciem słowa kluczowego `functi`, po którym następuje lista parametrów i ciało funkcji (blok)
* funkcje anonimowe mogą być przypisane do zmiennej/stałej
* funkcje anonimowe mogą przechwytywać zmienne (mechanizm domknięć), ale nie mogą ich modyfikować
* wywołanie funkcji możliwe jest z użyciem nawiasów, w których podane są argumenty, możliwe rekursywne wywołania
Expand Down
167 changes: 108 additions & 59 deletions Toffee.Tests/LexicalAnalysis/LexerTests.cs

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion Toffee.Tests/LexicalAnalysis/ScannerMock.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ public ScannerMock(string contentToOutput)
_outputBuffer = contentToOutput;
}

public void Advance()
public char? Advance()
{
var supersededCharacter = CurrentCharacter;

if (CurrentCharacter is '\n')
CurrentPosition = CurrentPosition.WithIncrementedLine(1);
else if (CurrentCharacter is not null)
CurrentPosition = CurrentPosition.WithIncrementedColumn();
return supersededCharacter;
}
}
12 changes: 12 additions & 0 deletions Toffee.Tests/Scanning/ScannerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,16 @@ public void DifferentNewLineCharactersShouldIncrementPositionProperly()
Assert.Equal((uint)increments.Length + 1, scanner.CurrentPosition.Line);
Assert.Equal(0u, scanner.CurrentPosition.Column);
}

[Fact]
public void SupersededCharactersShouldBeReturnedByAdvanceMethodCorrectly()
{
    const string input = "abcd1234";
    var reader = new StringReader(input);
    var scanner = new Scanner(reader);

    // Each Advance() call must hand back the character that was current before the call.
    for (var index = 0; index < input.Length; index++)
    {
        var expected = input[index];
        Assert.Equal(expected, scanner.Advance());
    }

    // Once the input is exhausted there is no character left to supersede.
    Assert.Null(scanner.Advance());
}
}
5 changes: 0 additions & 5 deletions Toffee/.dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,8 @@
**/*.jfm
**/azds.yaml
**/bin
**/charts
**/docker-compose*
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/obj
**/secrets.dev.yaml
**/values.dev.yaml
LICENSE
README.md
6 changes: 3 additions & 3 deletions Toffee/CommandLine/Application.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public class Application
{
private TextReader? _reader;
private IScanner? _scanner;
private Logger? _logger;
private ILexerErrorHandler? _logger;
private LexerBase? _lexer;

[DefaultCommand]
Expand All @@ -24,7 +24,7 @@ public void Execute(
: new StreamReader(scriptFilename.Name);
var sourceName = scriptFilename?.Name ?? "STDIN";
_scanner = new Scanner(_reader);
_logger = new ConsoleLogger(sourceName);
_logger = new ConsoleErrorHandler(sourceName);
_lexer = new Lexer(_scanner, _logger, maxLexemeLength);
RunLexer();
}
Expand All @@ -35,7 +35,7 @@ static string FormatPosition(Position position) =>
$"[{position.Character}] {position.Line}:{position.Column}";
while (_lexer!.CurrentToken.Type != TokenType.EndOfText)
{
var positionDescription = FormatPosition(_lexer.CurrentToken.Position);
var positionDescription = FormatPosition(_lexer.CurrentToken.StartPosition);
var contentDescription = _lexer.CurrentToken.Content switch
{
char charContent => $"0x{Convert.ToByte(charContent):x2}",
Expand Down
94 changes: 51 additions & 43 deletions Toffee/LexicalAnalysis/Lexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,16 @@ namespace Toffee.LexicalAnalysis;
public sealed partial class Lexer : LexerBase
{
private readonly IScanner _scanner;
private readonly Logger? _logger;
private readonly ILexerErrorHandler? _errorHandler;
private Position _tokenStartPosition;

private uint CurrentOffset => _scanner.CurrentPosition.Character - _tokenStartPosition.Character;

private delegate Token? MatchDelegate();
private readonly List<MatchDelegate> _matchers;

public Lexer(IScanner scanner, Logger? logger = null, int? maxLexemeLength = null) : base(maxLexemeLength)
public Lexer(IScanner scanner, ILexerErrorHandler? errorHandler = null, int? maxLexemeLength = null) : base(maxLexemeLength)
{
_scanner = scanner;
_logger = logger;
_errorHandler = errorHandler;

_matchers = new List<MatchDelegate>
{
Expand All @@ -28,6 +26,7 @@ public Lexer(IScanner scanner, Logger? logger = null, int? maxLexemeLength = nul
MatchString
};

CurrentToken = new Token(TokenType.Unknown);
Advance();
}

Expand All @@ -46,56 +45,65 @@ private static int CharToDigit(char c) => c >= 'a'
? c - 'A' + 10
: c - '0';

private void AppendDigitConsideringOverflowGivenRadix(int radix, ref long buffer, char digit, ref bool overflowOccurred, uint? offset = null)
private void CollectDigitConsideringOverflowGivenRadix(int radix, ref ulong buffer, ref bool overflowOccurred)
{
if (!overflowOccurred)
var digitPosition = _scanner.CurrentPosition;
if (overflowOccurred)
{
_scanner.Advance();
return;
}
try
{
try
{
buffer = checked(radix * buffer + CharToDigit(digit));
}
catch (OverflowException)
{
overflowOccurred = true;
EmitError(new NumberLiteralTooLarge(offset ?? CurrentOffset));
}
buffer = checked((ulong)radix * buffer + (ulong)CharToDigit(_scanner.Advance()!.Value));
}
catch (OverflowException)
{
overflowOccurred = true;
EmitError(new NumberLiteralTooLarge(digitPosition));
}
}

/// <summary>
/// Consumes the scanner's current character and appends it to <paramref name="buffer"/>,
/// delegating the length-limit handling to <c>AppendCharConsideringLengthLimit</c>.
/// </summary>
/// <param name="buffer">Builder collecting the lexeme content.</param>
/// <param name="maxLengthExceeded">Length-limit flag passed through to the append helper.</param>
private void CollectCharConsideringLengthLimit(StringBuilder buffer, ref bool maxLengthExceeded)
{
    // Capture the position before advancing: Advance() moves the scanner past the character.
    var positionBeforeAdvance = _scanner.CurrentPosition;
    var consumedCharacter = _scanner.Advance();
    AppendCharConsideringLengthLimit(buffer, consumedCharacter, ref maxLengthExceeded, positionBeforeAdvance);
}

private void AppendCharConsideringLengthLimit(StringBuilder buffer, char? c, ref bool maxLengthExceeded, uint? offset = null)
private void AppendCharConsideringLengthLimit(StringBuilder buffer, char? c, ref bool maxLengthExceeded, Position charPosition)
{
if (!maxLengthExceeded && buffer.Length >= MaxLexemeLength)
{
maxLengthExceeded = true;
EmitError(new ExceededMaxLexemeLength(charPosition, MaxLexemeLength));
}
if (maxLengthExceeded)
return;
if (buffer.Length >= MaxLexemeLength)
try
{
maxLengthExceeded = true;
EmitError(new ExceededMaxLexemeLength(offset ?? CurrentOffset));
buffer.Append(c);
}
else
catch (ArgumentOutOfRangeException)
{
try
{
buffer.Append(c);
}
catch (ArgumentOutOfRangeException)
{
maxLengthExceeded = true;
EmitError(new ExceededMaxLexemeLength(offset ?? CurrentOffset));
}
maxLengthExceeded = true;
EmitError(new ExceededMaxLexemeLength(charPosition, MaxLexemeLength));
}
}

private void EmitError(LexerError error)
{
CurrentError = error;
_logger?.LogError(_tokenStartPosition, error.ToMessage(), error);
_errorHandler?.Handle(error);
}

private void EmitWarning(LexerWarning warning)
{
_logger?.LogWarning(_tokenStartPosition, warning.ToMessage(), warning);
_errorHandler?.Handle(warning);
}

/// <summary>
/// Returns a copy of <paramref name="baseToken"/> spanning from the recorded token start
/// position to the scanner's current position.
/// </summary>
private Token FillInTokenPosition(Token baseToken)
{
    return baseToken with
    {
        StartPosition = _tokenStartPosition,
        EndPosition = _scanner.CurrentPosition
    };
}

private bool TryMatchToken(out Token matchedToken)
{
matchedToken = new Token(TokenType.Unknown);
Expand All @@ -116,26 +124,26 @@ private void SkipWhitespaces()
_scanner.Advance();
}

public override void Advance()
public override Token Advance()
{
CurrentError = null;
var supersededToken = CurrentToken;
SkipWhitespaces();

_tokenStartPosition = _scanner.CurrentPosition;

if (_scanner.CurrentCharacter is null)
CurrentToken = new Token(TokenType.EndOfText, "ETX", _tokenStartPosition);
CurrentToken = FillInTokenPosition(new Token(TokenType.EndOfText, "ETX"));
else if (TryMatchToken(out var matchedToken))
CurrentToken = matchedToken with { Position = _tokenStartPosition };
CurrentToken = FillInTokenPosition(matchedToken);
else
{
var buffer = $"{_scanner.CurrentCharacter.Value}";
var unknownTokenPosition = _scanner.CurrentPosition;
var buffer = $"{_scanner.Advance()}";
if (char.IsHighSurrogate(buffer[0]))
{
_scanner.Advance();
buffer += _scanner.CurrentCharacter;
}
EmitError(new UnknownToken());
CurrentToken = new Token(TokenType.Unknown, buffer, _tokenStartPosition);
buffer += _scanner.Advance();
EmitError(new UnknownToken(unknownTokenPosition, buffer));
CurrentToken = FillInTokenPosition(new Token(TokenType.Unknown, buffer));
}
return supersededToken;
}
}
6 changes: 5 additions & 1 deletion Toffee/LexicalAnalysis/LexerBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ protected LexerBase(int? maxLexemeLength = null)
MaxLexemeLength = maxLexemeLength ?? int.MaxValue;
}

public abstract void Advance();
/// <summary>
/// Advances the lexer to the next token in the token stream.
/// </summary>
/// <returns>The superseded token, i.e. the one that was current before this method was called.</returns>
public abstract Token Advance();

public void ResetError()
{
Expand Down
17 changes: 10 additions & 7 deletions Toffee/LexicalAnalysis/LexerErrors.cs
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
using System.Collections.ObjectModel;
using Toffee.Scanning;

namespace Toffee.LexicalAnalysis;

public abstract record LexerError(uint Offset);
public record UnexpectedEndOfText(uint Offset = 0) : LexerError(Offset);
public record ExceededMaxLexemeLength(uint Offset = 0) : LexerError(Offset);
public record UnknownToken(uint Offset = 0) : LexerError(Offset);
public record NumberLiteralTooLarge(uint Offset = 0) : LexerError(Offset);
public record MissingNonDecimalDigits(uint Offset = 0) : LexerError(Offset);
public record MissingExponent(uint Offset = 0) : LexerError(Offset);
/// <summary>Base type of all lexer errors; carries the source position the error is reported at.</summary>
public abstract record LexerError(Position Position);
/// <summary>The text ended unexpectedly. NOTE(review): BuiltTokenType is presumably the type of the token being built at that point - confirm against the emitting matcher.</summary>
public record UnexpectedEndOfText(Position Position, TokenType BuiltTokenType) : LexerError(Position);
/// <summary>A lexeme reached the configured length limit; MaxLexemeLength carries that limit.</summary>
public record ExceededMaxLexemeLength(Position Position, int MaxLexemeLength) : LexerError(Position);
/// <summary>No matcher recognized the input; Content holds the unrecognized text.</summary>
public record UnknownToken(Position Position, string Content) : LexerError(Position);
/// <summary>Accumulating the digits of a number literal overflowed; Position points at the digit that caused the overflow.</summary>
public record NumberLiteralTooLarge(Position Position) : LexerError(Position);
/// <summary>Unknown non-decimal number prefix. NOTE(review): NonDecimalPrefix is presumably the offending prefix character - confirm.</summary>
public record InvalidNonDecimalPrefix(Position Position, char NonDecimalPrefix) : LexerError(Position);
/// <summary>No digits follow a non-decimal number prefix.</summary>
public record MissingNonDecimalDigits(Position Position, char NonDecimalPrefix) : LexerError(Position);
/// <summary>No digits follow the scientific notation prefix.</summary>
public record MissingExponent(Position Position) : LexerError(Position);

public static class LexerErrorExtensions
{
Expand All @@ -18,6 +20,7 @@ public static class LexerErrorExtensions
{ typeof(ExceededMaxLexemeLength), "Maximum lexeme length exceeded" },
{ typeof(UnknownToken), "Unknown token" },
{ typeof(NumberLiteralTooLarge), "Overflow in number literal" },
{ typeof(InvalidNonDecimalPrefix), "Unknown non-decimal number prefix" },
{ typeof(MissingNonDecimalDigits), "No digits after non-decimal number prefix" },
{ typeof(MissingExponent), "No digits after scientific notation prefix" }
});
Expand Down
7 changes: 4 additions & 3 deletions Toffee/LexicalAnalysis/LexerWarnings.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
using System.Collections.ObjectModel;
using Toffee.Scanning;

namespace Toffee.LexicalAnalysis;

public abstract record LexerWarning(uint Offset);
public record UnknownEscapeSequence(char Specifier, uint Offset = 0) : LexerWarning(Offset);
public record MissingHexCharCode(uint Offset = 0) : LexerWarning(Offset);
/// <summary>Base type of all lexer warnings; carries the source position the warning refers to.</summary>
public abstract record LexerWarning(Position Position);
/// <summary>NOTE(review): presumably reported for an escape sequence whose specifier character is not recognized - confirm against the string matcher.</summary>
public record UnknownEscapeSequence(Position Position, char Specifier) : LexerWarning(Position);
/// <summary>NOTE(review): presumably reported when a hex escape sequence has no character code - confirm against the string matcher.</summary>
public record MissingHexCharCode(Position Position) : LexerWarning(Position);

public static class LexerWarningExtensions
{
Expand Down
8 changes: 2 additions & 6 deletions Toffee/LexicalAnalysis/Lexer_KeywordsAndIdentifiers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,10 @@ public sealed partial class Lexer
if (_scanner.CurrentCharacter is null || !char.IsLetter(_scanner.CurrentCharacter.Value))
return null;

var nameBuilder = new StringBuilder($"{_scanner.CurrentCharacter.Value}");
var nameBuilder = new StringBuilder($"{_scanner.Advance()}");
var maxLengthExceeded = false;
_scanner.Advance();
while (IsPartOfIdentifier(_scanner.CurrentCharacter))
{
AppendCharConsideringLengthLimit(nameBuilder, _scanner.CurrentCharacter, ref maxLengthExceeded);
_scanner.Advance();
}
CollectCharConsideringLengthLimit(nameBuilder, ref maxLengthExceeded);
return KeywordOrIdentifierMapper.MapToKeywordOrIdentifier(nameBuilder.ToString());
}
}
Loading

0 comments on commit 4088367

Please sign in to comment.