Poprawki do etapu 3 (Analizator leksykalny)

Dove6 · May 6, 2022 · 4088367 · 4088367
1 parent 6a0aa33
commit 4088367
Show file tree

Hide file tree

Showing 32 changed files with 371 additions and 293 deletions.
diff --git a/README.md b/README.md
@@ -21,7 +21,7 @@ Podstawowe typy danych będą przekazywane do funkcji przez kopię, natomiast ł
 
 ## Formalna specyfikacja i składnia
 
-Gramatyka realizowanego języka opisana jest w pliku [gramatyka.md](gramatyka.md). Reguły dotyczące operatorów są zgodne z tabelami z pliku [operatory.md](operatory.md).
+Gramatyka realizowanego języka opisana jest w pliku [gramatyka.md](docs/gramatyka.md). Reguły dotyczące operatorów są zgodne z tabelami z pliku [operatory.md](docs/operatory.md).
 
 Nie przewiduje się na razie konfiguracji zachowania interpretera poprzez specjalne pliki.
 
@@ -70,7 +70,7 @@ Możliwe jest importowanie zawartości innych skryptów za pomocą instrukcji `p
         * słowo kluczowe `break` pozwala na bezwarunkowe przerwanie wykonania obu typów pętli
         * słowo kluczowe `break_if` pozwala na warunkowe przerwanie wykonania obu typów pętli - warunek należy podać w nawiasach
 8. funkcje
-    * defiowanie funkcji anonimowych z użyciem słowa kluczowego `functi`, po którym następuje lista parametrów i ciało funkcji
+    * definiowanie funkcji anonimowych z użyciem słowa kluczowego `functi`, po którym następuje lista parametrów i ciało funkcji (blok)
     * funkcje anonimowe mogą być przypisane do zmiennej/stałej
     * funkcje anonimowe mogą przechwytywać zmienne (mechanizm domknięć), ale nie mogą ich modyfikować
     * wywołanie funkcji możliwe jest z użyciem nawiasów, w których podane są argumenty, możliwe rekursywne wywołania

diff --git a/Toffee.Tests/LexicalAnalysis/LexerTests.cs b/Toffee.Tests/LexicalAnalysis/LexerTests.cs
diff --git a/Toffee.Tests/LexicalAnalysis/ScannerMock.cs b/Toffee.Tests/LexicalAnalysis/ScannerMock.cs
@@ -16,11 +16,14 @@ public ScannerMock(string contentToOutput)
         _outputBuffer = contentToOutput;
     }
 
-    public void Advance()
+    public char? Advance()
     {
+        var supersededCharacter = CurrentCharacter;
+
         if (CurrentCharacter is '\n')
             CurrentPosition = CurrentPosition.WithIncrementedLine(1);
         else if (CurrentCharacter is not null)
             CurrentPosition = CurrentPosition.WithIncrementedColumn();
+        return supersededCharacter;
     }
 }
diff --git a/Toffee.Tests/Scanning/ScannerTests.cs b/Toffee.Tests/Scanning/ScannerTests.cs
@@ -120,4 +120,16 @@ public void DifferentNewLineCharactersShouldIncrementPositionProperly()
         Assert.Equal((uint)increments.Length + 1, scanner.CurrentPosition.Line);
         Assert.Equal(0u, scanner.CurrentPosition.Column);
     }
+
+    [Fact]
+    public void SupersededCharactersShouldBeReturnedByAdvanceMethodCorrectly() // checks the value returned by Scanner.Advance()
+    {
+        const string input = "abcd1234";
+        var scanner = new Scanner(new StringReader(input));
+
+        foreach (var character in input) // each Advance() call is expected to return the next input character, in order
+            Assert.Equal(character, scanner.Advance());
+
+        Assert.Null(scanner.Advance()); // after the whole input has been consumed, Advance() is expected to return null
+    }
 }
diff --git a/Toffee/.dockerignore b/Toffee/.dockerignore
@@ -13,13 +13,8 @@
 **/*.jfm
 **/azds.yaml
 **/bin
-**/charts
 **/docker-compose*
 **/Dockerfile*
-**/node_modules
-**/npm-debug.log
 **/obj
 **/secrets.dev.yaml
 **/values.dev.yaml
-LICENSE
-README.md
diff --git a/Toffee/CommandLine/Application.cs b/Toffee/CommandLine/Application.cs
@@ -9,7 +9,7 @@ public class Application
 {
     private TextReader? _reader;
     private IScanner? _scanner;
-    private Logger? _logger;
+    private ILexerErrorHandler? _logger;
     private LexerBase? _lexer;
 
     [DefaultCommand]
@@ -24,7 +24,7 @@ public void Execute(
             : new StreamReader(scriptFilename.Name);
         var sourceName = scriptFilename?.Name ?? "STDIN";
         _scanner = new Scanner(_reader);
-        _logger = new ConsoleLogger(sourceName);
+        _logger = new ConsoleErrorHandler(sourceName);
         _lexer = new Lexer(_scanner, _logger, maxLexemeLength);
         RunLexer();
     }
@@ -35,7 +35,7 @@ static string FormatPosition(Position position) =>
             $"[{position.Character}] {position.Line}:{position.Column}";
         while (_lexer!.CurrentToken.Type != TokenType.EndOfText)
         {
-            var positionDescription = FormatPosition(_lexer.CurrentToken.Position);
+            var positionDescription = FormatPosition(_lexer.CurrentToken.StartPosition);
             var contentDescription = _lexer.CurrentToken.Content switch
             {
                 char charContent => $"0x{Convert.ToByte(charContent):x2}",

diff --git a/Toffee/LexicalAnalysis/Lexer.cs b/Toffee/LexicalAnalysis/Lexer.cs
@@ -7,18 +7,16 @@ namespace Toffee.LexicalAnalysis;
 public sealed partial class Lexer : LexerBase
 {
     private readonly IScanner _scanner;
-    private readonly Logger? _logger;
+    private readonly ILexerErrorHandler? _errorHandler;
     private Position _tokenStartPosition;
 
-    private uint CurrentOffset => _scanner.CurrentPosition.Character - _tokenStartPosition.Character;
-
     private delegate Token? MatchDelegate();
     private readonly List<MatchDelegate> _matchers;
 
-    public Lexer(IScanner scanner, Logger? logger = null, int? maxLexemeLength = null) : base(maxLexemeLength)
+    public Lexer(IScanner scanner, ILexerErrorHandler? errorHandler = null, int? maxLexemeLength = null) : base(maxLexemeLength)
     {
         _scanner = scanner;
-        _logger = logger;
+        _errorHandler = errorHandler;
 
         _matchers = new List<MatchDelegate>
         {
@@ -28,6 +26,7 @@ public Lexer(IScanner scanner, Logger? logger = null, int? maxLexemeLength = nul
             MatchString
         };
 
+        CurrentToken = new Token(TokenType.Unknown);
         Advance();
     }
 
@@ -46,56 +45,65 @@ private static int CharToDigit(char c) => c >= 'a'
             ? c - 'A' + 10
             : c - '0';
 
-    private void AppendDigitConsideringOverflowGivenRadix(int radix, ref long buffer, char digit, ref bool overflowOccurred, uint? offset = null)
+    private void CollectDigitConsideringOverflowGivenRadix(int radix, ref ulong buffer, ref bool overflowOccurred)
     {
-        if (!overflowOccurred)
+        var digitPosition = _scanner.CurrentPosition;
+        if (overflowOccurred)
+        {
+            _scanner.Advance();
+            return;
+        }
+        try
         {
-            try
-            {
-                buffer = checked(radix * buffer + CharToDigit(digit));
-            }
-            catch (OverflowException)
-            {
-                overflowOccurred = true;
-                EmitError(new NumberLiteralTooLarge(offset ?? CurrentOffset));
-            }
+            buffer = checked((ulong)radix * buffer + (ulong)CharToDigit(_scanner.Advance()!.Value));
         }
+        catch (OverflowException)
+        {
+            overflowOccurred = true;
+            EmitError(new NumberLiteralTooLarge(digitPosition));
+        }
+    }
+
+    private void CollectCharConsideringLengthLimit(StringBuilder buffer, ref bool maxLengthExceeded) // advances the scanner and forwards the returned character to AppendCharConsideringLengthLimit
+    {
+        var charPosition = _scanner.CurrentPosition; // read the position before Advance() is called on the next line
+        AppendCharConsideringLengthLimit(buffer, _scanner.Advance(), ref maxLengthExceeded, charPosition); // the saved position is passed along for error reporting
     }
 
-    private void AppendCharConsideringLengthLimit(StringBuilder buffer, char? c, ref bool maxLengthExceeded, uint? offset = null)
+    private void AppendCharConsideringLengthLimit(StringBuilder buffer, char? c, ref bool maxLengthExceeded, Position charPosition)
     {
+        if (!maxLengthExceeded && buffer.Length >= MaxLexemeLength)
+        {
+            maxLengthExceeded = true;
+            EmitError(new ExceededMaxLexemeLength(charPosition, MaxLexemeLength));
+        }
         if (maxLengthExceeded)
             return;
-        if (buffer.Length >= MaxLexemeLength)
+        try
         {
-            maxLengthExceeded = true;
-            EmitError(new ExceededMaxLexemeLength(offset ?? CurrentOffset));
+            buffer.Append(c);
         }
-        else
+        catch (ArgumentOutOfRangeException)
         {
-            try
-            {
-                buffer.Append(c);
-            }
-            catch (ArgumentOutOfRangeException)
-            {
-                maxLengthExceeded = true;
-                EmitError(new ExceededMaxLexemeLength(offset ?? CurrentOffset));
-            }
+            maxLengthExceeded = true;
+            EmitError(new ExceededMaxLexemeLength(charPosition, MaxLexemeLength));
         }
     }
 
     private void EmitError(LexerError error)
     {
         CurrentError = error;
-        _logger?.LogError(_tokenStartPosition, error.ToMessage(), error);
+        _errorHandler?.Handle(error);
     }
 
     private void EmitWarning(LexerWarning warning)
     {
-        _logger?.LogWarning(_tokenStartPosition, warning.ToMessage(), warning);
+        _errorHandler?.Handle(warning);
     }
 
+    private Token FillInTokenPosition(Token baseToken) => // returns a copy of baseToken with StartPosition and EndPosition set
+        baseToken with { StartPosition = _tokenStartPosition, EndPosition = _scanner.CurrentPosition }; // start comes from the saved token start, end from the scanner's current position
+
     private bool TryMatchToken(out Token matchedToken)
     {
         matchedToken = new Token(TokenType.Unknown);
@@ -116,26 +124,26 @@ private void SkipWhitespaces()
             _scanner.Advance();
     }
 
-    public override void Advance()
+    public override Token Advance()
     {
+        CurrentError = null;
+        var supersededToken = CurrentToken;
         SkipWhitespaces();
-
         _tokenStartPosition = _scanner.CurrentPosition;
 
         if (_scanner.CurrentCharacter is null)
-            CurrentToken = new Token(TokenType.EndOfText, "ETX", _tokenStartPosition);
+            CurrentToken = FillInTokenPosition(new Token(TokenType.EndOfText, "ETX"));
         else if (TryMatchToken(out var matchedToken))
-            CurrentToken = matchedToken with { Position = _tokenStartPosition };
+            CurrentToken = FillInTokenPosition(matchedToken);
         else
         {
-            var buffer = $"{_scanner.CurrentCharacter.Value}";
+            var unknownTokenPosition = _scanner.CurrentPosition;
+            var buffer = $"{_scanner.Advance()}";
             if (char.IsHighSurrogate(buffer[0]))
-            {
-                _scanner.Advance();
-                buffer += _scanner.CurrentCharacter;
-            }
-            EmitError(new UnknownToken());
-            CurrentToken = new Token(TokenType.Unknown, buffer, _tokenStartPosition);
+                buffer += _scanner.Advance();
+            EmitError(new UnknownToken(unknownTokenPosition, buffer));
+            CurrentToken = FillInTokenPosition(new Token(TokenType.Unknown, buffer));
         }
+        return supersededToken;
     }
 }
diff --git a/Toffee/LexicalAnalysis/LexerBase.cs b/Toffee/LexicalAnalysis/LexerBase.cs
@@ -23,7 +23,11 @@ protected LexerBase(int? maxLexemeLength = null)
         MaxLexemeLength = maxLexemeLength ?? int.MaxValue;
     }
 
-    public abstract void Advance();
+    /// <summary>
+    /// Advances the position of the lexer in the token stream.
+    /// </summary>
+    /// <returns>Superseded token - the current one from before the method was called</returns>
+    public abstract Token Advance();
 
     public void ResetError()
     {

diff --git a/Toffee/LexicalAnalysis/LexerErrors.cs b/Toffee/LexicalAnalysis/LexerErrors.cs
@@ -1,14 +1,16 @@
 using System.Collections.ObjectModel;
+using Toffee.Scanning;
 
 namespace Toffee.LexicalAnalysis;
 
-public abstract record LexerError(uint Offset);
-public record UnexpectedEndOfText(uint Offset = 0) : LexerError(Offset);
-public record ExceededMaxLexemeLength(uint Offset = 0) : LexerError(Offset);
-public record UnknownToken(uint Offset = 0) : LexerError(Offset);
-public record NumberLiteralTooLarge(uint Offset = 0) : LexerError(Offset);
-public record MissingNonDecimalDigits(uint Offset = 0) : LexerError(Offset);
-public record MissingExponent(uint Offset = 0) : LexerError(Offset);
+public abstract record LexerError(Position Position);
+public record UnexpectedEndOfText(Position Position, TokenType BuiltTokenType) : LexerError(Position);
+public record ExceededMaxLexemeLength(Position Position, int MaxLexemeLength) : LexerError(Position);
+public record UnknownToken(Position Position, string Content) : LexerError(Position);
+public record NumberLiteralTooLarge(Position Position) : LexerError(Position);
+public record InvalidNonDecimalPrefix(Position Position, char NonDecimalPrefix) : LexerError(Position);
+public record MissingNonDecimalDigits(Position Position, char NonDecimalPrefix) : LexerError(Position);
+public record MissingExponent(Position Position) : LexerError(Position);
 
 public static class LexerErrorExtensions
 {
@@ -18,6 +20,7 @@ public static class LexerErrorExtensions
         { typeof(ExceededMaxLexemeLength), "Unexpected end of text" },
         { typeof(UnknownToken), "Unknown token" },
         { typeof(NumberLiteralTooLarge), "Overflow in number literal" },
+        { typeof(InvalidNonDecimalPrefix), "Unknown non-decimal number prefix" },
         { typeof(MissingNonDecimalDigits), "No digits after non-decimal number prefix" },
         { typeof(MissingExponent), "No digits after scientific notation prefix" }
     });

diff --git a/Toffee/LexicalAnalysis/LexerWarnings.cs b/Toffee/LexicalAnalysis/LexerWarnings.cs
@@ -1,10 +1,11 @@
 using System.Collections.ObjectModel;
+using Toffee.Scanning;
 
 namespace Toffee.LexicalAnalysis;
 
-public abstract record LexerWarning(uint Offset);
-public record UnknownEscapeSequence(char Specifier, uint Offset = 0) : LexerWarning(Offset);
-public record MissingHexCharCode(uint Offset = 0) : LexerWarning(Offset);
+public abstract record LexerWarning(Position Position);
+public record UnknownEscapeSequence(Position Position, char Specifier) : LexerWarning(Position);
+public record MissingHexCharCode(Position Position) : LexerWarning(Position);
 
 public static class LexerWarningExtensions
 {

diff --git a/Toffee/LexicalAnalysis/Lexer_KeywordsAndIdentifiers.cs b/Toffee/LexicalAnalysis/Lexer_KeywordsAndIdentifiers.cs
@@ -10,14 +10,10 @@ public sealed partial class Lexer
         if (_scanner.CurrentCharacter is null || !char.IsLetter(_scanner.CurrentCharacter.Value))
             return null;
 
-        var nameBuilder = new StringBuilder($"{_scanner.CurrentCharacter.Value}");
+        var nameBuilder = new StringBuilder($"{_scanner.Advance()}");
         var maxLengthExceeded = false;
-        _scanner.Advance();
         while (IsPartOfIdentifier(_scanner.CurrentCharacter))
-        {
-            AppendCharConsideringLengthLimit(nameBuilder, _scanner.CurrentCharacter, ref maxLengthExceeded);
-            _scanner.Advance();
-        }
+            CollectCharConsideringLengthLimit(nameBuilder, ref maxLengthExceeded);
         return KeywordOrIdentifierMapper.MapToKeywordOrIdentifier(nameBuilder.ToString());
     }
 }