From 6861ab9f7ba9a2b33a04093da37d9992cbfbf83f Mon Sep 17 00:00:00 2001
From: Leandro Fernandes Vieira
Date: Mon, 30 Oct 2023 00:29:23 -0300
Subject: [PATCH] tests passing

---
 RecordParser.Test/TextFindHelperFieldTest.cs  |  66 ++++++
 .../Engines/Reader/TextFindFieldHelper.cs     | 219 ++++++++++++++++++
 2 files changed, 285 insertions(+)
 create mode 100644 RecordParser.Test/TextFindHelperFieldTest.cs
 create mode 100644 RecordParser/Engines/Reader/TextFindFieldHelper.cs

diff --git a/RecordParser.Test/TextFindHelperFieldTest.cs b/RecordParser.Test/TextFindHelperFieldTest.cs
new file mode 100644
index 0000000..d4ed329
--- /dev/null
+++ b/RecordParser.Test/TextFindHelperFieldTest.cs
@@ -0,0 +1,66 @@
+using FluentAssertions;
+using RecordParser.Engines;
+using RecordParser.Engines.Reader;
+using Xunit;
+
+namespace RecordParser.Test
+{
+    public class TextFindHelperFieldTest : TestSetup
+    {
+        [Fact]
+        public void TextFindHelper_GetField_Unordered()
+        {
+            // Arrange
+
+            var record = """"
+                foo bar baz , 2020.05.23 , " billy is ""the guy""", 0123.45, LightBlue
+                """";
+
+            var finder = new TextFindHelperField(record, ",", QuoteHelper.Quote,
+                stackalloc (int start, int count, bool quoted)[5],
+                stackalloc char[1024]);
+
+            // Act
+
+            var e = finder.GetField(4);
+            var b = finder.GetField(1);
+            var d = finder.GetField(3);
+            var a = finder.GetField(0);
+            var c = finder.GetField(2);
+
+            // Assert
+
+            a.ToString().Should().Be("foo bar baz ");
+            b.ToString().Should().Be(" 2020.05.23 ");
+            c.ToString().Should().Be(" billy is \"the guy\"");
+            d.ToString().Should().Be(" 0123.45");
+            e.ToString().Should().Be(" LightBlue");
+        }
+
+        [Fact]
+        public void TextFindHelper_GetField_QuotedFieldEdges()
+        {
+            // Arrange
+
+            var record = """"
+                "", "a ""b""" , "last"
+                """";
+
+            var finder = new TextFindHelperField(record, ",", QuoteHelper.Quote,
+                stackalloc (int start, int count, bool quoted)[3],
+                stackalloc char[1024]);
+
+            // Act
+
+            var first = finder.GetField(0).ToString();
+            var second = finder.GetField(1).ToString();
+            var third = finder.GetField(2).ToString();
+
+            // Assert
+
+            first.Should().BeEmpty();
+            second.Should().Be("a \"b\"");
+            third.Should().Be("last");
+        }
+    }
+}
diff --git a/RecordParser/Engines/Reader/TextFindFieldHelper.cs b/RecordParser/Engines/Reader/TextFindFieldHelper.cs
new file mode 100644
index 0000000..be114f9
--- /dev/null
+++ b/RecordParser/Engines/Reader/TextFindFieldHelper.cs
@@ -0,0 +1,219 @@
+using System;
+using System.Buffers;
+
+namespace RecordParser.Engines.Reader
+{
+    /// <summary>
+    /// Lazily splits one delimited line into fields and serves them by index, in any order.
+    /// Field boundaries are cached in a caller-supplied span, so the line is scanned only once.
+    /// </summary>
+    /// <remarks>
+    /// A quoted field containing escaped (doubled) quotes is unescaped into a scratch buffer
+    /// that is reused between calls, so that result is valid only until the next call to
+    /// <see cref="GetField"/>. Every other result is a slice of the source line.
+    /// </remarks>
+    public ref struct TextFindHelperField
+    {
+        private const string CorruptFieldError = "Double quote is not escaped or there is extra data after a quoted field.";
+
+        private readonly ReadOnlySpan<char> line;
+        private readonly string delimiter;
+        private readonly (char ch, string str) quote;
+
+        // One entry per parsed field; for quoted fields the range excludes the enclosing quotes.
+        private readonly Span<(int start, int count, bool quoted)> fields;
+
+        // Caller-supplied scratch space used to unescape quoted fields without renting.
+        private readonly Span<char> cheapBuffer;
+
+        // Index in line where the next unparsed field begins; greater than line.Length once exhausted.
+        private int nextStart;
+
+        // Index of the last field recorded in fields, or -1 before any parsing.
+        private int currentIndex;
+
+        // Pooled fallback for fields larger than cheapBuffer; handed back by Dispose.
+        private char[] buffer;
+
+        /// <summary>Creates a reader over <paramref name="source"/>.</summary>
+        /// <param name="source">The line to split.</param>
+        /// <param name="delimiter">The field separator; must not be null or empty.</param>
+        /// <param name="quote">The quote character and its string form.</param>
+        /// <param name="fields">Cache for field boundaries; its length caps the highest readable index.</param>
+        /// <param name="cheapBuffer">Scratch space for unescaping quoted fields.</param>
+        /// <exception cref="ArgumentException"><paramref name="delimiter"/> is null or empty.</exception>
+        internal TextFindHelperField(ReadOnlySpan<char> source, string delimiter, (char ch, string str) quote, Span<(int start, int count, bool quoted)> fields, Span<char> cheapBuffer)
+        {
+            if (string.IsNullOrEmpty(delimiter))
+            {
+                throw new ArgumentException("Delimiter must not be null or empty.", nameof(delimiter));
+            }
+
+            this.line = source;
+            this.delimiter = delimiter;
+            this.quote = quote;
+            this.fields = fields;
+            this.cheapBuffer = cheapBuffer;
+
+            nextStart = 0;
+            currentIndex = -1;
+            buffer = null;
+        }
+
+        /// <summary>Returns the rented buffer, if any, to the shared pool.</summary>
+        internal void Dispose()
+        {
+            if (buffer != null)
+            {
+                ArrayPool<char>.Shared.Return(buffer);
+                buffer = null;
+            }
+        }
+
+        /// <summary>Gets the content of the field at <paramref name="index"/>.</summary>
+        /// <param name="index">Zero-based field position.</param>
+        /// <returns>
+        /// The raw text of an unquoted field, or the unescaped content of a quoted field
+        /// without its enclosing quotes.
+        /// </returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="index"/> is negative, exceeds the field cache, or the line has fewer fields.
+        /// </exception>
+        /// <exception cref="FormatException">A quoted field is malformed.</exception>
+        public ReadOnlySpan<char> GetField(int index)
+        {
+            if ((uint)index >= (uint)fields.Length)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index), "Index is outside the field cache.");
+            }
+
+            while (currentIndex < index)
+            {
+                if (nextStart > line.Length)
+                {
+                    throw new ArgumentOutOfRangeException(nameof(index), "invalid index for line");
+                }
+
+                ParseNextField();
+            }
+
+            var (start, count, quoted) = fields[index];
+            var text = line.Slice(start, count);
+
+            // Only quoted fields holding an escaped quote need rewriting.
+            if (quoted is false || text.IndexOf(quote.ch) < 0)
+            {
+                return text;
+            }
+
+            var temp = GetScratch(text.Length);
+            var written = 0;
+
+            for (var i = 0; i < text.Length; i++)
+            {
+                temp[written++] = text[i];
+
+                // The parser guarantees inner quotes come in pairs: keep one, skip its twin.
+                if (text[i] == quote.ch)
+                {
+                    i++;
+                }
+            }
+
+            return temp.Slice(0, written);
+        }
+
+        // Picks the scratch area for an unescaped field: the caller's buffer when it fits, else a pooled array.
+        // Possible refinement: line.Length minus the summed count of the unquoted fields bounds what
+        // quoted fields can need, which would allow a smaller allocation.
+        private Span<char> GetScratch(int length)
+        {
+            if (length <= cheapBuffer.Length)
+            {
+                return cheapBuffer;
+            }
+
+            if (buffer == null || buffer.Length < length)
+            {
+                // Swap any undersized array for a bigger one; keeping it in the field lets Dispose return it.
+                Dispose();
+                buffer = ArrayPool<char>.Shared.Rent(length);
+            }
+
+            return buffer;
+        }
+
+        // Records the boundaries of the next field and moves nextStart past its delimiter.
+        private void ParseNextField()
+        {
+            var unlook = line.Slice(nextStart);
+
+            if (unlook.TrimStart().StartsWith(quote.str))
+            {
+                ParseQuotedField(unlook);
+                return;
+            }
+
+            var length = unlook.IndexOf(delimiter);
+            if (length < 0)
+            {
+                length = unlook.Length;
+            }
+
+            fields[++currentIndex] = (nextStart, length, false);
+            nextStart += length + delimiter.Length;
+        }
+
+        // Records a quoted field; unlook is the rest of the line starting at nextStart.
+        private void ParseQuotedField(ReadOnlySpan<char> unlook)
+        {
+            var contentStart = nextStart + unlook.IndexOf(quote.ch) + 1;
+            var content = line.Slice(contentStart);
+
+            for (var i = 0; i < content.Length; i++)
+            {
+                if (content[i] != quote.ch)
+                {
+                    continue;
+                }
+
+                var next = content.Slice(i + 1);
+
+                // Closing quote followed by nothing but whitespace: this is the last field.
+                if (next.TrimStart().IsEmpty)
+                {
+                    fields[++currentIndex] = (contentStart, i, true);
+                    nextStart = line.Length + 1;
+                    return;
+                }
+
+                // A doubled quote is an escaped quote that belongs to the content.
+                if (next[0] == quote.ch)
+                {
+                    i++;
+                    continue;
+                }
+
+                // Closing quote: only whitespace may sit between it and the delimiter.
+                for (var t = 0; t < next.Length; t++)
+                {
+                    if (next.Slice(t).StartsWith(delimiter))
+                    {
+                        fields[++currentIndex] = (contentStart, i, true);
+                        nextStart = contentStart + i + 1 + t + delimiter.Length;
+                        return;
+                    }
+
+                    if (char.IsWhiteSpace(next[t]) is false)
+                    {
+                        break;
+                    }
+                }
+
+                throw new FormatException(CorruptFieldError);
+            }
+
+            throw new FormatException("Quoted field is missing closing quote.");
+        }
+    }
+}