From 043e2460c5c4e51ce78277d81ac848bcf1cfadb7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E3=81=97=E3=81=9A=E3=81=88?=
 <66309775+kuredoro@users.noreply.github.com>
Date: Fri, 8 Jan 2021 11:44:44 +0300
Subject: [PATCH] Spurious LFs are skipped when comparing lexemes.

---
 lexer.go      | 74 ++++++++++++++++++++++++++++-----------------------
 lexer_test.go | 37 ++++++++++++++++++++++++++
 testing.go    | 21 +++++----------
 3 files changed, 84 insertions(+), 48 deletions(-)

diff --git a/lexer.go b/lexer.go
index 5108ffa..c95aa98 100644
--- a/lexer.go
+++ b/lexer.go
@@ -126,44 +126,38 @@ func (l *Lexer) Compare(target, source []string) (rts []RichText, ok bool) {
 	rts = make([]RichText, len(target))
 	ok = true
 
-	commonLen := len(target)
-	if len(source) < commonLen {
-		commonLen = len(source)
-	}
-
-	for i, xm := range target[:commonLen] {
-		rts[i].Str = xm
-
-		targetType := DeduceLexemeType(xm)
-		sourceType := DeduceLexemeType(source[i])
-
-		commonType := targetType
-		if sourceType < commonType {
-			commonType = sourceType
-		}
-
-		rts[i].Mask = MaskGenerators[commonType](l, xm, source[i])
-
-		maskEmpty := true
-		for _, bit := range rts[i].Mask {
-			if bit == true {
-				maskEmpty = false
-				break
-			}
-		}
-
-		if !maskEmpty {
+    ti, si := 0, 0
+	for ; ti < len(target) && si < len(source); ti, si = ti+1, si+1 {
+        // Skip spurious LFs
+        if source[si] != "\n" {
+            for ti < len(target) && target[ti] == "\n" {
+                rts[ti].Str = "\n"
+                rts[ti].Mask = []bool{true}
+                ok = false
+                ti++
+            }
+        } else if target[ti] != "\n" {
+            for si < len(source) && source[si] == "\n" {
+                si++
+            }
+        }
+
+        if ti == len(target) || si == len(source) {
+            break
+        }
+
+        xm := target[ti]
+		rts[ti].Str = xm
+		rts[ti].Mask = l.GenerateMask(xm, source[si])
+
+		if rts[ti].Colorful() {
 			ok = false
 		}
 	}
 
-	for i := commonLen; i < len(target); i++ {
-		rts[i].Str = target[i]
-
-		rts[i].Mask = make([]bool, len(target[i]))
-		for mi := range rts[i].Mask {
-			rts[i].Mask[mi] = true
-		}
+	for ; ti < len(target); ti++ {
+		rts[ti].Str = target[ti]
+        rts[ti].Mask = l.GenMaskForString(target[ti], "")
 
 		ok = false
 	}
@@ -182,6 +176,18 @@ func DeduceLexemeType(xm string) LexemeType {
 	return LexemeType(FINALXM - 1)
 }
 
+// GenerateMask returns the mask for target compared against source. It
+// picks the smaller of the two deduced lexeme types and delegates to the
+// matching entry of MaskGenerators.
+func (l *Lexer) GenerateMask(target, source string) []bool {
+	commonType := DeduceLexemeType(target)
+	if sourceType := DeduceLexemeType(source); sourceType < commonType {
+		commonType = sourceType
+	}
+
+	return MaskGenerators[commonType](l, target, source)
+}
+
 func (l *Lexer) GenMaskForString(target, source string) (mask []bool) {
 	commonLen := len(target)
 	if len(source) < commonLen {
diff --git a/lexer_test.go b/lexer_test.go
index 6d102f4..3e0a3af 100644
--- a/lexer_test.go
+++ b/lexer_test.go
@@ -120,6 +120,43 @@ func TestLexerCompare(t *testing.T) {
 		cptest.AssertDiffFailure(t, ok)
 		cptest.AssertEnrichedLexSequence(t, got, want)
 	})
+
+	t.Run("spurious LFs are skipped in target", func(t *testing.T) {
+		target := []string{"foo", "\n", "\n", "bar"}
+		source := []string{"foo", "\n", "bar"}
+
+		lexer := &cptest.Lexer{}
+
+		got, ok := lexer.Compare(target, source)
+
+		want := []cptest.RichText{
+			{target[0], lexer.GenMaskForString(target[0], source[0])},
+			{target[1], lexer.GenMaskForString(target[1], source[1])},
+			{target[2], []bool{true}},
+			{target[3], lexer.GenMaskForString(target[3], source[2])},
+		}
+
+		cptest.AssertDiffFailure(t, ok)
+		cptest.AssertEnrichedLexSequence(t, got, want)
+	})
+
+	t.Run("spurious LFs are skipped in source", func(t *testing.T) {
+		target := []string{"foo", "\n", "bar"}
+		source := []string{"foo", "\n", "\n", "bar"}
+
+		lexer := &cptest.Lexer{}
+
+		got, ok := lexer.Compare(target, source)
+
+		want := []cptest.RichText{
+			{target[0], lexer.GenMaskForString(target[0], source[0])},
+			{target[1], lexer.GenMaskForString(target[1], source[1])},
+			{target[2], lexer.GenMaskForString(target[2], source[3])},
+		}
+
+		cptest.AssertDiffSuccess(t, ok)
+		cptest.AssertEnrichedLexSequence(t, got, want)
+	})
 }
 
 func TestGenMaskForString(t *testing.T) {
diff --git a/testing.go b/testing.go
index a0ecdb7..eeb550f 100644
--- a/testing.go
+++ b/testing.go
@@ -217,22 +217,15 @@ func AssertRichTextMask(t *testing.T, got, want []bool) {
 func AssertEnrichedLexSequence(t *testing.T, got, want []RichText) {
 	t.Helper()
 
-	commonLen := len(got)
-	if len(want) < commonLen {
-		commonLen = len(want)
-	}
-
-	for i := 0; i < commonLen; i++ {
-		AssertRichText(t, got[i], want[i])
-	}
+	// Dump the received lexemes on one line, with LFs shown as a literal \n.
+	gotStr := strings.ReplaceAll(DumpLexemes(got, aurora.BoldFm), "\n", "\\n")
 
-	for i := commonLen; i < len(got); i++ {
-		AssertRichText(t, got[i], RichText{"", []bool{}})
-	}
+	// Dump the expected lexemes the same way so the strings are comparable.
+	wantStr := strings.ReplaceAll(DumpLexemes(want, aurora.BoldFm), "\n", "\\n")
 
-	for i := commonLen; i < len(want); i++ {
-		AssertRichText(t, RichText{"", []bool{}}, want[i])
-	}
+	if gotStr != wantStr {
+		t.Errorf("got lexemes '%s', want '%s'", gotStr, wantStr)
+	}
 }
 
 func AssertText(t *testing.T, got, want string) {