From 81b85d8e2f2c90fcfeaa6c8212196c68ce58a7b8 Mon Sep 17 00:00:00 2001
From: Sebastian Ullrich <sebasti@nullri.ch>
Date: Wed, 27 Nov 2024 14:06:57 +0100
Subject: [PATCH] fix: reparsing may need to backtrack two commands (#6236)

This PR fixes an issue where edits to a command containing a nested
docstring fail to reparse the entire command.

Fixes #6227
---
 src/Lean/Language/Lean.lean                   | 44 +++++++++++++------
 .../lean/interactive/incrementalCommand.lean  | 12 +++++
 .../incrementalCommand.lean.expected.out      |  1 +
 3 files changed, 44 insertions(+), 13 deletions(-)
diff --git a/src/Lean/Language/Lean.lean b/src/Lean/Language/Lean.lean
index 79a13ffb0244..125a52e4e9cc 100644
--- a/src/Lean/Language/Lean.lean
+++ b/src/Lean/Language/Lean.lean
@@ -46,17 +46,33 @@ delete the space after private, it becomes a syntactically correct structure wit
 privateaxiom! So clearly, because of uses of atomic in the grammar, an edit can affect a command
 syntax tree even across multiple tokens.
 
-Now, what we do today, and have done since Lean 3, is to always reparse the last command completely
-preceding the edit location. If its syntax tree is unchanged, we preserve its data and reprocess all
-following commands only, otherwise we reprocess it fully as well. This seems to have worked well so
-far but it does seem a bit arbitrary given that even if it works for our current grammar, it can
-certainly be extended in ways that break the assumption.
+What we did in Lean 3 was to always reparse the last command completely preceding the edit location.
+If its syntax tree is unchanged, we preserve its data and reprocess all following commands only,
+otherwise we reprocess it fully as well. This worked well but did seem a bit arbitrary given that
+even if it works for a grammar at some point, it can certainly be extended in ways that break the
+assumption.
+
+With grammar changes in Lean 4, we found that the following example indeed breaks this assumption:
+```
+structure Signature where
+  /-- a docstring -/
+  Sort : Type
+    --^ insert: "s"
+```
+As the keyword `Sort` is not a valid start of a structure field and the parser backtracks across the
+docstring in that case, this is parsed as the complete command `structure Signature where` followed
+by the partial command `/-- a docstring -/ <missing>`. If we insert an `s` after the `t`, the last
+command completely preceding the edit location is the partial command containing the docstring. Thus
+we need to go up two commands to ensure we reparse the `structure` command as well. This kind of
+nested docstring is the only part of the grammar to our knowledge that requires going up at least
+two commands; as we never backtrack across more than one docstring, going up two commands should
+also be sufficient.
 
 Finally, a more actually principled and generic solution would be to invalidate a syntax tree when
 the parser has reached the edit location during parsing. If it did not, surely the edit cannot have
 an effect on the syntax tree in question. Sadly such a "high-water mark" parser position does not
 exist currently and likely it could at best be approximated by e.g. "furthest `tokenFn` parse". Thus
-we remain at "go two commands up" at this point.
+we remain at "go up two commands" at this point.
 -/
 
 /-!
@@ -340,11 +356,12 @@ where
     if let some old := old? then
       if let some oldSuccess := old.result? then
         if let some (some processed) ← old.processedResult.get? then
-          -- ...and the edit location is after the next command (see note [Incremental Parsing])...
+          -- ...and the edit is after the second-next command (see note [Incremental Parsing])...
           if let some nextCom ← processed.firstCmdSnap.get? then
-            if (← isBeforeEditPos nextCom.parserState.pos) then
-              -- ...go immediately to next snapshot
-              return (← unchanged old old.stx oldSuccess.parserState)
+            if let some nextNextCom ← processed.firstCmdSnap.get? then
+              if (← isBeforeEditPos nextNextCom.parserState.pos) then
+                -- ...go immediately to next snapshot
+                return (← unchanged old old.stx oldSuccess.parserState)
 
     withHeaderExceptions ({ · with
         ictx, stx := .missing, result? := none, cancelTk? := none }) do
@@ -473,11 +490,12 @@ where
         prom.resolve <| { old with nextCmdSnap? := some { range? := none, task := newProm.result } }
       else prom.resolve old  -- terminal command, we're done!
 
-    -- fast path, do not even start new task for this snapshot
+    -- fast path, do not even start new task for this snapshot (see [Incremental Parsing])
     if let some old := old? then
       if let some nextCom ← old.nextCmdSnap?.bindM (·.get?) then
-        if (← isBeforeEditPos nextCom.parserState.pos) then
-          return (← unchanged old old.parserState)
+        if let some nextNextCom ← nextCom.nextCmdSnap?.bindM (·.get?) then
+          if (← isBeforeEditPos nextNextCom.parserState.pos) then
+            return (← unchanged old old.parserState)
 
     let beginPos := parserState.pos
     let scope := cmdState.scopes.head!
diff --git a/tests/lean/interactive/incrementalCommand.lean b/tests/lean/interactive/incrementalCommand.lean
index cc6359e9bf6c..8829d1d46376 100644
--- a/tests/lean/interactive/incrementalCommand.lean
+++ b/tests/lean/interactive/incrementalCommand.lean
@@ -94,3 +94,15 @@ def f := 1  -- used to raise "unexpected identifier" after edit below because we
 def g := 2
    --^ insert: "g"
    --^ collectDiagnostics
+
+/-!
+Example showing that we need to reparse at least two commands preceding a change; see note
+[Incremental Parsing].
+-/
+-- RESET
+structure Signature where
+  /-- a docstring -/
+  Sort : Type
+    --^ sync
+    --^ insert: "s"
+    --^ collectDiagnostics
diff --git a/tests/lean/interactive/incrementalCommand.lean.expected.out b/tests/lean/interactive/incrementalCommand.lean.expected.out
index 73d7361e3429..0b01cb78b1fd 100644
--- a/tests/lean/interactive/incrementalCommand.lean.expected.out
+++ b/tests/lean/interactive/incrementalCommand.lean.expected.out
@@ -34,3 +34,4 @@ w
    "fullRange":
    {"start": {"line": 2, "character": 2}, "end": {"line": 2, "character": 9}}}]}
 {"version": 3, "uri": "file:///incrementalCommand.lean", "diagnostics": []}
+{"version": 2, "uri": "file:///incrementalCommand.lean", "diagnostics": []}