avoid allocating hats to the first letter of a word in a token

I propose that we declare that it fixes #1658, at least for now.
cursorless-dev · Aug 20, 2023 · 13a9585 · 13a9585
1 parent 45b18d6
commit 13a9585
Show file tree

Hide file tree

Showing 8 changed files with 33 additions and 11 deletions.
diff --git a/packages/cursorless-engine/src/core/HatTokenMapImpl.ts b/packages/cursorless-engine/src/core/HatTokenMapImpl.ts
@@ -23,7 +23,7 @@ const PRE_PHRASE_SNAPSHOT_MAX_AGE_NS = BigInt(6e10); // 60 seconds
  */
 export class HatTokenMapImpl implements HatTokenMap {
   /**
-   * This is the active map the changes every time we reallocate hats. It is
+   * This is the active map that changes every time we reallocate hats. It is
    * liable to change in the middle of a phrase.
    */
   private activeMap: IndividualHatMap;

diff --git a/...ss-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordScopeHandler.ts b/...ss-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordScopeHandler.ts
@@ -16,8 +16,6 @@ export default class WordScopeHandler extends NestedScopeHandler {
     domain,
   }: TargetScope): TargetScope[] {
     const { document } = editor;
-    // FIXME: Switch to using getMatchesInRange once we are able to properly
-    // mock away vscode for the unit tests in subtoken.test.ts
     const offset = document.offsetAt(domain.start);
     const matches = this.wordTokenizer.splitIdentifier(
       document.getText(domain),

diff --git a/...rless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer.ts b/...rless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer.ts
@@ -4,10 +4,8 @@ import { matchText } from "../../../../util/regex";
 const CAMEL_REGEX = /\p{Lu}?\p{Ll}+|\p{Lu}+(?!\p{Ll})|\p{N}+/gu;
 
 /**
- * This class just encapsulates the word-splitting logic from
- * {@link WordScopeHandler}.  We could probably just inline it into that class,
- * but for now we need it here because we can't yet properly mock away vscode
- * for the unit tests in subtoken.test.ts.
+ * This class encapsulates word-splitting logic.
+ * It is used by the {@link WordScopeHandler} and the hat allocator.
  */
 export default class WordTokenizer {
   private wordRegex: RegExp;

diff --git a/packages/cursorless-engine/src/test/fixtures/subtoken.fixture.ts b/packages/cursorless-engine/src/test/fixtures/subtoken.fixture.ts
@@ -84,4 +84,13 @@ export const subtokenFixture: Fixture[] = [
     input: "_quickBrownFox_",
     expectedOutput: ["quick", "Brown", "Fox"],
   },
+  {
+    input: "thisIsATest",
+    expectedOutput: ["this", "Is", "A", "Test"],
+  },
+  // TODO: Handle this correctly?
+  // {
+  //   input: "NSURLSession",
+  //   expectedOutput: ["NS", "URL", "Session"],
+  // },
 ];
diff --git a/packages/cursorless-engine/src/util/allocateHats/HatMetrics.ts b/packages/cursorless-engine/src/util/allocateHats/HatMetrics.ts
@@ -14,6 +14,12 @@ export type HatMetric = (hat: HatCandidate) => number;
  */
 export const negativePenalty: HatMetric = ({ penalty }) => -penalty;
 
+/**
+ * @returns A metric that penalizes graphemes that are the first letter of a word within a token
+ */
+export const avoidFirstLetter: HatMetric = ({ isFirstLetter }) =>
+  isFirstLetter ? -1 : 0;
+
 /**
  * @param hatOldTokenRanks A map from a hat candidate (grapheme+style combination) to the score of the
  * token that used the given hat in the previous hat allocation.

diff --git a/packages/cursorless-engine/src/util/allocateHats/allocateHats.ts b/packages/cursorless-engine/src/util/allocateHats/allocateHats.ts
@@ -14,11 +14,13 @@ import { Grapheme, TokenGraphemeSplitter } from "../../tokenGraphemeSplitter";
 import { chooseTokenHat } from "./chooseTokenHat";
 import { getHatRankingContext } from "./getHatRankingContext";
 import { getRankedTokens } from "./getRankedTokens";
+import WordTokenizer from "../../processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer";
 
 export interface HatCandidate {
   grapheme: Grapheme;
   style: HatStyleName;
   penalty: number;
+  isFirstLetter: boolean;
 }
 
 /**
@@ -137,6 +139,10 @@ function getTokenRemainingHatCandidates(
   token: Token,
   availableGraphemeStyles: DefaultMap<string, HatStyleMap>,
 ): HatCandidate[] {
+  const words = new WordTokenizer(
+    token.editor.document.languageId,
+  ).splitIdentifier(token.text);
+  const firstLetters = new Set<number>(words.map((word) => word.index));
   return tokenGraphemeSplitter
     .getTokenGraphemes(token.text)
     .flatMap((grapheme) =>
@@ -145,13 +151,14 @@ function getTokenRemainingHatCandidates(
           grapheme,
           style,
           penalty,
+          isFirstLetter: firstLetters.has(grapheme.tokenStartOffset),
         }),
       ),
     );
 }
 
 /**
- * @param token The token that recevied the hat
+ * @param token The token that received the hat
  * @param chosenHat The hat we chose for the token
  * @returns An object indicating the hat assigned to the token, along with the
  * range of the grapheme upon which it sits

diff --git a/packages/cursorless-engine/src/util/allocateHats/chooseTokenHat.ts b/packages/cursorless-engine/src/util/allocateHats/chooseTokenHat.ts
@@ -2,6 +2,7 @@ import { HatStability, TokenHat } from "@cursorless/common";
 import { HatCandidate } from "./allocateHats";
 import { RankingContext } from "./getHatRankingContext";
 import {
+  avoidFirstLetter,
   hatOldTokenRank,
   isOldTokenHat,
   minimumTokenRankContainingGrapheme,
@@ -71,7 +72,10 @@ export function chooseTokenHat(
     // 4. Narrow to the hats with the lowest penalty
     negativePenalty,
 
-    // 5. Prefer hats that sit on a grapheme that doesn't appear in any highly
+    // 5. Avoid the first grapheme of the token if possible
+    avoidFirstLetter,
+
+    // 6. Prefer hats that sit on a grapheme that doesn't appear in any highly
     //    ranked token
     minimumTokenRankContainingGrapheme(tokenRank, graphemeTokenRanks),
   ])!;

diff --git a/packages/cursorless-vscode-e2e/src/suite/keyboard/basic.vscode.test.ts b/packages/cursorless-vscode-e2e/src/suite/keyboard/basic.vscode.test.ts
@@ -38,8 +38,8 @@ async function basic() {
 
   await vscode.commands.executeCommand("cursorless.keyboard.modal.modeOn");
 
-  // Target default f
-  await typeText("df");
+  // Target default o
+  await typeText("do");
 
   // Target containing function
   await typeText("sf");