diff --git a/packages/cursorless-engine/src/core/HatTokenMapImpl.ts b/packages/cursorless-engine/src/core/HatTokenMapImpl.ts index 559375d93b3..2df45a88592 100644 --- a/packages/cursorless-engine/src/core/HatTokenMapImpl.ts +++ b/packages/cursorless-engine/src/core/HatTokenMapImpl.ts @@ -23,7 +23,7 @@ const PRE_PHRASE_SNAPSHOT_MAX_AGE_NS = BigInt(6e10); // 60 seconds */ export class HatTokenMapImpl implements HatTokenMap { /** - * This is the active map the changes every time we reallocate hats. It is + * This is the active map that changes every time we reallocate hats. It is * liable to change in the middle of a phrase. */ private activeMap: IndividualHatMap; diff --git a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordScopeHandler.ts b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordScopeHandler.ts index 085a9e87efb..0f378e064c0 100644 --- a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordScopeHandler.ts +++ b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordScopeHandler.ts @@ -16,8 +16,6 @@ export default class WordScopeHandler extends NestedScopeHandler { domain, }: TargetScope): TargetScope[] { const { document } = editor; - // FIXME: Switch to using getMatchesInRange once we are able to properly - // mock away vscode for the unit tests in subtoken.test.ts const offset = document.offsetAt(domain.start); const matches = this.wordTokenizer.splitIdentifier( document.getText(domain), diff --git a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer.ts b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer.ts index 1f5efb3fad8..966162028f2 100644 --- a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer.ts +++ 
b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer.ts @@ -4,10 +4,8 @@ import { matchText } from "../../../../util/regex"; const CAMEL_REGEX = /\p{Lu}?\p{Ll}+|\p{Lu}+(?!\p{Ll})|\p{N}+/gu; /** - * This class just encapsulates the word-splitting logic from - * {@link WordScopeHandler}. We could probably just inline it into that class, - * but for now we need it here because we can't yet properly mock away vscode - * for the unit tests in subtoken.test.ts. + * This class encapsulates word-splitting logic. + * It is used by the {@link WordScopeHandler} and the hat allocator. */ export default class WordTokenizer { private wordRegex: RegExp; diff --git a/packages/cursorless-engine/src/test/fixtures/subtoken.fixture.ts b/packages/cursorless-engine/src/test/fixtures/subtoken.fixture.ts index 7916fa3baf8..4c11957f218 100644 --- a/packages/cursorless-engine/src/test/fixtures/subtoken.fixture.ts +++ b/packages/cursorless-engine/src/test/fixtures/subtoken.fixture.ts @@ -84,4 +84,13 @@ export const subtokenFixture: Fixture[] = [ input: "_quickBrownFox_", expectedOutput: ["quick", "Brown", "Fox"], }, + { + input: "thisIsATest", + expectedOutput: ["this", "Is", "A", "Test"], + }, + // TODO: Handle this correctly? 
+ // { + // input: "NSURLSession", + // expectedOutput: ["NS", "URL", "Session"], + // }, ]; diff --git a/packages/cursorless-engine/src/util/allocateHats/HatMetrics.ts b/packages/cursorless-engine/src/util/allocateHats/HatMetrics.ts index 3aae820a475..87d6775d8cc 100644 --- a/packages/cursorless-engine/src/util/allocateHats/HatMetrics.ts +++ b/packages/cursorless-engine/src/util/allocateHats/HatMetrics.ts @@ -14,6 +14,12 @@ export type HatMetric = (hat: HatCandidate) => number; */ export const negativePenalty: HatMetric = ({ penalty }) => -penalty; +/** + * @returns A metric that penalizes graphemes that are the first letter of a word within a token + */ +export const avoidFirstLetter: HatMetric = ({ isFirstLetter }) => + isFirstLetter ? -1 : 0; + /** * @param hatOldTokenRanks A map from a hat candidate (grapheme+style combination) to the score of the * token that used the given hat in the previous hat allocation. diff --git a/packages/cursorless-engine/src/util/allocateHats/allocateHats.ts b/packages/cursorless-engine/src/util/allocateHats/allocateHats.ts index 5c22ffab461..556522c8cb9 100644 --- a/packages/cursorless-engine/src/util/allocateHats/allocateHats.ts +++ b/packages/cursorless-engine/src/util/allocateHats/allocateHats.ts @@ -14,11 +14,13 @@ import { Grapheme, TokenGraphemeSplitter } from "../../tokenGraphemeSplitter"; import { chooseTokenHat } from "./chooseTokenHat"; import { getHatRankingContext } from "./getHatRankingContext"; import { getRankedTokens } from "./getRankedTokens"; +import WordTokenizer from "../../processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer"; export interface HatCandidate { grapheme: Grapheme; style: HatStyleName; penalty: number; + isFirstLetter: boolean; } /** @@ -137,6 +139,10 @@ function getTokenRemainingHatCandidates( token: Token, availableGraphemeStyles: DefaultMap, ): HatCandidate[] { + const words = new WordTokenizer( + token.editor.document.languageId, + ).splitIdentifier(token.text); + const 
firstLetters = new Set(words.map((word) => word.index)); return tokenGraphemeSplitter .getTokenGraphemes(token.text) .flatMap((grapheme) => @@ -145,13 +151,14 @@ function getTokenRemainingHatCandidates( grapheme, style, penalty, + isFirstLetter: firstLetters.has(grapheme.tokenStartOffset), }), ), ); } /** - * @param token The token that recevied the hat + * @param token The token that received the hat * @param chosenHat The hat we chose for the token * @returns An object indicating the hat assigned to the token, along with the * range of the grapheme upon which it sits diff --git a/packages/cursorless-engine/src/util/allocateHats/chooseTokenHat.ts b/packages/cursorless-engine/src/util/allocateHats/chooseTokenHat.ts index c1e583677cd..5fc99e005c6 100644 --- a/packages/cursorless-engine/src/util/allocateHats/chooseTokenHat.ts +++ b/packages/cursorless-engine/src/util/allocateHats/chooseTokenHat.ts @@ -2,6 +2,7 @@ import { HatStability, TokenHat } from "@cursorless/common"; import { HatCandidate } from "./allocateHats"; import { RankingContext } from "./getHatRankingContext"; import { + avoidFirstLetter, hatOldTokenRank, isOldTokenHat, minimumTokenRankContainingGrapheme, @@ -71,7 +72,10 @@ export function chooseTokenHat( // 4. Narrow to the hats with the lowest penalty negativePenalty, - // 5. Prefer hats that sit on a grapheme that doesn't appear in any highly + // 5. Avoid graphemes that are the first letter of a word within the token + avoidFirstLetter, + + // 6. 
Prefer hats that sit on a grapheme that doesn't appear in any highly // ranked token minimumTokenRankContainingGrapheme(tokenRank, graphemeTokenRanks), ])!; diff --git a/packages/cursorless-vscode-e2e/src/suite/keyboard/basic.vscode.test.ts b/packages/cursorless-vscode-e2e/src/suite/keyboard/basic.vscode.test.ts index 4409c2b6188..c3ae5fc2ad1 100644 --- a/packages/cursorless-vscode-e2e/src/suite/keyboard/basic.vscode.test.ts +++ b/packages/cursorless-vscode-e2e/src/suite/keyboard/basic.vscode.test.ts @@ -38,8 +38,8 @@ async function basic() { await vscode.commands.executeCommand("cursorless.keyboard.modal.modeOn"); - // Target default f - await typeText("df"); + // Target default o + await typeText("do"); // Target containing function await typeText("sf");