Skip to content

Commit

Permalink
avoid allocating hats to the first letter of a word in a token
Browse files Browse the repository at this point in the history
I propose that we declare that it fixes #1658,
at least for now.
  • Loading branch information
josharian committed Aug 20, 2023
1 parent 45b18d6 commit 13a9585
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 11 deletions.
2 changes: 1 addition & 1 deletion packages/cursorless-engine/src/core/HatTokenMapImpl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const PRE_PHRASE_SNAPSHOT_MAX_AGE_NS = BigInt(6e10); // 60 seconds
*/
export class HatTokenMapImpl implements HatTokenMap {
/**
* This is the active map the changes every time we reallocate hats. It is
* This is the active map that changes every time we reallocate hats. It is
* liable to change in the middle of a phrase.
*/
private activeMap: IndividualHatMap;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ export default class WordScopeHandler extends NestedScopeHandler {
domain,
}: TargetScope): TargetScope[] {
const { document } = editor;
// FIXME: Switch to using getMatchesInRange once we are able to properly
// mock away vscode for the unit tests in subtoken.test.ts
const offset = document.offsetAt(domain.start);
const matches = this.wordTokenizer.splitIdentifier(
document.getText(domain),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@ import { matchText } from "../../../../util/regex";
const CAMEL_REGEX = /\p{Lu}?\p{Ll}+|\p{Lu}+(?!\p{Ll})|\p{N}+/gu;

/**
* This class just encapsulates the word-splitting logic from
* {@link WordScopeHandler}. We could probably just inline it into that class,
* but for now we need it here because we can't yet properly mock away vscode
* for the unit tests in subtoken.test.ts.
* This class encapsulates word-splitting logic.
* It is used by the {@link WordScopeHandler} and the hat allocator.
*/
export default class WordTokenizer {
private wordRegex: RegExp;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,13 @@ export const subtokenFixture: Fixture[] = [
input: "_quickBrownFox_",
expectedOutput: ["quick", "Brown", "Fox"],
},
{
input: "thisIsATest",
expectedOutput: ["this", "Is", "A", "Test"],
},
// TODO: Handle this correctly?
// {
// input: "NSURLSession",
// expectedOutput: ["NS", "URL", "Session"],
// },
];
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ export type HatMetric = (hat: HatCandidate) => number;
*/
export const negativePenalty: HatMetric = ({ penalty }) => -penalty;

/**
* @returns A metric that penalizes graphemes that are the first letter of a word within a token
*/
export const avoidFirstLetter: HatMetric = ({ isFirstLetter }) =>
isFirstLetter ? -1 : 0;

/**
* @param hatOldTokenRanks A map from a hat candidate (grapheme+style combination) to the score of the
* token that used the given hat in the previous hat allocation.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ import { Grapheme, TokenGraphemeSplitter } from "../../tokenGraphemeSplitter";
import { chooseTokenHat } from "./chooseTokenHat";
import { getHatRankingContext } from "./getHatRankingContext";
import { getRankedTokens } from "./getRankedTokens";
import WordTokenizer from "../../processTargets/modifiers/scopeHandlers/WordScopeHandler/WordTokenizer";

export interface HatCandidate {
grapheme: Grapheme;
style: HatStyleName;
penalty: number;
isFirstLetter: boolean;
}

/**
Expand Down Expand Up @@ -137,6 +139,10 @@ function getTokenRemainingHatCandidates(
token: Token,
availableGraphemeStyles: DefaultMap<string, HatStyleMap>,
): HatCandidate[] {
const words = new WordTokenizer(
token.editor.document.languageId,
).splitIdentifier(token.text);
const firstLetters = new Set<number>(words.map((word) => word.index));
return tokenGraphemeSplitter
.getTokenGraphemes(token.text)
.flatMap((grapheme) =>
Expand All @@ -145,13 +151,14 @@ function getTokenRemainingHatCandidates(
grapheme,
style,
penalty,
isFirstLetter: firstLetters.has(grapheme.tokenStartOffset),
}),
),
);
}

/**
* @param token The token that recevied the hat
* @param token The token that received the hat
* @param chosenHat The hat we chose for the token
* @returns An object indicating the hat assigned to the token, along with the
* range of the grapheme upon which it sits
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { HatStability, TokenHat } from "@cursorless/common";
import { HatCandidate } from "./allocateHats";
import { RankingContext } from "./getHatRankingContext";
import {
avoidFirstLetter,
hatOldTokenRank,
isOldTokenHat,
minimumTokenRankContainingGrapheme,
Expand Down Expand Up @@ -71,7 +72,10 @@ export function chooseTokenHat(
// 4. Narrow to the hats with the lowest penalty
negativePenalty,

// 5. Prefer hats that sit on a grapheme that doesn't appear in any highly
// 5. Avoid the first grapheme of the token if possible
avoidFirstLetter,

// 6. Prefer hats that sit on a grapheme that doesn't appear in any highly
// ranked token
minimumTokenRankContainingGrapheme(tokenRank, graphemeTokenRanks),
])!;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ async function basic() {

await vscode.commands.executeCommand("cursorless.keyboard.modal.modeOn");

// Target default f
await typeText("df");
// Target default o
await typeText("do");

// Target containing function
await typeText("sf");
Expand Down

0 comments on commit 13a9585

Please sign in to comment.