Skip to content

Commit

Permalink
avoid allocating hats to the first letter of a token
Browse files Browse the repository at this point in the history
We could get much fancier than this,
but after running this with a day it appears to help some,
and it is nice and simple.

I propose that we declare that it fixes cursorless-dev#1658,
at least for now.
  • Loading branch information
josharian committed Aug 2, 2023
1 parent 342a771 commit 0385b48
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,26 @@ export class TokenGraphemeSplitter {
* @param token The token to split
* @returns A list of normalised graphemes in {@link token}
*/
getTokenGraphemes = (token: string): Grapheme[] =>
matchAll<Grapheme>(token, GRAPHEME_SPLIT_REGEX, (match) => ({
text: this.normalizeGrapheme(match[0]),
tokenStartOffset: match.index!,
tokenEndOffset: match.index! + match[0].length,
}));
getTokenGraphemes = (token: string): Grapheme[] => {
const graphemes = matchAll<Grapheme>(
token,
GRAPHEME_SPLIT_REGEX,
(match) => ({
text: this.normalizeGrapheme(match[0]),
tokenStartOffset: match.index!,
tokenEndOffset: match.index! + match[0].length,
isFirstLetterGrapheme: false,
}),
);
// iterate through the graphemes, marking the first letter
for (const grapheme of graphemes) {
if (grapheme.text.match(/[a-z]/)) {
grapheme.isFirstLetterGrapheme = true;
break;
}
}
return graphemes;
};

/**
* Normalizes the grapheme {@link rawGraphemeText} based on user
Expand Down Expand Up @@ -201,4 +215,12 @@ export interface Grapheme {

/** The end offset of the grapheme within its containing token */
tokenEndOffset: number;

/**
* Whether this grapheme is the first letter grapheme of the text
* See https://github.com/cursorless-dev/cursorless/issues/1658
* TODO: Consider instead whether this grapheme is the beginning of a word inside the text.
* This is more complicated, because the definition of a word can vary by language.
*/
isFirstLetterGrapheme: boolean;
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ export type HatMetric = (hat: HatCandidate) => number;
*/
export const negativePenalty: HatMetric = ({ penalty }) => -penalty;

/**
* @returns A metric that penalizes graphemes that are the first letter of a token
*/
export const avoidFirstLetter: HatMetric = ({ grapheme }) =>
grapheme.isFirstLetterGrapheme ? -1 : 0;

/**
* @param hatOldTokenRanks A map from a hat candidate (grapheme+style combination) to the score of the
* token that used the given hat in the previous hat allocation.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ function getTokenRemainingHatCandidates(
}

/**
* @param token The token that recevied the hat
* @param token The token that received the hat
* @param chosenHat The hat we chose for the token
* @returns An object indicating the hat assigned to the token, along with the
* range of the grapheme upon which it sits
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { HatStability, TokenHat } from "@cursorless/common";
import { HatCandidate } from "./allocateHats";
import { RankingContext } from "./getHatRankingContext";
import {
avoidFirstLetter,
hatOldTokenRank,
isOldTokenHat,
minimumTokenRankContainingGrapheme,
Expand Down Expand Up @@ -71,7 +72,10 @@ export function chooseTokenHat(
// 4. Narrow to the hats with the lowest penalty
negativePenalty,

// 5. Prefer hats that sit on a grapheme that doesn't appear in any highly
// 5. Avoid the first grapheme of the token if possible
avoidFirstLetter,

// 6. Prefer hats that sit on a grapheme that doesn't appear in any highly
// ranked token
minimumTokenRankContainingGrapheme(tokenRank, graphemeTokenRanks),
])!;
Expand Down

0 comments on commit 0385b48

Please sign in to comment.