Skip to content

Commit

Permalink
More work on multicell URL detection
Browse files Browse the repository at this point in the history
  • Loading branch information
kovidgoyal committed Jan 11, 2025
1 parent a96b5ba commit 8a5b44d
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 39 deletions.
57 changes: 39 additions & 18 deletions kitty/line.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,26 +172,43 @@ is_url_lc(const ListOfChars *lc) {
return true;
}

// Move `num` characters to the right of cell `x` and return the resulting
// cell index. A multicell cell advances by mcd_x_limit(cell) - cell->x in one
// step (presumably the distance to the first cell after that character --
// confirm against mcd_x_limit); any other cell advances by one.
// Stepping stops as soon as the index is no longer below self->xnum, so the
// result can be >= self->xnum and must be range checked before use.
static index_type
next_char_pos(const Line *self, index_type x, index_type num) {
    index_type pos = x;
    for (; num && pos < self->xnum; num--) {
        const CPUCell *cell = self->cpu_cells + pos;
        if (cell->is_multicell) pos += mcd_x_limit(cell) - cell->x;
        else pos += 1;
    }
    return pos;
}

// Move `num` characters to the left of the character occupying cell `x`,
// counting a multicell character as a single step.
//
// Returns the index of the first cell of the character that was reached. With
// num == 0 that is the first cell of the character at `x` itself.
// Returns self->xnum when there is no such character: `x` is out of range,
// the walk would go past the start of the line, or a multicell cell claims an
// offset larger than its own index.
//
// Works on unsigned indices rather than pointers so that no pointer before
// the start of cpu_cells is ever formed.
static index_type
prev_char_pos(const Line *self, index_type x, index_type num) {
    const CPUCell *cells = self->cpu_cells;
    if (x >= self->xnum) return self->xnum;
    index_type pos = x;
    for (;;) {
        if (cells[pos].is_multicell) {
            // The cell's x field is the distance back to the first cell of its character
            if (cells[pos].x > pos) return self->xnum;
            pos -= cells[pos].x;
        }
        if (!num--) return pos;
        if (!pos) return self->xnum;  // nothing to the left of column 0
        pos--;
    }
}


static index_type
find_colon_slash(Line *self, index_type x, index_type limit, ListOfChars *lc) {
find_colon_slash(Line *self, index_type x, index_type limit, ListOfChars *lc, index_type scale) {
// Find :// at or before x
index_type pos = MIN(x, self->xnum - 1);
enum URL_PARSER_STATES {ANY, FIRST_SLASH, SECOND_SLASH};
enum URL_PARSER_STATES state = ANY;
limit = MAX(2u, limit);
if (pos < limit) return 0;
const CPUCell *c = self->cpu_cells + pos;
index_type n;
#define next_char_is(num, ch) ((n = next_char_pos(self, pos, num)) < self->xnum && cell_is_char(self->cpu_cells + n, ch) && cell_scale(self->cpu_cells + n) == scale)
if (cell_is_char(c, ':')) {
if (next_char_is(1, '/') && next_char_is(2, '/')) state = SECOND_SLASH;
} else if (cell_is_char(c, '/')) {
if (next_char_is(1, '/')) state = FIRST_SLASH;
}
#undef next_char_is

do {
const CPUCell *c = self->cpu_cells + pos;
text_in_cell(c, self->text_cache, lc);
if (!is_hostname_lc(lc)) return false;
if (pos == x) {
if (cell_is_char(c, ':')) {
if (pos + 2 < self->xnum && cell_is_char(self->cpu_cells + pos + 1, '/') && cell_is_char(self->cpu_cells + pos + 2, '/')) state = SECOND_SLASH;
} else if (cell_is_char(c, '/')) {
if (pos + 1 < self->xnum && cell_is_char(self->cpu_cells + pos + 1, '/')) state = FIRST_SLASH;
}
}
switch(state) {
case ANY:
if (cell_is_char(c, '/')) state = FIRST_SLASH;
Expand All @@ -204,7 +221,10 @@ find_colon_slash(Line *self, index_type x, index_type limit, ListOfChars *lc) {
state = cell_is_char(c, '/') ? SECOND_SLASH : ANY;
break;
}
pos--;
pos = prev_char_pos(self, pos, 1);
if (pos >= self->xnum) break;
c = self->cpu_cells + pos;
if (cell_scale(c) != scale) break;
} while(pos >= limit);
return 0;
}
Expand Down Expand Up @@ -249,19 +269,19 @@ has_url_beyond_colon_slash(Line *self, index_type x, ListOfChars *lc) {
}

index_type
line_url_start_at(Line *self, index_type x) {
line_url_start_at(Line *self, index_type x, ListOfChars *lc) {
// Find the starting cell for a URL that contains the position x. A URL is defined as
// known-prefix://url-chars. If no URL is found self->xnum is returned.
if (self->cpu_cells[x].is_multicell && self->cpu_cells[x].x) x = x > self->cpu_cells[x].x ? x - self->cpu_cells[x].x : 0;
if (x >= self->xnum || self->xnum <= MIN_URL_LEN + 3) return self->xnum;
index_type ds_pos = 0, t;
RAII_ListOfChars(lc);
index_type ds_pos = 0, t, scale = cell_scale(self->cpu_cells + x);
// First look for :// ahead of x
ds_pos = find_colon_slash(self, x + OPT(url_prefixes).max_prefix_len + 3, x < 2 ? 0 : x - 2, &lc);
if (ds_pos != 0 && has_url_beyond_colon_slash(self, ds_pos, &lc)) {
ds_pos = find_colon_slash(self, x + OPT(url_prefixes).max_prefix_len + 3, x < 2 ? 0 : x - 2, lc, scale);
if (ds_pos != 0 && has_url_beyond_colon_slash(self, ds_pos, lc)) {
if (has_url_prefix_at(self, ds_pos, ds_pos > x ? ds_pos - x: 0, &t)) return t;
}
ds_pos = find_colon_slash(self, x, 0, &lc);
if (ds_pos == 0 || self->xnum < ds_pos + MIN_URL_LEN + 3 || !has_url_beyond_colon_slash(self, ds_pos, &lc)) return self->xnum;
ds_pos = find_colon_slash(self, x, 0, lc, scale);
if (ds_pos == 0 || self->xnum < ds_pos + MIN_URL_LEN + 3 || !has_url_beyond_colon_slash(self, ds_pos, lc)) return self->xnum;
if (has_url_prefix_at(self, ds_pos, 0, &t)) return t;
return self->xnum;
}
Expand Down Expand Up @@ -302,7 +322,8 @@ line_startswith_url_chars(Line *self, bool in_hostname) {
// Python method wrapper around line_url_start_at().
// x must be a non-negative Python int; conversion failures are propagated to
// the caller as the exception set by PyLong_AsUnsignedLong.
static PyObject*
url_start_at(Line *self, PyObject *x) {
#define url_start_at_doc "url_start_at(x) -> Return the start cell number for a URL containing x or self->xnum if not found"
    const unsigned long pos = PyLong_AsUnsignedLong(x);
    // PyLong_AsUnsignedLong signals failure with (unsigned long)-1 and a pending exception
    if (pos == (unsigned long)-1 && PyErr_Occurred()) return NULL;
    // A position outside the line cannot be part of a URL: report "not found"
    if (pos >= self->xnum) return PyLong_FromUnsignedLong((unsigned long)self->xnum);
    RAII_ListOfChars(lc);
    return PyLong_FromUnsignedLong((unsigned long)line_url_start_at(self, (index_type)pos, &lc));
}

static PyObject*
Expand Down
1 change: 1 addition & 0 deletions kitty/line.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ const char* cell_as_sgr(const GPUCell *, const GPUCell *);
// True when the cell's ch_and_idx field is non-zero.
static inline bool
cell_has_text(const CPUCell *c) {
    return c->ch_and_idx ? true : false;
}
// Store ch in the cell's ch_and_idx field, keeping only its low 31 bits.
static inline void
cell_set_char(CPUCell *c, char_type ch) {
    const char_type low_bits = ch & 0x7fffffff;
    c->ch_and_idx = low_bits;
}
// True when the cell's ch_and_idx field is exactly equal to ch.
static inline bool
cell_is_char(const CPUCell *c, char_type ch) {
    const bool same = c->ch_and_idx == ch;
    return same;
}
// Scale of the cell: its scale field for multicell cells, 1 for every other cell.
static inline index_type
cell_scale(const CPUCell *c) {
    if (!c->is_multicell) return 1;
    return c->scale;
}
static inline unsigned num_codepoints_in_cell(const CPUCell *c, const TextCache *tc) {
unsigned ans;
if (c->ch_is_idx) {
Expand Down
2 changes: 1 addition & 1 deletion kitty/lineops.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ typedef Line*(get_line_func)(void *, int);
void line_clear_text(Line *self, unsigned int at, unsigned int num, char_type ch);
void line_apply_cursor(Line *self, const Cursor *cursor, unsigned int at, unsigned int num, bool clear_char);
char_type line_get_char(Line *self, index_type at);
index_type line_url_start_at(Line *self, index_type x);
index_type line_url_start_at(Line *self, index_type x, ListOfChars *lc);
index_type line_url_end_at(Line *self, index_type x, bool, char_type, bool, bool, index_type);
bool line_startswith_url_chars(Line*, bool);
bool line_as_ansi(Line *self, ANSILineState *s, index_type start_at, index_type stop_before, char_type prefix_char, bool skip_multiline_non_zero_lines) __attribute__((nonnull));
Expand Down
57 changes: 37 additions & 20 deletions kitty/screen.c
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,19 @@ init_line(Screen *self, index_type y) {
return self->linebuf->line;
}

// Initialise `line` with the contents of the visual (on screen) row y_,
// taking the current scroll offset into account. Negative rows are treated as
// row 0. While scrolled back, the first self->scrolled_by rows are read from
// the history buffer and the remaining rows from the screen itself.
static void
visual_line(Screen *self, int y_, Line *line) {
    const index_type row = y_ > 0 ? (index_type)y_ : 0;
    const bool in_history = self->scrolled_by && row < self->scrolled_by;
    if (in_history) {
        historybuf_init_line(self->historybuf, self->scrolled_by - 1 - row, line);
    } else {
        // When not scrolled, scrolled_by is zero and the row is used unchanged
        init_line_(self, row - self->scrolled_by, line);
    }
}

static Line*
visual_line_(Screen *self, int y_) {
index_type y = MAX(0, y_);
Expand Down Expand Up @@ -3740,33 +3753,37 @@ screen_detect_url(Screen *screen, unsigned int x, unsigned int y) {
index_type url_start, url_end = 0;
Line *line = screen_visual_line(screen, y);
if (!line || x >= screen->columns) return 0;
if (line->cpu_cells[x].is_multicell && line->cpu_cells[x].scale > 1 && line->cpu_cells[x].y) {
if (line->cpu_cells[x].y > y) return 0;
y -= line->cpu_cells[x].y;
line = screen_visual_line(screen, y);
}
if (line->cpu_cells[x].is_multicell && line->cpu_cells[x].x) x = x > line->cpu_cells[x].x ? x - line->cpu_cells[x].x : 0;
hyperlink_id_type hid;
if ((hid = line->cpu_cells[x].hyperlink_id)) {
screen_mark_hyperlink(screen, x, y);
return hid;
}
char_type sentinel = 0;
bool newlines_allowed = !is_excluded_from_url('\n');
const bool newlines_allowed = !is_excluded_from_url('\n');
index_type last_hostname_char_pos = screen->columns;
if (line) {
url_start = line_url_start_at(line, x);
if (url_start < line->xnum) {
bool next_line_starts_with_url_chars = false;
if (y < screen->lines - 1) {
line = screen_visual_line(screen, y+1);
next_line_starts_with_url_chars = line_startswith_url_chars(line, last_hostname_char_pos >= line->xnum);
if (next_line_starts_with_url_chars && !newlines_allowed && !line->attrs.is_continued) next_line_starts_with_url_chars = false;
line = screen_visual_line(screen, y);
}
sentinel = get_url_sentinel(line, url_start);
index_type slash_count = 0;
for (index_type i = url_start; i < line->xnum; i++) {
if (cell_is_char(line->cpu_cells + i, '/') && ++slash_count > 2) { last_hostname_char_pos = i - 1; break; }
}
url_end = line_url_end_at(line, x, true, sentinel, next_line_starts_with_url_chars, x <= last_hostname_char_pos, last_hostname_char_pos);
url_start = line_url_start_at(line, x, screen->lc);
Line scratch = {.xnum=line->xnum, .text_cache=line->text_cache};
if (url_start < line->xnum) {
bool next_line_starts_with_url_chars = false;
if (y < screen->lines - 1) {
visual_line(screen, y + 1, &scratch);
next_line_starts_with_url_chars = line_startswith_url_chars(&scratch, last_hostname_char_pos >= line->xnum);
if (next_line_starts_with_url_chars && !newlines_allowed && !scratch.attrs.is_continued) next_line_starts_with_url_chars = false;
}
sentinel = get_url_sentinel(line, url_start);
index_type slash_count = 0;
for (index_type i = url_start; i < line->xnum; i++) {
if (cell_is_char(line->cpu_cells + i, '/') && ++slash_count > 2) { last_hostname_char_pos = i - 1; break; }
}
has_url = url_end > url_start;
url_end = line_url_end_at(line, x, true, sentinel, next_line_starts_with_url_chars, x <= last_hostname_char_pos, last_hostname_char_pos);
}
has_url = url_end > url_start;
if (has_url) {
index_type y_extended = y;
extend_url(screen, line, &url_end, &y_extended, sentinel, newlines_allowed, last_hostname_char_pos);
Expand Down Expand Up @@ -4250,7 +4267,7 @@ screen_visual_line(Screen *self, index_type y) {
}

static PyObject*
visual_line(Screen *self, PyObject *args) {
pyvisual_line(Screen *self, PyObject *args) {
// The line corresponding to the yth visual line, taking into account scrolling
unsigned int y;
if (!PyArg_ParseTuple(args, "I", &y)) return NULL;
Expand Down Expand Up @@ -5379,7 +5396,7 @@ static PyMethodDef methods[] = {
MND(dump_lines_with_attrs, METH_VARARGS)
MND(cpu_cells, METH_VARARGS)
MND(cursor_at_prompt, METH_NOARGS)
MND(visual_line, METH_VARARGS)
{"visual_line", (PyCFunction)pyvisual_line, METH_VARARGS, ""},
MND(current_url_text, METH_NOARGS)
MND(draw, METH_O)
MND(apply_sgr, METH_O)
Expand Down

0 comments on commit 8a5b44d

Please sign in to comment.