Skip to content

Commit

Permalink
feat: consider issue comments in the attention set
Browse files Browse the repository at this point in the history
This is a large rework of how pull requests are searched and fetched from GitHub.
It now happens in two steps. Searching returns only node IDs. Pull requests are
then fetched from this node ID.

This allows to retrieve much more information at once when fetching the pull request,
while still keeping the request cost reasonable (cost=1), to avoid hitting rate limiting.

As a consequence, we now fetch issue comments, reviews and review comments, and consider
all of them when computing the attention set.

Closes #60
  • Loading branch information
pvcnt committed Aug 27, 2024
1 parent 93c7b00 commit f380d3e
Show file tree
Hide file tree
Showing 21 changed files with 1,403 additions and 629 deletions.
1 change: 1 addition & 0 deletions apps/webapp/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"dev": "vite --clearScreen false",
"build": "vite build",
"preview": "vite preview",
"typecheck": "tsc --noEmit",
"lint": "eslint \"src/**/*.{ts,tsx}\"",
"test": "vitest run",
"coverage": "vitest run --coverage"
Expand Down
14 changes: 7 additions & 7 deletions apps/webapp/src/components/PullTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ export type Props = {
onStar?: (v: Pull) => void,
}

const formatDate = (d: string) => {
const formatDate = (d: Date|string) => {
return new Date(d).toLocaleDateString("en", {
year: 'numeric',
month: 'long',
day: 'numeric',
hour: 'numeric',
minute: 'numeric'
year: "numeric",
month: "long",
day: "numeric",
hour: "numeric",
minute: "numeric"
});
}

Expand Down Expand Up @@ -100,7 +100,7 @@ export default function PullTable({ pulls, onStar }: Props) {
</td>
<td>
<Tooltip content={formatDate(pull.updatedAt)}>
<TimeAgo date={new Date(pull.updatedAt)} tooltip={false} timeStyle="round"/>
<TimeAgo date={pull.updatedAt} tooltip={false} timeStyle="round"/>
</Tooltip>
</td>
<td>
Expand Down
69 changes: 45 additions & 24 deletions apps/webapp/src/worker/instance.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import * as Comlink from "comlink";
import { groupBy, unique } from "remeda";
import { groupBy, indexBy, prop, unique } from "remeda";
import { db } from "../lib/db";
import { splitQueries } from "@repo/github";
import type { Pull, Connection } from "@repo/model";
import type { Pull } from "@repo/model";
import { GitHubClient, isInAttentionSet } from "@repo/github";
import { gitHubClient } from "../github";

Expand Down Expand Up @@ -55,49 +55,70 @@ export async function syncPullsOnce(client: GitHubClient, force: boolean = false
await executeActivity("syncPulls", syncPullsIntervalMillis, force, async () => {
const connections = await db.connections.toArray();
const sections = await db.sections.toArray();
const stars = new Set((await db.stars.toArray()).map(star => star.uid));

const connectionByPull: Record<string, Connection> = {};
const rawPulls: Pull[] = (
const rawResults = (
await Promise.all(sections.flatMap(section => {
return connections.flatMap(connection => {
const queries = splitQueries(section.search);
return queries.flatMap(async query => {
const pulls = await client.getPulls(connection, query);
return pulls.map(pull => {
connectionByPull[pull.uid] = connection;
const sections = [section.id];
const starred = stars.has(pull.uid) ? 1 : 0;
return { ...pull, sections, starred };
});
const pulls = await client.searchPulls(connection, query);
return pulls.map(res => ({
...res,
uid: `${connection.id}:${res.id}`,
sections: [section.id],
connection: connection.id,
}));
});
});
}))
).flat();

// Deduplicate pull requests present in multiple sections.
const pulls: Pull[] = Object.values(groupBy(rawPulls, pull => pull.uid))
const results = Object.values(groupBy(rawResults, pull => pull.uid))
.map(vs => ({ ...vs[0], sections: vs.flatMap(v => unique(v.sections)) }));


// Compute whether pull requests are in the attention set after they have been
// deduplicated, since this can be quite expensive to do.
const stars = new Set((await db.stars.toArray()).map(star => star.uid));
const connectionsById = indexBy(connections, prop("id"));
const sectionsInAttentionSet = new Set(sections.filter(v => v.attention).map(v => v.id));
for (const pull of pulls) {
if (pull.sections.some(v => sectionsInAttentionSet.has(v))) {
pull.attention = await isInAttentionSet(client, connectionByPull[pull.uid], pull);
const currentKeys = new Set(await db.pulls.toCollection().primaryKeys());

const stats = { new: 0, unchanged: 0, updated: 0 };

const pulls: Pull[] = await Promise.all(results.map(async res => {
if (currentKeys.has(res.uid)) {
const currentPull = await db.pulls.get(res.uid);
if (currentPull !== undefined && res.updatedAt <= currentPull.updatedAt) {
stats.unchanged += 1;
return currentPull;
} else {
stats.updated += 1;
}
} else {
stats.new += 1;
}
}
const connection = connectionsById[res.connection];
const pull = await client.getPull(connection, res.id);
const mayBeInAttentionSet = res.sections.some(v => sectionsInAttentionSet.has(v));
return {
...pull,
uid: res.uid,
connection: res.connection,
sections: res.sections,
host: connection.host,
starred: stars.has(res.uid) ? 1 : 0,
attention: mayBeInAttentionSet ? isInAttentionSet(connection, pull) : undefined,
}
}));

// Upsert pull requests...
await db.pulls.bulkPut(pulls);

// ... and remove pull requests that are not anymore included in any sections.
const keys = new Set(await db.pulls.toCollection().primaryKeys());
pulls.forEach(pull => keys.delete(pull.uid));
await db.pulls.bulkDelete(Array.from(keys));
const staleKeys = new Set(currentKeys);
pulls.forEach(pull => staleKeys.delete(pull.uid));
await db.pulls.bulkDelete(Array.from(staleKeys));

console.log(`Synced ${pulls.length} pull requests`);
console.log(`Synced ${pulls.length} pull requests: ${stats.new} new, ${stats.updated} updated, ${stats.unchanged} unchanged, ${staleKeys.size} deleted`);
});
}

Expand Down
4 changes: 2 additions & 2 deletions apps/webapp/tests/components/CommandBar.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ describe("command bar", () => {
beforeAll(async () => {
user = userEvent.setup();

await db.pulls.add(mockPull({ uid: "1:1", title: "Some title" }));
await db.pulls.add(mockPull({ uid: "1:2", title: "Another title" }));
await db.pulls.add(mockPull({ id: "PR_1", title: "Some title" }));
await db.pulls.add(mockPull({ id: "PR_2", title: "Another title" }));
})

afterAll(async () => await db.pulls.clear());
Expand Down
38 changes: 21 additions & 17 deletions apps/webapp/tests/worker/instance.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,45 +71,49 @@ describe("sync pulls", () => {
assert(connection !== undefined);

const client = new TestGitHubClient();
client.setPulls(connection, "author:@me draft:true", [
mockPull({ uid: "1:1" }),
mockPull({ uid: "1:2" }),
client.setPullsBySearch(connection, "author:@me draft:true", [
mockPull({ id: "PR_1", connection: connection.id }),
mockPull({ id: "PR_2", connection: connection.id }),
]);
client.setPulls(connection, "author:@me draft:false", [
mockPull({ uid: "1:4" }),
client.setPullsBySearch(connection, "author:@me draft:false", [
mockPull({ id: "PR_4", connection: connection.id }),
]);
client.setPulls(connection, "author:@me review:approved", [
mockPull({ uid: "1:1" }),
mockPull({ uid: "1:3" }),
client.setPullsBySearch(connection, "author:@me review:approved", [
mockPull({ id: "PR_1", connection: connection.id }),
mockPull({ id: "PR_3", connection: connection.id }),
]);

connection = await db.connections.get("2");
assert(connection !== undefined);
client.setPulls(connection, "author:@me draft:true", [
mockPull({ uid: "2:1" }),
client.setPullsBySearch(connection, "author:@me draft:true", [
mockPull({ id: "PR_1", connection: connection.id }),
]);
client.setPulls(connection, "author:@me review:approved", [
mockPull({ uid: "2:2" }),
client.setPullsBySearch(connection, "author:@me review:approved", [
mockPull({ id: "PR_2", connection: connection.id }),
]);

// WHEN syncing pull requests.
await syncPullsOnce(client);

// THEN every pull request must be present in the database.
const pks = await db.pulls.toCollection().primaryKeys();
expect(pks.sort()).toEqual(["1:1", "1:2", "1:3", "1:4", "2:1", "2:2"]);
expect(pks.sort()).toEqual(["1:PR_1", "1:PR_2", "1:PR_3", "1:PR_4", "2:PR_1", "2:PR_2"]);

// THEN every pull request must be in the correct section(s).
let pull = await db.pulls.get("1:1");
let pull = await db.pulls.get("1:PR_1");
expect(pull).toBeDefined();
expect(pull?.sections).toEqual(["1", "2"]);

pull = await db.pulls.get("1:2");
pull = await db.pulls.get("1:PR_2");
expect(pull).toBeDefined();
expect(pull?.sections).toEqual(["1"]);

pull = await db.pulls.get("1:3");
pull = await db.pulls.get("1:PR_3");
expect(pull).toBeDefined();
expect(pull?.sections).toEqual(["2"]);

pull = await db.pulls.get("1:4");
pull = await db.pulls.get("1:PR_4");
expect(pull).toBeDefined();
expect(pull?.sections).toEqual(["1"]);

// THEN the activity should have been updated.
Expand Down
6 changes: 3 additions & 3 deletions apps/website/src/content/docs/user-guide/attention-set.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ Whether a user is in the attention set of a pull request is determined by a set
* A draft, merged or closed pull request has an empty attention set.
* A requested reviewer (either directly or via a team) is in the attention set if the pull request is not already approved.
Once they leave a review, they will not be "requested" anymore.
* An author or reviewer is in the attention set when somebody replied to them.
* An author is in the attention set when a reviewer left a comment in any thread.
* An author or reviewer is in the attention set when somebody replied in an unresolved discussion.
* An author is in the attention set when a reviewer left a comment in an unresolved discussion.
* An author is in the attention set when CI is failing.
* An author is in the attention set when a pull request is approved.
* An author is in the attention set when the pull request is approved.

:::note
Those rules are designed to be "mostly correct", but they are not expected to be perfect and work for all use cases.
Expand Down
1 change: 0 additions & 1 deletion apps/website/src/content/docs/user-guide/inbox.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ The following dialog will open:
The search query uses [GitHub's search language](https://docs.github.com/en/search-github/searching-on-github/searching-issues-and-pull-requests), with the following modifications:

* A `type:pr` term is implicitely added, you do not need to include it.
* Pull requests are always sorted by last updated first (i.e., `sort:updated`).
* Archived pull requests are excluded by default (i.e., `archived:false`), unless the search query is explicitely searching for archived pull requests.
* Only pull requests from organizations configured in the connection are returned.
* You may provide several individual search queries separated by a semi-colon (";").
Expand Down
6 changes: 3 additions & 3 deletions packages/github/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
"vitest": "^2.0.5"
},
"dependencies": {
"@octokit/core": "^6.1.2",
"@octokit/rest": "^20.1.1",
"@repo/model": "workspace:*"
"@octokit/plugin-throttling": "^9.3.1",
"@repo/model": "workspace:*",
"octokit": "^4.0.2"
}
}
57 changes: 18 additions & 39 deletions packages/github/src/attention.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { type Attention, type PullProps, type Comment, type Connection, PullState, CheckState } from "@repo/model";
import { GitHubClient } from "./client.js";
import type { Attention, PullProps, Connection } from "@repo/model";
import { PullState, CheckState } from "@repo/model";

export async function isInAttentionSet(client: GitHubClient, connection: Connection, pull: PullProps): Promise<Attention> {
export function isInAttentionSet(connection: Connection, pull: PullProps): Attention {
if (pull.state === PullState.Draft || pull.state === PullState.Merged || pull.state === PullState.Closed) {
// Draft, merged and closed pull requests have no attention set.
return { set: false };
Expand All @@ -12,7 +12,7 @@ export async function isInAttentionSet(client: GitHubClient, connection: Connect

const isAuthor = pull.author.name === viewerName;
const isApproved = pull.state === PullState.Approved;
const isReviewer = pull.reviewers.some(r => r.name === viewerName);
const isReviewer = pull.reviews.some(r => r.author?.name === viewerName);
const isRequestedReviewer = pull.requestedReviewers.some(r => r.name === viewerName)
|| pull.requestedTeams.some(r => viewerTeams.has(r.name));

Expand All @@ -21,33 +21,34 @@ export async function isInAttentionSet(client: GitHubClient, connection: Connect
return { set: false };
}

const comments = await client.getComments(connection, pull.repo, pull.number);
const threads = groupByThread(comments);

const reviewerNames = new Set(pull.reviewers.map(r => r.name));
const reviewerNames = new Set(pull.reviews.map(r => r.author?.name));
const commenterNames = new Set<string>();
for (const thread of threads) {
if (thread.every(c => c.author.name === pull.author.name)) {
for (const discussion of pull.discussions) {
if (discussion.comments.every(c => c.author?.name === pull.author.name)) {
// Threads containing only comments from the author are ignored. They are usually
// part of the initial remarks left by the author. If nobody replied, we consider
// they do not refer to any action to take.
continue;
}
const lastViewerCommentPos = thread.findLastIndex(c => c.author.name === viewerName);
if (discussion.resolved) {
// Resolved discussions are ignored.
continue;
}
const lastViewerCommentPos = discussion.comments.findLastIndex(c => c.author?.name === viewerName);
const commentsAfterLastViewerComment = (lastViewerCommentPos === -1)
? comments
: comments.slice(lastViewerCommentPos + 1);
? discussion.comments
: discussion.comments.slice(lastViewerCommentPos + 1);
if (lastViewerCommentPos > -1 && commentsAfterLastViewerComment.length > 0) {
// The author and reviewers are always notified when somebody replied to them.
commentsAfterLastViewerComment
.filter(c => c.author.name !== viewerName)
.forEach(c => commenterNames.add(c.author.name));
.filter(c => c.author?.name !== viewerName)
.forEach(c => commenterNames.add(c.author?.name ?? "Anonymous"));
} else if (isAuthor) {
// The author (and only them) is notified when a reviewer left a comment in any thread,
// even if the author did not participate in this thread.
commentsAfterLastViewerComment
.filter(c => reviewerNames.has(c.author.name))
.forEach(c => commenterNames.add(c.author.name));
.filter(c => reviewerNames.has(c.author?.name))
.forEach(c => commenterNames.add(c.author?.name ?? "Anonymous"));
}
}
if (commenterNames.size > 0) {
Expand All @@ -73,26 +74,4 @@ export async function isInAttentionSet(client: GitHubClient, connection: Connect
} else {
return { set: false };
}
}

/**
* Group all review comments of a pull request into threads.
*
* @param comments Review comments of a pull request.
* @returns Review comments, grouped by thread (in the same order).
*/
function groupByThread(comments: Comment[]): Comment[][] {
// `inReplyTo` contains the identifier of the first comment of a thread.
// The first message in a thread does not have an `inReplyTo`.
const threads: Record<string, Comment[]> = {};
for (const comment of comments) {
if (comment.inReplyTo === undefined) {
threads[comment.uid] = [comment];
} else if (comment.inReplyTo in threads) {
threads[comment.inReplyTo].push(comment);
} else {
threads[comment.inReplyTo] = [comment];
}
}
return Object.values(threads);
}
Loading

0 comments on commit f380d3e

Please sign in to comment.