feat: merge main

web-infra-dev · Jan 24, 2025 · 469c7a2
2 parents 8a449f3 + 4c82eff
commit 469c7a2
Show file tree

Hide file tree

Showing 24 changed files with 134 additions and 36 deletions.
diff --git a/.github/workflows/ai.yml b/.github/workflows/ai.yml
@@ -124,8 +124,10 @@ jobs:
         if-no-files-found: ignore
 
     - name: Check if script failed
-      if: steps.test-ai.outcome == 'failure'
+      if: steps.test-ai.outcome == 'failure' || steps.e2e-tests.outcome == 'failure' || steps.e2e-tests-cache.outcome == 'failure' || steps.e2e-tests-report.outcome == 'failure'
       run: exit 1
 
 
 
+
+
diff --git a/README.md b/README.md
@@ -28,7 +28,7 @@ Midscene.js is an AI-powered automation SDK with the abilities to control the pa
 
 | Instruction  | Video |
 | :---:  | :---: |
-| Post a Twitter      |    <video src="https://github.com/user-attachments/assets/bb3d695a-fbff-4af1-b6cc-5e967c07ccee" height="300" />    |
+| Post a Tweet      |    <video src="https://github.com/user-attachments/assets/bb3d695a-fbff-4af1-b6cc-5e967c07ccee" height="300" />    |
 | Use JS code to drive task orchestration, collect information about Jay Chou's concert, and write it into Google Docs   | <video src="https://github.com/user-attachments/assets/75474138-f51f-4c54-b3cf-46d61d059999" height="300" />        |
 
 
@@ -76,7 +76,7 @@ There are so many UI automation tools out there, and each one seems to be all-po
 
 ## 🤝 Community
 
-* [Discord](https://discord.gg/AFHJBdnn)
+* [Discord](https://discord.gg/XEYR3tmZ)
 * [Follow us on X](https://x.com/midscene_ai)
 * [Lark Group](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=291q2b25-e913-411a-8c51-191e59aab14d)
 

diff --git a/README.zh.md b/README.zh.md
@@ -78,7 +78,7 @@ Midscene.js 是一个由 AI 驱动的自动化 SDK，能够使用自然语言对
 ## 🤝 社区
 
 * [飞书交流群](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=291q2b25-e913-411a-8c51-191e59aab14d)
-* [Discord](https://discord.gg/AFHJBdnn)
+* [Discord](https://discord.gg/XEYR3tmZ)
 * [Follow us on X](https://x.com/midscene_ai)
 
   <img src="https://github.com/user-attachments/assets/211b05c9-3ccd-4f52-b798-f3a7f51330ed" alt="lark group link" width="300" />

diff --git a/apps/site/docs/en/index.mdx b/apps/site/docs/en/index.mdx
@@ -93,5 +93,5 @@ For more details, please refer to [Data Privacy](./data-privacy).
 
 * [GitHub - give us a star!](https://github.com/web-infra-dev/midscene)
 * [Twitter](https://x.com/midscene_ai)
-* [Discord](https://discord.gg/AFHJBdnn)
+* [Discord](https://discord.gg/XEYR3tmZ)
 * [Lark](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=291q2b25-e913-411a-8c51-191e59aab14d)
diff --git a/apps/site/docs/zh/index.mdx b/apps/site/docs/zh/index.mdx
@@ -80,5 +80,5 @@ Midscene 运行在用户的自有环境中，所有从页面收集的数据会
 
 * [GitHub - 请给我们点个 star](https://github.com/web-infra-dev/midscene)
 * [Twitter](https://x.com/midscene_ai)
-* [Discord](https://discord.gg/AFHJBdnn)
+* [Discord](https://discord.gg/XEYR3tmZ)
 * [飞书交流群](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=291q2b25-e913-411a-8c51-191e59aab14d)
diff --git a/apps/site/rspress.config.ts b/apps/site/rspress.config.ts
@@ -22,7 +22,7 @@ export default defineConfig({
       {
         icon: 'discord',
         mode: 'link',
-        content: 'https://discord.gg/AFHJBdnn',
+        content: 'https://discord.gg/XEYR3tmZ',
       },
       {
         icon: 'x',

diff --git a/packages/midscene/package.json b/packages/midscene/package.json
@@ -34,8 +34,9 @@
     "computer": "TEST_COMPUTER=true npm run test:ai -- tests/ai/evaluate/computer.test.ts",
     "evaluate": "npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
     "evaluate:assertion": "npm run test:ai -- tests/ai/evaluate/assertion.test.ts",
-    "prompt": "npm run test:ai -- tests/ai/parse-action.test.ts",
+    "evaluate:plan": "npm run test:ai -- tests/ai/evaluate/plan/planning.test.ts",
     "evaluate:update": "UPDATE_AI_DATA=true npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
+    "prompt": "npm run test:ai -- tests/ai/parse-action.test.ts",
     "prepublishOnly": "npm run build"
   },
   "dependencies": {

diff --git a/packages/midscene/tests/ai/evaluate/assertion.test.ts b/packages/midscene/tests/ai/evaluate/assertion.test.ts
@@ -9,8 +9,8 @@ import {
   repeatFile,
 } from './test-suite/util';
 import 'dotenv/config';
+import { repeatTime } from '../util';
 
-const repeatTime = 2;
 const testSources = [
   // 'todo',
   'online_order',

diff --git a/packages/midscene/tests/ai/evaluate/inspect.test.ts b/packages/midscene/tests/ai/evaluate/inspect.test.ts
@@ -14,8 +14,8 @@ import {
   runTestCases,
 } from './test-suite/util';
 import 'dotenv/config';
+import { repeatTime } from '../util';
 
-const repeatTime = 2;
 const relocateAfterPlanning = false;
 const failCaseThreshold = process.env.CI ? 1 : 0;
 const testSources = [

diff --git a/packages/midscene/tests/ai/evaluate/plan/__snapshots__/planning-input.test.ts.snap b/packages/midscene/tests/ai/evaluate/plan/__snapshots__/planning-input.test.ts.snap
@@ -0,0 +1,89 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`automation - planning input > input value 1`] = `
+[
+  {
+    "locate": {
+      "id": "fbc2d002",
+      "prompt": "the input field with placeholder 'What needs to be done?'",
+    },
+    "param": {
+      "value": "learning english",
+    },
+    "thought": undefined,
+    "type": "Input",
+  },
+]
+`;
+
+exports[`automation - planning input > input value 2`] = `
+[
+  {
+    "locate": {
+      "id": "fbc2d002",
+      "prompt": "the input field labeled 'What needs to be done?'",
+    },
+    "param": {
+      "value": "learning english",
+    },
+    "thought": undefined,
+    "type": "Input",
+  },
+  {
+    "locate": null,
+    "param": {
+      "value": "Enter",
+    },
+    "thought": undefined,
+    "type": "KeyboardPress",
+  },
+]
+`;
+
+exports[`automation - planning input > input value Add, delete, correct and check 1`] = `
+[
+  {
+    "locate": {
+      "id": "fbc2d002",
+      "prompt": "the task input box with the content 'Learn English'",
+    },
+    "param": {
+      "value": "Learn English tomorrow",
+    },
+    "thought": undefined,
+    "type": "Input",
+  },
+]
+`;
+
+exports[`automation - planning input > input value Add, delete, correct and check 2`] = `
+[
+  {
+    "locate": {
+      "id": "fbc2d002",
+      "prompt": "the input box containing 'Learn English'",
+    },
+    "param": {
+      "value": "Learn Skiing",
+    },
+    "thought": undefined,
+    "type": "Input",
+  },
+]
+`;
+
+exports[`automation - planning input > input value Add, delete, correct and check 3`] = `
+[
+  {
+    "locate": {
+      "id": "fbc2d002",
+      "prompt": "the task input box containing 'Learn English'",
+    },
+    "param": {
+      "value": "Learn",
+    },
+    "thought": undefined,
+    "type": "Input",
+  },
+]
+`;
diff --git a/packages/midscene/tests/ai/evaluate/plan/planning-input.test.ts b/packages/midscene/tests/ai/evaluate/plan/planning-input.test.ts
@@ -1,7 +1,7 @@
 import { plan } from '@/ai-model';
 /* eslint-disable max-lines-per-function */
 import { describe, expect, it, vi } from 'vitest';
-import { makePlanResultStable } from '../../util';
+import { makePlanResultStable, repeatTime } from '../../util';
 import { getPageDataOfTestName, repeat } from './../test-suite/util';
 
 vi.setConfig({
@@ -10,7 +10,7 @@ vi.setConfig({
 });
 
 describe('automation - planning input', () => {
-  repeat(5, () =>
+  repeat(repeatTime, () =>
     it('input value', async () => {
       const { context } = await getPageDataOfTestName('todo');
       const instructions = [
@@ -26,7 +26,7 @@ describe('automation - planning input', () => {
     }),
   );
 
-  repeat(5, () =>
+  repeat(repeatTime, () =>
     it('input value Add, delete, correct and check', async () => {
       const { context } = await getPageDataOfTestName('todo-input-with-value');
       const instructions = [

diff --git a/packages/midscene/tests/ai/evaluate/plan/planning.test.ts b/packages/midscene/tests/ai/evaluate/plan/planning.test.ts
@@ -48,7 +48,7 @@ describe('automation - planning', () => {
   it('scroll some element', async () => {
     const { context } = await getPageDataOfTestName('todo');
     const { actions } = await plan(
-      'Scroll left the status filters (with a button named "complete")',
+      'Scroll left the status filters (with a button named "completed")',
       {
         context,
       },

diff --git a/packages/midscene/tests/ai/extract/__snapshots__/extract.test.ts.snap b/packages/midscene/tests/ai/extract/__snapshots__/extract.test.ts.snap
@@ -1,6 +1,6 @@
 // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
 
-exports[`assert openAI > online order 1`] = `
+exports[`extract > online order 1`] = `
 {
   "data": [
     {
@@ -17,7 +17,7 @@ exports[`assert openAI > online order 1`] = `
 }
 `;
 
-exports[`assert openAI > todo 1`] = `
+exports[`extract > todo 1`] = `
 {
   "data": [
     "Learn Python",
@@ -29,7 +29,7 @@ exports[`assert openAI > todo 1`] = `
 }
 `;
 
-exports[`assert openAI > todo obj 1`] = `
+exports[`extract > todo obj 1`] = `
 {
   "data": [
     {

diff --git a/packages/midscene/tests/ai/plan/plan-to-target.test.ts b/packages/midscene/tests/ai/plan/plan-to-target.test.ts
@@ -1,12 +1,13 @@
 import path from 'node:path';
 import { vlmPlanning } from '@/ai-model/ui-tars-planning';
 import { savePositionImg } from '@midscene/shared/img';
-import { expect, test } from 'vitest';
+import { expect, it, test } from 'vitest';
 import { getPageTestData } from '../evaluate/test-suite/util';
 
-test.skipIf(!process.env.MIDSCENE_USE_VLM_UI_TARS)(
-  'inspect with quick answer',
-  async () => {
+const isUiTars = process.env.MIDSCENE_USE_VLM_UI_TARS === '1';
+
+test.skipIf(!isUiTars)('only run in ui-tars', () => {
+  it('plan to target', async () => {
     const { context } = await getPageTestData(
       path.join(__dirname, '../evaluate/test-data/todo'),
     );
@@ -56,6 +57,5 @@ test.skipIf(!process.env.MIDSCENE_USE_VLM_UI_TARS)(
       rect: { x: box[0] * width, y: box[1] * height },
       outputPath: path.join(__dirname, 'output.png'),
     });
-    //   expect(cost).toBeLessThan(100);
-  },
-);
+  });
+});
diff --git a/packages/midscene/tests/ai/util.ts b/packages/midscene/tests/ai/util.ts
@@ -1,15 +1,18 @@
 import type { PlanningAction } from '@/types';
 
+export const repeatTime = 1;
 export function makePlanResultStable(plans: PlanningAction[]) {
   return plans.map((plan) => {
     // Removing thinking makes the results stable for snapshot testing
     plan.thought = undefined;
     if (plan.param?.prompt) {
       plan.param.prompt = '';
     }
-    if (plan.quickAnswer) {
-      plan.quickAnswer.reason = '';
-      plan.quickAnswer.text = '';
+    if ('quickAnswer' in plan && plan.quickAnswer) {
+      plan.quickAnswer = {
+        reason: '',
+        text: '',
+      };
     }
     return plan;
   });

diff --git a/packages/web-integration/package.json b/packages/web-integration/package.json
@@ -107,6 +107,7 @@
     "test": "vitest --run",
     "test:u": "vitest --run -u",
     "test:ai": "AI_TEST_TYPE=web npm run test",
+    "test:ai:bridge": "BRIDGE_MODE=true npm run test --inspect packages/web-integration/tests/ai/bridge/agent.test.ts",
     "test:ai:cache": "MIDSCENE_CACHE=true AI_TEST_TYPE=web npm run test",
     "test:ai:all": "npm run test:ai:web && npm run test:ai:native",
     "test:ai:native": "MIDSCENE_CACHE=true AI_TEST_TYPE=native npm run test",

diff --git a/packages/web-integration/src/common/tasks.ts b/packages/web-integration/src/common/tasks.ts
@@ -349,9 +349,13 @@ export class PageTaskExecutor {
               } else if (scrollToEventName === 'untilLeft') {
                 await this.page.scrollUntilLeft(startingPoint);
               } else if (scrollToEventName === 'once' || !scrollToEventName) {
-                if (taskParam.direction === 'down' || !taskParam.direction) {
+                if (
+                  taskParam?.direction === 'down' ||
+                  !taskParam ||
+                  !taskParam.direction
+                ) {
                   await this.page.scrollDown(
-                    taskParam.distance || undefined,
+                    taskParam?.distance || undefined,
                     startingPoint,
                   );
                 } else if (taskParam.direction === 'up') {

diff --git a/packages/web-integration/tests/ai/bridge/agent.test.ts b/packages/web-integration/tests/ai/bridge/agent.test.ts
@@ -8,7 +8,8 @@ vi.setConfig({
   testTimeout: 60 * 1000,
 });
 const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
-describe.skipIf(process.env.CI)(
+
+describe.skipIf(!process.env.BRIDGE_MODE)(
   'fully functional agent in server(cli) side',
   () => {
     it('basic', async () => {

diff --git a/packages/web-integration/tests/ai/web/playwright/ai-auto-todo.spec.ts b/packages/web-integration/tests/ai/web/playwright/ai-auto-todo.spec.ts
@@ -13,11 +13,7 @@ test('ai todo', async ({ ai, aiQuery }) => {
   }
 
   await ai('Enter "Happy Birthday" in the task box');
-  await ai('Enter "Learn" in the task box');
-
-  await ai(
-    'Add "JS today" to base on the existing content(important) of the task box, then press enter',
-  );
+  await ai('Enter "Learn JS today"in the task box, then press Enter to create');
 
   await ai(
     'Enter "Learn Rust tomorrow" in the task box, then press Enter to create',
@@ -27,6 +23,7 @@ test('ai todo', async ({ ai, aiQuery }) => {
   );
 
   const allTaskList = await aiQuery<string[]>('string[], tasks in the list');
+  console.log('allTaskList', allTaskList);
   expect(allTaskList.length).toBe(3);
   expect(allTaskList).toContain('Learn JS today');
   expect(allTaskList).toContain('Learn Rust tomorrow');

diff --git a/packages/web-integration/tests/ai/web/puppeteer/showcase.test.ts b/packages/web-integration/tests/ai/web/puppeteer/showcase.test.ts
@@ -75,7 +75,7 @@ describe(
       const mid = new PuppeteerAgent(originPage);
 
       // await mid.aiAction('If pop-ups are displayed click seven days out alert');
-
+      await sleep(8000);
       await mid.aiAction(
         'Click the password input in the demo section on page, type "abc"',
       );

diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/input.png b/packages/web-integration/tests/unit-test/fixtures/web-extractor/input.png
diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/output.png b/packages/web-integration/tests/unit-test/fixtures/web-extractor/output.png
diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/input.png b/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/input.png
diff --git a/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/output.png b/packages/web-integration/tests/unit-test/fixtures/web-extractor/scroll/output.png