From 990ce49a323d2dfd8a36565718f9c7f21dec0ef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=B6=E7=91=BE?= <74231782+sj817@users.noreply.github.com> Date: Fri, 27 Dec 2024 10:15:20 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E9=A1=B5=E9=9D=A2?= =?UTF-8?q?=E6=B1=A0=E7=AE=A1=E7=90=86=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E9=A1=B5=E9=9D=A2=E7=9A=84=E5=88=9B=E5=BB=BA=E3=80=81?= =?UTF-8?q?=E8=8E=B7=E5=8F=96=E3=80=81=E9=87=8A=E6=94=BE=E5=92=8C=E5=85=B3?= =?UTF-8?q?=E9=97=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 3 +- src/puppeteer/core.ts | 77 +++++++--------- src/puppeteer/pagePool.ts | 179 ++++++++++++++++++++++++++++++++++++++ test/index.js | 30 +++++++ test/test.html | 133 ++++++++++++++++++++++++++++ 5 files changed, 374 insertions(+), 48 deletions(-) create mode 100644 src/puppeteer/pagePool.ts create mode 100644 test/index.js create mode 100644 test/test.html diff --git a/package.json b/package.json index 63824cd..828c86a 100644 --- a/package.json +++ b/package.json @@ -39,7 +39,8 @@ "build": "tsc --project tsconfig.json && tsc-alias -p tsconfig.json", "pub": "npm publish --access public", "sort": "npx sort-package-json", - "sync": "curl -X PUT \"https://registry-direct.npmmirror.com/-/package/@karinjs/puppeteer-core/syncs\"" + "sync": "curl -X PUT \"https://registry-direct.npmmirror.com/-/package/@karinjs/puppeteer-core/syncs\"", + "test": "node ./test/index.js" }, "dependencies": { "decompress": "4.2.1", diff --git a/src/puppeteer/core.ts b/src/puppeteer/core.ts index cc3bc6e..ef57cfc 100644 --- a/src/puppeteer/core.ts +++ b/src/puppeteer/core.ts @@ -1,6 +1,7 @@ import { common } from '@Common' import { ChildProcess } from 'child_process' import puppeteer, { Browser, GoToOptions, HTTPRequest, Page, LaunchOptions, ScreenshotOptions } from 'puppeteer-core' +import { PagePool } from './pagePool' export interface screenshot extends ScreenshotOptions { /** http地址、本地文件路径、html字符串 */ @@ -100,6 +101,8 @@ export class Render { process!: ChildProcess | null /** 页面实例 */ // pages: Page[] + pagePool!: PagePool + constructor (id: number, config: LaunchOptions) { this.id = id this.config = config @@ -116,10 +119,13 @@ export class Render { /** 浏览器id */ this.process = this.browser.process() + // 初始化页面池 + this.pagePool = new PagePool(this) + /** 监听浏览器关闭事件 移除浏览器实例 */ this.browser.on('disconnected', async () => { console.error(`[浏览器][${this.id}] 已关闭或崩溃`) - + await this.pagePool.closeAll() /** 传递一个浏览器崩溃事件出去 用于在浏览器池子中移除掉当前浏览器 */ common.emit('browserCrash', this.id) /** 尝试关闭 */ @@ -144,7 +150,7 @@ export class Render { try { this.list.set(echo, true) /** 创建页面 */ - page = await this.page(data) + page = await this.newPage(data) const options = { path: data.path, @@ -214,13 +220,20 @@ export class Render { } return list as RenderResult + } catch (error) { + /** 如果发生错误,从池中移除页面 */ + if (page) { + await this.pagePool.removePage(page) + page = undefined + } + throw error } finally { /** 从队列中去除 */ this.list.delete(echo) if (page) { common.emit('screenshot', this.id) - page.removeAllListeners() - await page?.close().catch(() => { }) + // 不再直接关闭页面,而是将其释放回池中 + await this.pagePool.releasePage(page) } } } @@ -229,55 +242,25 @@ export class Render { * 初始化页面 * @param data 截图参数 */ - async page (data: screenshot) { - /** 创建页面 */ - const page = await this.browser.newPage() - // let page: Page + async newPage (data: screenshot) { + let page: Page - /** 打开页面数+1 */ - common.emit('newPage', this.id) - - // /** 如果waitUntil传参了 直接加载页面 */ - // if (data?.pageGotoParams?.waitUntil) { - // /** 有监听器需求 new一个 */ - // if (typeof data.setRequestInterception === 'function') { - // page = await this.browser.newPage() - // this.pages.push(page) - - // /** 请求拦截处理 */ - // await page.setRequestInterception(true) - // page.on('request', (req) => data.setRequestInterception!(req, data)) - // } else { - // /** 无监听器需求 从页面中拿一个 */ - // page = this.pages[0] - // } - - // /** 设置HTTP 标头 */ - // if (data.headers) await page.setExtraHTTPHeaders(data.headers) - - // /** 打开、加载页面 */ - // if (data.file.startsWith('http') || data.file.startsWith('file://')) { - // await page.goto(data.file, data.pageGotoParams) - // } else { - // await page.setContent(data.file, data.pageGotoParams) - // } - // } else { - // /** 有监听器需求 new一个 */ - // page = await this.browser.newPage() - // this.pages.push(page) - // /** 设置HTTP 标头 */ - // if (data.headers) await page.setExtraHTTPHeaders(data.headers) - // /** 模拟0毫秒的waitUntil */ - // await this.simulateWaitUntil(page, data) - // } - - /** 设置HTTP 标头 */ - if (data.headers) await page.setExtraHTTPHeaders(data.headers) if (typeof data.setRequestInterception === 'function') { + page = await this.pagePool.createPage() + + /** 设置HTTP 标头 */ + if (data.headers) await page.setExtraHTTPHeaders(data.headers) await page.setRequestInterception(true) page.on('request', (req) => data.setRequestInterception!(req, data)) + } else { + page = await this.pagePool.acquirePage() + /** 设置HTTP 标头 */ + if (data.headers) await page.setExtraHTTPHeaders(data.headers) } + /** 打开页面数+1 */ + common.emit('newPage', this.id) + /** 打开页面 */ if (data.file.startsWith('http') || data.file.startsWith('file://')) { await page.goto(data.file, data.pageGotoParams) diff --git a/src/puppeteer/pagePool.ts b/src/puppeteer/pagePool.ts new file mode 100644 index 0000000..4b46576 --- /dev/null +++ b/src/puppeteer/pagePool.ts @@ -0,0 +1,179 @@ +import { Page } from 'puppeteer-core' +import { Render } from './core' + +interface PageInfo { + /** 页面对象 */ + page: Page + /** 状态:idle 空闲,busy 忙碌 */ + status: 'idle' | 'busy' + /** 最后使用时间 */ + lastUsed: number + /** 空闲定时器 */ + timer?: NodeJS.Timeout +} + +export class PagePool { + private pool: Map = new Map() + private maxSize: number = 10 + private idleTimeout: number = 60000 // 1分钟 + private render: Render + + constructor (render: Render) { + this.render = render + this.initFirstPage() + } + + /** + * 初始化第一个页面 + */ + private async initFirstPage () { + await this.createNewPage() + } + + /** + * 生成一个随机ID + * @returns 随机ID + */ + private generateId (): string { + return Math.random().toString(36).substring(2, 15) + } + + /** + * 创建一个新的页面 + */ + private async createNewPage (): Promise { + const page = await this.render.browser.newPage() + const id = this.generateId() + + this.pool.set(id, { + page, + status: 'idle', + lastUsed: Date.now() + }) + + return id + } + + /** + * 启动空闲定时器 + * @param id 页面ID + * @param timeout 超时时间 + */ + private startIdleTimer (id: string) { + const pageInfo = this.pool.get(id) + if (!pageInfo) return + + // 清除之前的定时器 + if (pageInfo.timer) { + clearTimeout(pageInfo.timer) + } + + // 设置新的定时器 + pageInfo.timer = setTimeout(async () => { + const info = this.pool.get(id) + if (info && info.status === 'idle') { + await info.page.close().catch(() => { }) + this.pool.delete(id) + } + }, this.idleTimeout) + } + + /** + * 创建一个新的页面 + */ + async createPage (): Promise { + const id = await this.createNewPage() + const info = this.pool.get(id)! + info.status = 'busy' + return info.page + } + + /** + * 获取一个页面 + */ + async acquirePage (): Promise { + // 查找空闲页面 + for (const [, info] of this.pool.entries()) { + if (info.status === 'idle') { + info.status = 'busy' + info.lastUsed = Date.now() + if (info.timer) { + clearTimeout(info.timer) + } + return info.page + } + } + + // 如果没有空闲页面且未达到最大限制,创建新页面 + if (this.pool.size < this.maxSize) { + const id = await this.createNewPage() + const info = this.pool.get(id)! + info.status = 'busy' + return info.page + } + + // 如果达到最大限制,等待某个页面空闲 + return new Promise((resolve) => { + const checkInterval = setInterval(async () => { + for (const [, info] of this.pool.entries()) { + if (info.status === 'idle') { + clearInterval(checkInterval) + info.status = 'busy' + info.lastUsed = Date.now() + if (info.timer) { + clearTimeout(info.timer) + } + resolve(info.page) + return + } + } + }, 100) + }) + } + + /** + * 释放一个页面 + */ + async releasePage (page: Page) { + for (const [id, info] of this.pool.entries()) { + if (info.page === page) { + info.status = 'idle' + info.lastUsed = Date.now() + this.startIdleTimer(id) + break + } + } + } + + /** + * 关闭所有页面 + */ + async closeAll () { + for (const [, info] of this.pool.entries()) { + if (info.timer) { + clearTimeout(info.timer) + } + await info.page.close().catch(() => { }) + } + this.pool.clear() + } + + /** + * 从池中移除指定页面 + */ + async removePage (page: Page) { + for (const [id, info] of this.pool.entries()) { + if (info.page === page) { + if (info.timer) { + clearTimeout(info.timer) + } + // 移除所有事件监听器 + page.removeAllListeners() + // 关闭页面 + await info.page.close().catch(() => { }) + this.pool.delete(id) + break + } + } + } +} diff --git a/test/index.js b/test/index.js new file mode 100644 index 0000000..7e27f8a --- /dev/null +++ b/test/index.js @@ -0,0 +1,30 @@ +import fs from 'fs' +import Puppeteer from '../lib/index.js' + +// 使用示例 +const chrome = new Puppeteer({ chrome: 'chrome', headless: false }) + +await chrome.init() + +await new Promise((resolve) => setTimeout(resolve, 1000)) + +const screenshot = async () => { + /** 计算耗时 */ + console.time('截图耗时') + const image = await chrome.screenshot({ + file: 'file://D:/QQBot/karin-puppeteer-core/test/test.html', + encoding: 'base64', + type: 'png', + }) + + console.timeEnd('截图耗时') + fs.writeFileSync('image.png', Buffer.from(image, 'base64')) +} + +// 监听控制台输出 输入p则截图 +process.stdin.on('data', async (data) => { + console.log(data.toString().trim()) + if (data.toString().trim() === 'p') { + await screenshot() + } +}) diff --git a/test/test.html b/test/test.html new file mode 100644 index 0000000..8457534 --- /dev/null +++ b/test/test.html @@ -0,0 +1,133 @@ + + + + + + + + +
+
+
+
头像
+

张三

+
+
+
年龄:28岁
+
职业:前端工程师
+
所在地:北京市朝阳区
+
邮箱:zhangsan@example.com
+
+
+

热爱技术,专注于前端开发5年,擅长 Vue.js 和 React。工作之余喜欢摄影和旅行,希望能用技术改变世界。

+
+
+
+ + + + + \ No newline at end of file