Skip to content

Commit

Permalink
PDF importer (#7610)
Browse files Browse the repository at this point in the history
Signed-off-by: Denis Bykhov <[email protected]>
  • Loading branch information
BykhovDenis authored Jan 9, 2025
1 parent 52e44bf commit d63db0f
Show file tree
Hide file tree
Showing 8 changed files with 371 additions and 27 deletions.
3 changes: 2 additions & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,8 @@
"PASSWORD": "password",
"AVATAR_PATH": "./assets/avatar.png",
"AVATAR_CONTENT_TYPE": ".png",
"LOVE_ENDPOINT": "http://localhost:8096"
"STORAGE_CONFIG": "minio|localhost?accessKey=minioadmin&secretKey=minioadmin",
"LOVE_ENDPOINT": "http://localhost:8096"
},
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
"sourceMaps": true,
Expand Down
4 changes: 3 additions & 1 deletion common/config/rush/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions services/ai-bot/pod-ai-bot/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
"eslint-plugin-n": "^15.4.0",
"eslint-plugin-node": "^11.1.0",
"eslint-plugin-promise": "^6.1.1",
"@types/uuid": "^8.3.1",
"jest": "^29.7.0",
"prettier": "^3.1.0",
"ts-jest": "^29.1.1",
Expand All @@ -56,6 +57,8 @@
"@hcengineering/account": "^0.6.0",
"@hcengineering/ai-bot": "^0.6.0",
"@hcengineering/analytics-collector": "^0.6.0",
"@hcengineering/document": "^0.6.0",
"@hcengineering/attachment": "^0.6.14",
"@hcengineering/chunter": "^0.6.20",
"@hcengineering/client": "^0.6.18",
"@hcengineering/client-resources": "^0.6.27",
Expand All @@ -72,6 +75,8 @@
"@hcengineering/server-token": "^0.6.11",
"@hcengineering/setting": "^0.6.17",
"@hcengineering/text": "^0.6.5",
"@hcengineering/rank": "^0.6.4",
"@hcengineering/server-storage": "^0.6.0",
"@hcengineering/workbench": "^0.6.16",
"@hcengineering/love": "^0.6.0",
"cors": "^2.8.5",
Expand All @@ -80,6 +85,7 @@
"fast-equals": "^5.0.1",
"form-data": "^4.0.0",
"js-tiktoken": "^1.0.14",
"uuid": "^8.3.2",
"mongodb": "^6.12.0",
"openai": "^4.56.0",
"ws": "^8.18.0"
Expand Down
4 changes: 3 additions & 1 deletion services/ai-bot/pod-ai-bot/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ interface Config {
MaxHistoryRecords: number
Port: number
LoveEndpoint: string
DataLabApiKey: string
}

// Parse an optional environment-variable string into a number.
// Returns undefined when the variable is unset; a non-numeric string yields NaN.
const parseNumber = (str: string | undefined): number | undefined => {
  if (str === undefined) {
    return undefined
  }
  return Number(str)
}
Expand All @@ -61,7 +62,8 @@ const config: Config = (() => {
MaxContentTokens: parseNumber(process.env.MAX_CONTENT_TOKENS) ?? 128 * 100,
MaxHistoryRecords: parseNumber(process.env.MAX_HISTORY_RECORDS) ?? 500,
Port: parseNumber(process.env.PORT) ?? 4010,
LoveEndpoint: process.env.LOVE_ENDPOINT ?? ''
LoveEndpoint: process.env.LOVE_ENDPOINT ?? '',
DataLabApiKey: process.env.DATALAB_API_KEY ?? ''
}

const missingEnv = (Object.keys(params) as Array<keyof Config>).filter((key) => params[key] === undefined)
Expand Down
36 changes: 28 additions & 8 deletions services/ai-bot/pod-ai-bot/src/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,24 @@ import {
TranslateRequest,
TranslateResponse
} from '@hcengineering/ai-bot'
import { Markup, MeasureContext, Ref, WorkspaceId } from '@hcengineering/core'
import { Room } from '@hcengineering/love'
import { WorkspaceInfoRecord } from '@hcengineering/server-ai-bot'
import { getTransactorEndpoint } from '@hcengineering/server-client'
import { generateToken } from '@hcengineering/server-token'
import OpenAI from 'openai'
import { encodingForModel } from 'js-tiktoken'
import { htmlToMarkup, markupToHTML } from '@hcengineering/text'
import { Markup, MeasureContext, Ref, WorkspaceId } from '@hcengineering/core'
import { Room } from '@hcengineering/love'
import { encodingForModel } from 'js-tiktoken'
import OpenAI from 'openai'

import { WorkspaceClient } from './workspace/workspaceClient'
import { StorageAdapter } from '@hcengineering/server-core'
import { buildStorageFromConfig, storageConfigFromEnv } from '@hcengineering/server-storage'
import config from './config'
import { DbStorage } from './storage'
import { SupportWsClient } from './workspace/supportWsClient'
import { AIReplyTransferData } from './types'
import { tryAssignToWorkspace } from './utils/account'
import { translateHtml } from './utils/openai'
import { SupportWsClient } from './workspace/supportWsClient'
import { WorkspaceClient } from './workspace/workspaceClient'

const CLOSE_INTERVAL_MS = 10 * 60 * 1000 // 10 minutes

Expand All @@ -54,6 +56,7 @@ export class AIControl {
private readonly connectingWorkspaces = new Map<string, Promise<void>>()

readonly aiClient?: OpenAI
readonly storageAdapter: StorageAdapter
readonly encoding = encodingForModel(config.OpenAIModel)

supportClient: SupportWsClient | undefined = undefined
Expand All @@ -70,6 +73,7 @@ export class AIControl {
})
: undefined
void this.connectSupportWorkspace()
this.storageAdapter = buildStorageFromConfig(storageConfigFromEnv())
}

async getWorkspaceRecord (workspace: string): Promise<WorkspaceInfoRecord> {
Expand Down Expand Up @@ -125,10 +129,26 @@ export class AIControl {
this.ctx.info('Listen workspace: ', { workspace })

if (workspace === config.SupportWorkspace) {
return new SupportWsClient(endpoint, token, workspace, this, this.ctx.newChild(workspace, {}), info)
return new SupportWsClient(
this.storageAdapter,
endpoint,
token,
workspace,
this,
this.ctx.newChild(workspace, {}),
info
)
}

return new WorkspaceClient(endpoint, token, workspace, this, this.ctx.newChild(workspace, {}), info)
return new WorkspaceClient(
this.storageAdapter,
endpoint,
token,
workspace,
this,
this.ctx.newChild(workspace, {}),
info
)
}

async initWorkspaceClient (workspace: string): Promise<void> {
Expand Down
57 changes: 55 additions & 2 deletions services/ai-bot/pod-ai-bot/src/utils/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@
// limitations under the License.
//

import OpenAI from 'openai'
import { countTokens } from '@hcengineering/openai'
import { Tiktoken } from 'js-tiktoken'

import OpenAI from 'openai'
import config from '../config'
import { HistoryRecord } from '../types'
import { WorkspaceClient } from '../workspace/workspaceClient'
import { getTools } from './tools'

export async function translateHtml (client: OpenAI, html: string, lang: string): Promise<string | undefined> {
const response = await client.chat.completions.create({
Expand Down Expand Up @@ -66,6 +67,58 @@ export async function createChatCompletion (
return undefined
}

/**
 * Run a chat completion that may invoke workspace tools (OpenAI function calling).
 *
 * Builds a tool-enabled completion via `client.beta.chat.completions.runTools`,
 * prepending a system prompt, the supplied history and the new message.
 *
 * @param workspaceClient - workspace context used to resolve the available tools
 * @param client - OpenAI client instance
 * @param message - the new user/assistant message to complete
 * @param user - optional end-user identifier forwarded to OpenAI and to getTools
 * @param history - prior conversation messages, oldest first
 * @param skipCache - when true (default), sets `cf-skip-cache` so Cloudflare does not serve a cached response
 * @returns the final completion text and completion-token usage, or undefined on error
 */
export async function createChatCompletionWithTools (
  workspaceClient: WorkspaceClient,
  client: OpenAI,
  message: OpenAI.ChatCompletionMessageParam,
  user?: string,
  history: OpenAI.ChatCompletionMessageParam[] = [],
  skipCache = true
): Promise<
  | {
      completion: string | undefined
      usage: number
    }
  | undefined
> {
  const opt: OpenAI.RequestOptions = {}
  if (skipCache) {
    // Bypass edge caching so tool runs always reach the live model
    opt.headers = { 'cf-skip-cache': 'true' }
  }
  try {
    const res = client.beta.chat.completions.runTools(
      {
        messages: [
          {
            role: 'system',
            // Fixed: original prompt used a backtick (`) in place of the apostrophe in "don't"
            content: "Use tools if possible, don't use previous information after success using tool for user request"
          },
          ...history,
          message
        ],
        model: config.OpenAIModel,
        user,
        tools: getTools(workspaceClient, user)
      },
      opt
    )
    // Removed leftover per-message console.log debug listener
    const completion = (await res.finalContent()) ?? undefined
    const usage = (await res.totalUsage()).completion_tokens
    return { completion, usage }
  } catch (e) {
    // Best-effort: log and signal failure to the caller via undefined
    console.error(e)
  }

  return undefined
}

export async function requestSummary (
aiClient: OpenAI,
encoding: Tiktoken,
Expand Down
Loading

0 comments on commit d63db0f

Please sign in to comment.