-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchat.post.ts
130 lines (107 loc) · 3.7 KB
/
chat.post.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import { Configuration, OpenAIApi } from 'openai'
import { createClient } from '@supabase/supabase-js'
import GPT3Tokenizer from 'gpt3-tokenizer'
import { OpenAI } from 'openai-streams/node'
import { sendStream } from 'h3'
const openai = new OpenAIApi(
new Configuration({
apiKey: useRuntimeConfig().openaiApiKey
})
)
const supabase = createClient(
useRuntimeConfig().supabaseUrl,
useRuntimeConfig().supabaseKey
)
// @ts-ignore
const tokenizer = new GPT3Tokenizer.default({ type: 'gpt3' })
/**
 * POST /api/chat — retrieval-augmented chat endpoint.
 *
 * Embeds the user's query, fetches similar documentation chunks from
 * Supabase via the `match_documents` RPC, assembles them into a
 * token-budgeted context, and streams a GPT-4 completion back to the
 * client. On failure returns `{ error: string }` instead of throwing.
 */
export default defineEventHandler(async (event) => {
  try {
    const { query } = await readBody(event)

    // Validate up front: without this, a missing `query` makes
    // `query.replace` throw a TypeError and the client gets an
    // opaque "Cannot read properties of undefined" message.
    if (typeof query !== 'string' || query.trim().length === 0) {
      return { error: 'Request body must include a non-empty "query" string' }
    }

    // OpenAI recommends replacing newlines with spaces for best results
    const input = query.replace(/\n/g, ' ')

    // Generate a one-time embedding for the query itself
    const embeddingResponse = await openai.createEmbedding({
      model: 'text-embedding-ada-002',
      input
    })
    const [{ embedding }] = embeddingResponse.data.data

    // Vector similarity search over pre-embedded documentation chunks.
    const { error, data: documents } = await supabase.rpc('match_documents', {
      query_embedding: embedding,
      similarity_threshold: 0.1,
      match_count: 30
    })
    if (error) return { error: error.message }

    // Guard: supabase-js can return `data: null` with a null error;
    // dereferencing `documents.length` would then throw.
    const matches = documents ?? []

    // Concatenate matched documents into the prompt context,
    // stopping once the token budget is exhausted.
    let tokenCount = 0
    let contextText = ''
    for (const document of matches) {
      const content = document.content
      const encoded = tokenizer.encode(content)
      const prevTokenCount = tokenCount
      tokenCount += encoded.text.length
      // NOTE(review): 8192 is gpt-4's TOTAL window (prompt + completion);
      // budgeting the full 8192 for context alone risks overflow once the
      // system/example messages and the reply are added — confirm intent.
      if (tokenCount > 8192) {
        console.log('Previous token count', prevTokenCount)
        break
      }
      contextText += `${content.trim()}\n---\n`
    }

    console.log(
      'Context documents',
      matches.length,
      'Context documents scores',
      matches.map((i: any) => i.similarity),
      'token count',
      tokenCount
    )

    const systemContent = `You are a very enthusiastic Replicate representative who loves to help people! Given the following sections from the Replicate documentation, answer the question using only that information, outputted in markdown format. If you are unsure and the answer is not explicitly written in the documentation, say "Sorry, I don't know how to help with that.".`

    // One-shot example (user/assistant pair) to anchor answer style.
    const userContent = `Context sections:
You can use Replicate to run machine learning models in the cloud from your own code, without having to set up any servers. Our community has published hundreds of open-source models that you can run, or you can run your own models.
Question:
what is replicate?
`
    const assistantContent = `Replicate lets you run machine learning models with a cloud API, without having to understand the intricacies of machine learning or manage your own infrastructure. You can run open-source models that other people have published, or package and publish your own models. Those models can be public or private.`

    // The real question, grounded in the retrieved context.
    const userMessage = `Context sections:
${contextText}
Question:
${query}`

    const messages: any[] = [
      {
        role: 'system',
        content: systemContent
      },
      {
        role: 'user',
        content: userContent
      },
      {
        role: 'assistant',
        content: assistantContent
      },
      {
        role: 'user',
        content: userMessage
      }
    ]

    // Stream the completion; temperature 0 keeps answers deterministic.
    const stream = await OpenAI(
      'chat',
      {
        model: 'gpt-4',
        messages,
        stream: true,
        temperature: 0,
        top_p: 1,
        frequency_penalty: 0,
        presence_penalty: 0,
        n: 1
      },
      { apiKey: useRuntimeConfig().openaiApiKey }
    )
    return sendStream(event, stream)
  } catch (e: any) {
    console.error(e)
    return { error: e.message }
  }
})