-
Notifications
You must be signed in to change notification settings - Fork 333
/
Copy pathapp.yaml
61 lines (50 loc) · 1.66 KB
/
app.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Input connectors for the documents to index. Every list item is one
# connector call; the commented-out items are optional alternatives.
$sources:
  # Local filesystem: reads `files-for-indexing` in binary format and
  # attaches metadata to each entry.
  - !pw.io.fs.read
    path: files-for-indexing
    format: binary
    with_metadata: true

  # SharePoint connector, configured through the $SHAREPOINT_* variables and
  # the `sharepointcert.pem` certificate file.
  # - !pw.xpacks.connectors.sharepoint.read
  #   url: $SHAREPOINT_URL
  #   tenant: $SHAREPOINT_TENANT
  #   client_id: $SHAREPOINT_CLIENT_ID
  #   cert_path: sharepointcert.pem
  #   thumbprint: $SHAREPOINT_THUMBPRINT
  #   root_path: $SHAREPOINT_ROOT
  #   with_metadata: true
  #   refresh_interval: 30

  # Google Drive connector, limited to files matching `*.pdf` and `*.pptx`.
  # - !pw.io.gdrive.read
  #   object_id: $DRIVE_ID
  #   service_user_credentials_file: gdrive_indexer.json
  #   file_name_pattern:
  #     - "*.pdf"
  #     - "*.pptx"
  #   object_size_limit: null
  #   with_metadata: true
  #   refresh_interval: 30
# Name of the embedding model handed to the embedder below.
$embedding_model: "mixedbread-ai/mxbai-embed-large-v1"

# Embedder used to vectorize document chunks; it is referenced by the
# retriever factory as `$embedder`.
$embedder: !pw.xpacks.llm.embedders.SentenceTransformerEmbedder
  model: $embedding_model
  # NOTE(review): presumably forwarded to the model's encode call - confirm.
  call_kwargs:
    # Canonical lowercase boolean; `False` relies on YAML 1.1 implicit typing.
    show_progress_bar: false
# Chunking step: splits parsed text by token count, with at most 400 tokens
# per chunk.
$splitter: !pw.xpacks.llm.splitters.TokenCountSplitter
  max_tokens: 400
# Parsing step applied to the documents from `$sources`; uses the default
# UDF cache strategy.
$parser: !pw.xpacks.llm.parsers.UnstructuredParser
  cache_strategy: !pw.udfs.DefaultCache
# Brute-force KNN index over the chunk embeddings, using cosine distance.
$retriever_factory: !pw.stdlib.indexing.BruteForceKnnFactory
  # NOTE(review): presumably the number of vectors pre-allocated - confirm.
  reserved_space: 1000
  embedder: $embedder
  metric: !pw.stdlib.indexing.BruteForceKnnMetricKind.COS
  # Must equal the embedder's output width. The configured model,
  # mixedbread-ai/mxbai-embed-large-v1, emits 1024-dimensional vectors
  # (1536 is the width of OpenAI's text-embedding-ada-002). Update this
  # value whenever `$embedding_model` changes.
  dimensions: 1024
# The document store wires the pieces defined above together: sources,
# parser, splitter and retriever factory.
document_store: !pw.xpacks.llm.document_store.DocumentStore
  docs: $sources
  parser: $parser
  splitter: $splitter
  retriever_factory: $retriever_factory
# Change host and port by uncommenting these lines
# host: "0.0.0.0"
# port: 8000
# Cache configuration
# with_cache: true
# If `terminate_on_error` is true, the program terminates whenever any error is encountered.
# It defaults to false; uncomment the following line to set it to true.
# terminate_on_error: true