# The RamaLama configuration file specifies all of the available configuration
# command-line options/flags for RamaLama, but in a TOML format that can be
# easily modified and versioned.
#
# Please refer to ramalama.conf(5) for details of all configuration options.
# Not all container engines implement all of the options.
#
# All of the options have hard-coded defaults; the settings in this file
# override those built-in defaults, and users can in turn override these
# settings on the command line. RamaLama reads ramalama.conf files in up to
# three locations, in the following order:
#
#  1. /usr/share/ramalama/ramalama.conf
#  2. /etc/ramalama/ramalama.conf
#  3. $XDG_CONFIG_HOME/ramalama/ramalama.conf, or
#     $HOME/.config/ramalama/ramalama.conf if $XDG_CONFIG_HOME is not set
#
# Items specified in a later ramalama.conf, if present, override settings from
# an earlier ramalama.conf as well as the default settings.
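#
# For example (illustrative, with an assumed value), a user-level override in
# $HOME/.config/ramalama/ramalama.conf that only raises the prompt context
# size could contain:
#
#   [ramalama]
#   ctx_size = 4096
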
[ramalama]

# OCI model car image
# Image to use when building and pushing --type=car models
#
#carimage = "registry.access.redhat.com/ubi9-micro:latest"

# Run RamaLama in the default container.
#
#container = true

# Size of the prompt context (0 = loaded from model)
#
#ctx_size = 2048

# Run RamaLama using the specified container engine.
# Valid options: podman, docker
#
#engine = "podman"

# OCI container image to run with the specified AI model
#
#image = "quay.io/ramalama/ramalama:latest"

# IP address for llama.cpp to listen on.
#
#host = "0.0.0.0"

# Pass `--group-add keep-groups` to Podman when Podman is the container engine.
# In some cases this is needed to access the GPU from a rootless container.
#
#keep_groups = false

# Default number of layers offloaded to the GPU
#
#ngl = 999

# Specify the default port for services to listen on
#
#port = "8080"

# Specify the AI runtime to use.
# Valid options: llama.cpp, vllm (default: llama.cpp)
#
#runtime = "llama.cpp"

# Store AI Models in the specified directory
#
#store = "$HOME/.local/share/ramalama"

# Temperature of the response from the AI model.
# llama.cpp explains this as:
#
#   The lower the number, the more deterministic the response.
#   The higher the number, the more creative the response, but it is more
#   likely to hallucinate when set too high.
#
# Usage: lower values suit virtual assistants that need deterministic
# responses; higher values suit roleplay or creative tasks such as editing
# stories.
#
#temp = 0.8
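#
# For example (illustrative values, not project defaults), a deterministic
# assistant setup might use:
#
#   temp = 0.1
#
# while a creative-writing setup might use:
#
#   temp = 1.2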

# Specify the default transport to be used for pulling and pushing AI Models.
# Valid options: oci, ollama, huggingface
#
#transport = "ollama"
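#
# For example (illustrative), to make Hugging Face the default source for
# pulls and pushes:
#
#   transport = "huggingface"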