Skip to content

Commit a3f82c1

Browse files
committed
tidy up config examples in README
1 parent 5c97299 commit a3f82c1

File tree

1 file changed

+13
-20
lines changed

1 file changed

+13
-20
lines changed

README.md

+13-20
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,14 @@ logRequests: true
7070
# define valid model values and the upstream server start
7171
models:
7272
"llama":
73-
cmd: llama-server --port 8999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf
73+
# multiline for readability
74+
cmd: >
75+
llama-server --port 8999
76+
--model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
77+
78+
# environment variables to pass to the command
79+
env:
80+
- "CUDA_VISIBLE_DEVICES=0"
7481

7582
# where to reach the server started by cmd, make sure the ports match
7683
proxy: http://127.0.0.1:8999
@@ -91,16 +98,9 @@ models:
9198
# default: 0 = never unload model
9299
ttl: 60
93100

94-
"qwen":
95-
# environment variables to pass to the command
96-
env:
97-
- "CUDA_VISIBLE_DEVICES=0"
98-
99-
# multiline for readability
100-
cmd: >
101-
llama-server --port 8999
102-
--model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
103-
proxy: http://127.0.0.1:8999
101+
# `useModelName` overrides the model name in the request
102+
# and sends a specific name to the upstream server
103+
useModelName: "qwen:qwq"
104104

105105
# unlisted models do not show up in /v1/models or /upstream lists
106106
# but they can still be requested as normal
@@ -117,23 +117,16 @@ models:
117117
ghcr.io/ggerganov/llama.cpp:server
118118
--model '/models/Qwen2.5-Coder-0.5B-Instruct-Q4_K_M.gguf'
119119
120-
# `useModelName` will send a specific model name to the upstream server
121-
# overriding whatever was set in the request
122-
"qwq":
123-
proxy: http://127.0.0.1:11434
124-
cmd: my-server
125-
useModelName: "qwen:qwq"
126-
127-
# profiles make it easy to managing multi model (and gpu) configurations.
120+
# profiles eliminate swapping by running multiple models at the same time
128121
#
129122
# Tips:
130123
# - each model must be listening on a unique address and port
131124
# - the model name is in this format: "profile_name:model", like "coding:qwen"
132125
# - the profile will load and unload all models in the profile at the same time
133126
profiles:
134127
coding:
135-
- "qwen"
136128
- "llama"
129+
- "qwen-unlisted"
137130
```
138131
139132
### Use Case Examples

0 commit comments

Comments
 (0)