From afda7c057a7192abc22bb34433013b7f4924bee8 Mon Sep 17 00:00:00 2001
From: Brent Salisbury
Date: Tue, 3 Jun 2025 02:36:44 -0400
Subject: [PATCH] Add a kvcache example deployment

- Enables redis and ENABLE_KVCACHE_AWARE_SCORER

Signed-off-by: Brent Salisbury
---
# Reviewer notes. This area sits below the `---` separator, so it is outside
# the commit message and outside the diff.
#
# What the patch does:
#   * Creates two new values files (both are "new file mode 100644"):
#       - quickstart/examples/kvcache/kvcache.yaml            (30 added lines)
#       - quickstart/examples/kvcache/slim/kvcache-slim.yaml  (34 added lines)
#   * Both files select the base config map
#     `basic-gpu-with-nixl-and-redis-lookup-preset`, set `redis.enabled: true`,
#     and supply the same ten entries under
#     `modelservice.epp.defaultEnvVarsOverride`:
#       - "true":  ENABLE_KVCACHE_AWARE_SCORER, ENABLE_LOAD_AWARE_SCORER
#       - "false": ENABLE_PREFIX_AWARE_SCORER, ENABLE_SESSION_AWARE_SCORER,
#                  PD_ENABLED, and all four PREFILL_ENABLE_*_SCORER variables
#       - "10":    PD_PROMPT_LEN_THRESHOLD
#     Every env value is a double-quoted YAML string; `redis.enabled` is the
#     only bare boolean.
#   * kvcache.yaml uses the model meta-llama/Llama-3.2-3B-Instruct.
#   * kvcache-slim.yaml uses Qwen/Qwen3-0.6B and additionally sets
#     prefill replicas to 0 and decode replicas to 1.
#
# Handling notes:
#   * The hunk headers (+1,30 / +1,34) and the diffstat totals below count the
#     `+` lines exactly. Do not hand-edit the YAML inside this patch; change the
#     files in a work tree and regenerate the patch instead.
#   * NOTE(review): the YAML nesting depth shown in the hunks was restored from
#     a whitespace-collapsed copy of this patch (2-space block style, with
#     `prefill`/`decode` placed under `sampleApplication`) - confirm against
#     the original commit before applying.
 quickstart/examples/kvcache/kvcache.yaml      | 30 ++++++++++++++++
 .../examples/kvcache/slim/kvcache-slim.yaml   | 34 +++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 quickstart/examples/kvcache/kvcache.yaml
 create mode 100644 quickstart/examples/kvcache/slim/kvcache-slim.yaml

diff --git a/quickstart/examples/kvcache/kvcache.yaml b/quickstart/examples/kvcache/kvcache.yaml
new file mode 100644
index 0000000..8ef836f
--- /dev/null
+++ b/quickstart/examples/kvcache/kvcache.yaml
@@ -0,0 +1,30 @@
+sampleApplication:
+  baseConfigMapRefName: basic-gpu-with-nixl-and-redis-lookup-preset
+  model:
+    modelArtifactURI: hf://meta-llama/Llama-3.2-3B-Instruct
+    modelName: "meta-llama/Llama-3.2-3B-Instruct"
+redis:
+  enabled: true
+modelservice:
+  epp:
+    defaultEnvVarsOverride:
+      - name: ENABLE_KVCACHE_AWARE_SCORER
+        value: "true"
+      - name: ENABLE_PREFIX_AWARE_SCORER
+        value: "false"
+      - name: ENABLE_LOAD_AWARE_SCORER
+        value: "true"
+      - name: ENABLE_SESSION_AWARE_SCORER
+        value: "false"
+      - name: PD_ENABLED
+        value: "false"
+      - name: PD_PROMPT_LEN_THRESHOLD
+        value: "10"
+      - name: PREFILL_ENABLE_KVCACHE_AWARE_SCORER
+        value: "false"
+      - name: PREFILL_ENABLE_LOAD_AWARE_SCORER
+        value: "false"
+      - name: PREFILL_ENABLE_PREFIX_AWARE_SCORER
+        value: "false"
+      - name: PREFILL_ENABLE_SESSION_AWARE_SCORER
+        value: "false"
diff --git a/quickstart/examples/kvcache/slim/kvcache-slim.yaml b/quickstart/examples/kvcache/slim/kvcache-slim.yaml
new file mode 100644
index 0000000..a6ddd54
--- /dev/null
+++ b/quickstart/examples/kvcache/slim/kvcache-slim.yaml
@@ -0,0 +1,34 @@
+sampleApplication:
+  baseConfigMapRefName: basic-gpu-with-nixl-and-redis-lookup-preset
+  model:
+    modelArtifactURI: hf://Qwen/Qwen3-0.6B
+    modelName: "Qwen/Qwen3-0.6B"
+  prefill:
+    replicas: 0
+  decode:
+    replicas: 1
+redis:
+  enabled: true
+modelservice:
+  epp:
+    defaultEnvVarsOverride:
+      - name: ENABLE_KVCACHE_AWARE_SCORER
+        value: "true"
+      - name: ENABLE_PREFIX_AWARE_SCORER
+        value: "false"
+      - name: ENABLE_LOAD_AWARE_SCORER
+        value: "true"
+      - name: ENABLE_SESSION_AWARE_SCORER
+        value: "false"
+      - name: PD_ENABLED
+        value: "false"
+      - name: PD_PROMPT_LEN_THRESHOLD
+        value: "10"
+      - name: PREFILL_ENABLE_KVCACHE_AWARE_SCORER
+        value: "false"
+      - name: PREFILL_ENABLE_LOAD_AWARE_SCORER
+        value: "false"
+      - name: PREFILL_ENABLE_PREFIX_AWARE_SCORER
+        value: "false"
+      - name: PREFILL_ENABLE_SESSION_AWARE_SCORER
+        value: "false"