Skip to content

Commit a709e29

Browse files
authored
[Feat] Support scaling LoRA adapters to multiple replicas (#1132)
* Remove unused labels import * Update sample files to support scaling * Update the LoRA scaling to be based on the replica count set as part of the ModelAdapter * Fix linting --------- Signed-off-by: dittops <[email protected]>
1 parent 227d3ba commit a709e29

File tree

8 files changed

+254
-254
lines changed

8 files changed

+254
-254
lines changed

pkg/controller/modeladapter/modeladapter_controller.go

Lines changed: 219 additions & 247 deletions
Large diffs are not rendered by default.

pkg/controller/modeladapter/resources.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,16 @@ import (
2525
"k8s.io/utils/ptr"
2626
)
2727

28-
func buildModelAdapterEndpointSlice(instance *modelv1alpha1.ModelAdapter, pod *corev1.Pod) *discoveryv1.EndpointSlice {
28+
func buildModelAdapterEndpointSlice(instance *modelv1alpha1.ModelAdapter, pods []corev1.Pod) *discoveryv1.EndpointSlice {
2929
serviceLabels := map[string]string{
3030
"kubernetes.io/service-name": instance.Name,
3131
}
3232

33-
addresses := []discoveryv1.Endpoint{
34-
{
33+
addresses := make([]discoveryv1.Endpoint, 0, len(pods))
34+
for _, pod := range pods {
35+
addresses = append(addresses, discoveryv1.Endpoint{
3536
Addresses: []string{pod.Status.PodIP},
36-
},
37+
})
3738
}
3839

3940
ports := []discoveryv1.EndpointPort{

pkg/controller/modeladapter/resources_test.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,16 @@ func TestBuildModelAdapterEndpointSlice(t *testing.T) {
3737
},
3838
}
3939

40-
// Mock input for Pod
41-
pod := &corev1.Pod{
40+
// Mock input for Pods
41+
pod := corev1.Pod{
4242
Status: corev1.PodStatus{
4343
PodIP: "192.168.1.1",
4444
},
4545
}
46+
pods := []corev1.Pod{pod}
4647

4748
// Call the function to test
48-
endpointSlice := buildModelAdapterEndpointSlice(instance, pod)
49+
endpointSlice := buildModelAdapterEndpointSlice(instance, pods)
4950

5051
// Check EndpointSlice metadata
5152
assert.Equal(t, "test-instance", endpointSlice.Name)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
apiVersion: model.aibrix.ai/v1alpha1
2+
kind: ModelAdapter
3+
metadata:
4+
name: sample-lora-multi-replica
5+
namespace: default
6+
spec:
7+
# Specify the number of replicas for the adapter
8+
# The adapter will be loaded on this many pods matching the selector
9+
replicas: 3
10+
# Pod selector to identify which pods can host this adapter
11+
podSelector:
12+
matchLabels:
13+
model.aibrix.ai/name: qwen-coder-1-5b-instruct
14+
adapter.model.aibrix.ai/enabled: "true"
15+
# URL for the LoRA adapter artifact
16+
artifactURL: "huggingface://SomethingNew/lora-adapter-demo"
17+
# Optional: Additional configuration
18+
additionalConfig:
19+
rank: "16"
20+
alpha: "32"

samples/adapter/adapter.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ metadata:
77
model.aibrix.ai/name: "qwen-code-lora"
88
model.aibrix.ai/port: "8000"
99
spec:
10+
# Optional: Number of replicas for the adapter (default: 1)
11+
# Uncomment to load the adapter on multiple pods
12+
# replicas: 3
1013
baseModel: qwen-coder-1-5b-instruct
1114
podSelector:
1215
matchLabels:

samples/adapter/base-api-key.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ spec:
1616
metadata:
1717
labels:
1818
model.aibrix.ai/name: qwen-coder-1-5b-instruct
19+
adapter.model.aibrix.ai/enabled: "true"
1920
spec:
2021
containers:
2122
- command:

samples/adapter/base-without-runtime.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ spec:
1616
metadata:
1717
labels:
1818
model.aibrix.ai/name: qwen-coder-1-5b-instruct
19+
adapter.model.aibrix.ai/enabled: "true"
1920
spec:
2021
containers:
2122
- command:

samples/adapter/base.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ spec:
1616
metadata:
1717
labels:
1818
model.aibrix.ai/name: qwen-coder-1-5b-instruct
19+
adapter.model.aibrix.ai/enabled: "true"
1920
spec:
2021
containers:
2122
- command:

0 commit comments

Comments
 (0)