Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions api/orchestration/v1alpha1/roleset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ type RoleSpec struct {
// +optional
Replicas *int32 `json:"replicas,omitempty"`

// UpgradeOrder specifies the order in which this role should be upgraded.
// Lower values are upgraded first. If not specified, defaults to 0.
// +optional
// +kubebuilder:validation:Minimum=0
Copy link
Collaborator

@googs1025 googs1025 Aug 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use this so that we can remove a lot of == nil judgments? 🤔

// +kubebuilder:default:=0

Because the comment says default is 0

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah makes sense. Ill fix that

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in 163f384 but I rather keep the == nil checks ( seemed more "responsible" to me ) if you think it's better to have them off I can adjust.

// +kubebuilder:validation:Type=integer
// +kubebuilder:default:=0
UpgradeOrder *int32 `json:"upgradeOrder,omitempty"`

// PodGroupSize is the number of pods to form a minimum role instance.
// +optional
PodGroupSize *int32 `json:"podGroupSize,omitempty"`
Expand Down
5 changes: 5 additions & 0 deletions api/orchestration/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -3687,6 +3687,11 @@ spec:
- type: string
x-kubernetes-int-or-string: true
type: object
upgradeOrder:
default: 0
format: int32
minimum: 0
type: integer
type: object
type: array
schedulingStrategy:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3754,6 +3754,11 @@ spec:
- type: string
x-kubernetes-int-or-string: true
type: object
upgradeOrder:
default: 0
format: int32
minimum: 0
type: integer
type: object
type: array
schedulingStrategy:
Expand Down
25 changes: 22 additions & 3 deletions pkg/controller/roleset/rolling.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ package roleset

import (
"context"
"fmt"
"math"
"strings"

orchestrationv1alpha1 "github.com/vllm-project/aibrix/api/orchestration/v1alpha1"

Expand Down Expand Up @@ -50,9 +52,26 @@ func (m *RollingManagerSequential) Next(ctx context.Context, roleSet *orchestrat
klog.Infof("[RollingManagerSequential.Next] waiting for roleset %s/%s to be scaled", roleSet.Namespace, roleSet.Name)
return nil
}
// 2. do the rollout process for each role
// TODO: in future, consider the rollout sequence based on the role's priority
for _, role := range roleSet.Spec.Roles {

// 2. Sort roles by upgrade order
klog.Infof("[RollingManagerSequential.Next] sorting roleset roles by UpgradeOrder")
sortedRoles := sortRolesByUpgradeOrder(roleSet.Spec.Roles)
var sequenceLines []string
for i, role := range sortedRoles {
order := int32(0)
if role.UpgradeOrder != nil {
order = *role.UpgradeOrder
}
sequenceLines = append(sequenceLines, fmt.Sprintf("[%d] %s (Order=%d)", i+1, role.Name, order))
}

klog.Infof("[RollingManagerSequential.Next] Upgrade sequence for %s/%s:\n%s",
roleSet.Namespace,
roleSet.Name,
strings.Join(sequenceLines, "\n"))

// 3. do the rollout process for each role by order
for _, role := range sortedRoles {
klog.Infof("[RollingManagerSequential.Next] start to rollout roleset %s/%s role %s", roleSet.Namespace, roleSet.Name, role.Name)
err := GetRoleSyncer(m.cli, &role).Rollout(ctx, roleSet, &role)
if err != nil {
Expand Down
17 changes: 17 additions & 0 deletions pkg/controller/roleset/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,20 @@ func deletePodsInBatch(ctx context.Context, cli client.Client, podsToDelete []*v
return cli.Delete(ctx, podsToDelete[index])
})
}

func sortRolesByUpgradeOrder(roles []orchestrationv1alpha1.RoleSpec) []orchestrationv1alpha1.RoleSpec {
sortedRoles := make([]orchestrationv1alpha1.RoleSpec, len(roles))
copy(sortedRoles, roles)
sort.SliceStable(sortedRoles, func(i, j int) bool {
orderI := int32(0)
if sortedRoles[i].UpgradeOrder != nil {
orderI = *sortedRoles[i].UpgradeOrder
}
orderJ := int32(0)
if sortedRoles[j].UpgradeOrder != nil {
orderJ = *sortedRoles[j].UpgradeOrder
}
return orderI < orderJ
})
return sortedRoles
}
105 changes: 105 additions & 0 deletions pkg/controller/roleset/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package roleset

import (
"reflect"
"testing"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -292,3 +293,107 @@ func makeNotReadyPod(name string) *corev1.Pod {
pod.Status.Conditions[0].Status = corev1.ConditionFalse
return pod
}

func TestSortRolesByUpgradeOrder(t *testing.T) {
int32Ptr := func(i int32) *int32 { return &i }

tests := []struct {
name string
roles []orchestrationv1alpha1.RoleSpec
expected []orchestrationv1alpha1.RoleSpec
}{
{
name: "empty roles",
roles: []orchestrationv1alpha1.RoleSpec{},
expected: []orchestrationv1alpha1.RoleSpec{},
},
{
name: "already sorted roles",
roles: []orchestrationv1alpha1.RoleSpec{
{Name: "role1", UpgradeOrder: int32Ptr(1)},
{Name: "role2", UpgradeOrder: int32Ptr(2)},
{Name: "role3", UpgradeOrder: int32Ptr(3)},
},
expected: []orchestrationv1alpha1.RoleSpec{
{Name: "role1", UpgradeOrder: int32Ptr(1)},
{Name: "role2", UpgradeOrder: int32Ptr(2)},
{Name: "role3", UpgradeOrder: int32Ptr(3)},
},
},
{
name: "unsorted roles",
roles: []orchestrationv1alpha1.RoleSpec{
{Name: "role3", UpgradeOrder: int32Ptr(3)},
{Name: "role1", UpgradeOrder: int32Ptr(1)},
{Name: "role2", UpgradeOrder: int32Ptr(2)},
},
expected: []orchestrationv1alpha1.RoleSpec{
{Name: "role1", UpgradeOrder: int32Ptr(1)},
{Name: "role2", UpgradeOrder: int32Ptr(2)},
{Name: "role3", UpgradeOrder: int32Ptr(3)},
},
},
{
name: "roles with nil upgrade order",
roles: []orchestrationv1alpha1.RoleSpec{
{Name: "role3", UpgradeOrder: int32Ptr(2)},
{Name: "role1", UpgradeOrder: nil},
{Name: "role2", UpgradeOrder: int32Ptr(1)},
},
expected: []orchestrationv1alpha1.RoleSpec{
{Name: "role1", UpgradeOrder: nil},
{Name: "role2", UpgradeOrder: int32Ptr(1)},
{Name: "role3", UpgradeOrder: int32Ptr(2)},
},
},
{
name: "roles with same upgrade order",
roles: []orchestrationv1alpha1.RoleSpec{
{Name: "role1", UpgradeOrder: int32Ptr(1)},
{Name: "role2", UpgradeOrder: int32Ptr(1)},
{Name: "role3", UpgradeOrder: int32Ptr(1)},
},
expected: []orchestrationv1alpha1.RoleSpec{
{Name: "role1", UpgradeOrder: int32Ptr(1)},
{Name: "role2", UpgradeOrder: int32Ptr(1)},
{Name: "role3", UpgradeOrder: int32Ptr(1)},
},
},
{
name: "mix of nil and non-nil upgrade orders",
roles: []orchestrationv1alpha1.RoleSpec{
{Name: "role4", UpgradeOrder: int32Ptr(2)},
{Name: "role1", UpgradeOrder: nil},
{Name: "role2", UpgradeOrder: nil},
{Name: "role3", UpgradeOrder: int32Ptr(1)},
},
expected: []orchestrationv1alpha1.RoleSpec{
{Name: "role1", UpgradeOrder: nil},
{Name: "role2", UpgradeOrder: nil},
{Name: "role3", UpgradeOrder: int32Ptr(1)},
{Name: "role4", UpgradeOrder: int32Ptr(2)},
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create a copy of input roles to verify the original slice is not modified
originalRoles := make([]orchestrationv1alpha1.RoleSpec, len(tt.roles))
copy(originalRoles, tt.roles)

result := sortRolesByUpgradeOrder(tt.roles)
t.Logf("result len %d", len(result))

// Check if the result matches expected
if !reflect.DeepEqual(result, tt.expected) {
t.Errorf("sortRolesByUpgradeOrder() = %v, want %v", result, tt.expected)
}

// Verify the original slice was not modified
if !reflect.DeepEqual(tt.roles, originalRoles) {
t.Errorf("Original roles were modified: got %v, want %v", tt.roles, originalRoles)
}
})
}
}
28 changes: 16 additions & 12 deletions test/integration/webhook/stormservice_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,10 @@ var _ = ginkgo.Describe("stormservice default webhook", func() {
UpdateStrategy: orchestrationapi.InterleaveRoleSetStrategyType,
Roles: []orchestrationapi.RoleSpec{
{
Name: "worker",
Replicas: ptr.To(int32(1)),
Stateful: false,
Name: "worker",
Replicas: ptr.To(int32(1)),
UpgradeOrder: ptr.To(int32(1)),
Stateful: false,
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
Expand Down Expand Up @@ -202,9 +203,10 @@ var _ = ginkgo.Describe("stormservice default webhook", func() {
},
},
{
Name: "master",
Replicas: ptr.To(int32(1)),
Stateful: true,
Name: "master",
Replicas: ptr.To(int32(1)),
UpgradeOrder: ptr.To(int32(1)),
Stateful: true,
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
Expand Down Expand Up @@ -326,9 +328,10 @@ func makeStormServiceWithNoSidecarInjection(name, namespace string,
UpdateStrategy: orchestrationapi.InterleaveRoleSetStrategyType,
Roles: []orchestrationapi.RoleSpec{
{
Name: "worker",
Replicas: ptr.To(int32(1)),
Stateful: false,
Name: "worker",
Replicas: ptr.To(int32(1)),
UpgradeOrder: ptr.To(int32(1)),
Stateful: false,
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
Expand Down Expand Up @@ -360,9 +363,10 @@ func makeStormServiceWithNoSidecarInjection(name, namespace string,
},
},
{
Name: "master",
Replicas: ptr.To(int32(1)),
Stateful: true,
Name: "master",
Replicas: ptr.To(int32(1)),
UpgradeOrder: ptr.To(int32(1)),
Stateful: true,
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
Expand Down