Skip to content

Commit 7fdb848

Browse files
committed
select nodepool id when describe cluster attach scripts
Signed-off-by: Vacant2333 <[email protected]>
1 parent 6d06c50 commit 7fdb848

File tree

1 file changed

+54
-0
lines changed

1 file changed

+54
-0
lines changed

pkg/providers/ack/ack.go

+54
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"errors"
2525
"fmt"
2626
"net/http"
27+
"sort"
2728
"strings"
2829
"sync"
2930

@@ -105,6 +106,48 @@ func (p *DefaultProvider) GetClusterCNI(_ context.Context) (string, error) {
105106
return p.clusterCNI, nil
106107
}
107108

109+
// Get the ID of the default nodepool. If there is no default nodepool, select the nodepool with the most HealthyNodes.
110+
func (p *DefaultProvider) getDefaultNodePoolId(ctx context.Context) (*string, error) {
111+
resp, err := p.ackClient.DescribeClusterNodePools(tea.String(p.clusterID), &ackclient.DescribeClusterNodePoolsRequest{})
112+
if err != nil {
113+
log.FromContext(ctx).Error(err, "Failed to describe cluster nodepools")
114+
return nil, err
115+
}
116+
if resp == nil || resp.Body == nil || resp.Body.Nodepools == nil {
117+
return nil, fmt.Errorf("empty describe cluster nodepools response")
118+
}
119+
if len(resp.Body.Nodepools) == 0 {
120+
return nil, fmt.Errorf("no nodepool found")
121+
}
122+
123+
nodepools := resp.Body.Nodepools
124+
sort.Slice(nodepools, func(i, j int) bool {
125+
if nodepools[i].NodepoolInfo == nil || nodepools[j].NodepoolInfo == nil {
126+
return false
127+
}
128+
129+
if nodepools[i].NodepoolInfo.IsDefault != nil && nodepools[j].NodepoolInfo.IsDefault != nil {
130+
if *nodepools[i].NodepoolInfo.IsDefault && !*nodepools[j].NodepoolInfo.IsDefault {
131+
return true
132+
}
133+
if !*nodepools[i].NodepoolInfo.IsDefault && *nodepools[j].NodepoolInfo.IsDefault {
134+
return false
135+
}
136+
}
137+
138+
if nodepools[i].Status == nil || nodepools[j].Status == nil || nodepools[i].Status.HealthyNodes == nil || nodepools[j].Status.HealthyNodes == nil {
139+
return false
140+
}
141+
return *nodepools[i].Status.HealthyNodes > *nodepools[j].Status.HealthyNodes
142+
})
143+
144+
targetNodepool := nodepools[0]
145+
if targetNodepool.NodepoolInfo == nil {
146+
return nil, fmt.Errorf("target describe cluster nodepool is empty")
147+
}
148+
return targetNodepool.NodepoolInfo.NodepoolId, nil
149+
}
150+
108151
func (p *DefaultProvider) GetNodeRegisterScript(ctx context.Context,
109152
capacityType string,
110153
nodeClaim *karpv1.NodeClaim,
@@ -114,8 +157,19 @@ func (p *DefaultProvider) GetNodeRegisterScript(ctx context.Context,
114157
return p.resolveUserData(cachedScript.(string), labels, nodeClaim, kubeletCfg), nil
115158
}
116159

160+
nodepoolId, err := p.getDefaultNodePoolId(ctx)
161+
if err != nil {
162+
// Don't return here; we can still proceed when no default nodepool ID is available.
163+
// We need to try to obtain a usable nodepool ID in order to get the cluster attach scripts.
164+
// One known scenario is on an ACK cluster with version 1.24, where the user deleted the default nodepool and
165+
// created a nodepool with a containerd runtime. The DescribeClusterAttachScriptsRequest api will use the
166+
// CRI configuration of the deleted default nodepool, which might be using the Docker runtime.
167+
// This could result in nodes failing to register to the new cluster.
168+
log.FromContext(ctx).Error(err, "Failed to get default nodepool id")
169+
}
117170
reqPara := &ackclient.DescribeClusterAttachScriptsRequest{
118171
KeepInstanceName: tea.Bool(true),
172+
NodepoolId: nodepoolId,
119173
}
120174
resp, err := p.ackClient.DescribeClusterAttachScripts(tea.String(p.clusterID), reqPara)
121175
if err != nil {

0 commit comments

Comments
 (0)