Skip to content

Commit 0a6fcfe

Browse files
authored
Merge pull request #208 from Vacant2333/select-nodepool-id-when-describe-cluster-attach-scriptes
fix: select nodepool id when describe cluster attach scripts
2 parents 6d06c50 + eb3c38e commit 0a6fcfe

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed

pkg/providers/ack/ack.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"errors"
2525
"fmt"
2626
"net/http"
27+
"sort"
2728
"strings"
2829
"sync"
2930

@@ -105,6 +106,51 @@ func (p *DefaultProvider) GetClusterCNI(_ context.Context) (string, error) {
105106
return p.clusterCNI, nil
106107
}
107108

109+
// Get the ID of the target nodepool id when DescribeClusterAttachScriptsRequest.
110+
// If there is no default nodepool, select the nodepool with the most HealthyNodes.
111+
//
112+
//nolint:gocyclo
113+
func (p *DefaultProvider) getTargetNodePoolID(ctx context.Context) (*string, error) {
114+
resp, err := p.ackClient.DescribeClusterNodePools(tea.String(p.clusterID), &ackclient.DescribeClusterNodePoolsRequest{})
115+
if err != nil {
116+
log.FromContext(ctx).Error(err, "Failed to describe cluster nodepools")
117+
return nil, err
118+
}
119+
if resp == nil || resp.Body == nil || resp.Body.Nodepools == nil {
120+
return nil, fmt.Errorf("empty describe cluster nodepools response")
121+
}
122+
if len(resp.Body.Nodepools) == 0 {
123+
return nil, fmt.Errorf("no nodepool found")
124+
}
125+
126+
nodepools := resp.Body.Nodepools
127+
sort.Slice(nodepools, func(i, j int) bool {
128+
if nodepools[i].NodepoolInfo == nil || nodepools[j].NodepoolInfo == nil {
129+
return false
130+
}
131+
132+
if nodepools[i].NodepoolInfo.IsDefault != nil && nodepools[j].NodepoolInfo.IsDefault != nil {
133+
if *nodepools[i].NodepoolInfo.IsDefault && !*nodepools[j].NodepoolInfo.IsDefault {
134+
return true
135+
}
136+
if !*nodepools[i].NodepoolInfo.IsDefault && *nodepools[j].NodepoolInfo.IsDefault {
137+
return false
138+
}
139+
}
140+
141+
if nodepools[i].Status == nil || nodepools[j].Status == nil || nodepools[i].Status.HealthyNodes == nil || nodepools[j].Status.HealthyNodes == nil {
142+
return false
143+
}
144+
return *nodepools[i].Status.HealthyNodes > *nodepools[j].Status.HealthyNodes
145+
})
146+
147+
targetNodepool := nodepools[0]
148+
if targetNodepool.NodepoolInfo == nil {
149+
return nil, fmt.Errorf("target describe cluster nodepool is empty")
150+
}
151+
return targetNodepool.NodepoolInfo.NodepoolId, nil
152+
}
153+
108154
func (p *DefaultProvider) GetNodeRegisterScript(ctx context.Context,
109155
capacityType string,
110156
nodeClaim *karpv1.NodeClaim,
@@ -114,8 +160,19 @@ func (p *DefaultProvider) GetNodeRegisterScript(ctx context.Context,
114160
return p.resolveUserData(cachedScript.(string), labels, nodeClaim, kubeletCfg), nil
115161
}
116162

163+
nodepoolID, err := p.getTargetNodePoolID(ctx)
164+
if err != nil {
165+
// Don't return here, we can still proceed when no default nodepool id could be determined.
166+
// We need to try to obtain a usable nodepool ID in order to get the cluster attach scripts.
167+
// One known scenario is on an ACK cluster with version 1.24, where the user deleted the default nodepool and
168+
// created a nodepool with a containerd runtime. The DescribeClusterAttachScriptsRequest api will use the
169+
// CRI configuration of the deleted default nodepool, which might be using the Docker runtime.
170+
// This could result in nodes failing to register to the new cluster.
171+
log.FromContext(ctx).Error(err, "Failed to get default nodepool id")
172+
}
117173
reqPara := &ackclient.DescribeClusterAttachScriptsRequest{
118174
KeepInstanceName: tea.Bool(true),
175+
NodepoolId: nodepoolID,
119176
}
120177
resp, err := p.ackClient.DescribeClusterAttachScripts(tea.String(p.clusterID), reqPara)
121178
if err != nil {

0 commit comments

Comments
 (0)