@@ -24,6 +24,7 @@ import (
24
24
"errors"
25
25
"fmt"
26
26
"net/http"
27
+ "sort"
27
28
"strings"
28
29
"sync"
29
30
@@ -105,6 +106,48 @@ func (p *DefaultProvider) GetClusterCNI(_ context.Context) (string, error) {
105
106
return p .clusterCNI , nil
106
107
}
107
108
109
+ // Get the ID of the default nodepool. If there is no default nodepool, select the nodepool with the most HealthyNodes.
110
+ func (p * DefaultProvider ) getDefaultNodePoolId (ctx context.Context ) (* string , error ) {
111
+ resp , err := p .ackClient .DescribeClusterNodePools (tea .String (p .clusterID ), & ackclient.DescribeClusterNodePoolsRequest {})
112
+ if err != nil {
113
+ log .FromContext (ctx ).Error (err , "Failed to describe cluster nodepools" )
114
+ return nil , err
115
+ }
116
+ if resp == nil || resp .Body == nil || resp .Body .Nodepools == nil {
117
+ return nil , fmt .Errorf ("empty describe cluster nodepools response" )
118
+ }
119
+ if len (resp .Body .Nodepools ) == 0 {
120
+ return nil , fmt .Errorf ("no nodepool found" )
121
+ }
122
+
123
+ nodepools := resp .Body .Nodepools
124
+ sort .Slice (nodepools , func (i , j int ) bool {
125
+ if nodepools [i ].NodepoolInfo == nil || nodepools [j ].NodepoolInfo == nil {
126
+ return false
127
+ }
128
+
129
+ if nodepools [i ].NodepoolInfo .IsDefault != nil && nodepools [j ].NodepoolInfo .IsDefault != nil {
130
+ if * nodepools [i ].NodepoolInfo .IsDefault && ! * nodepools [j ].NodepoolInfo .IsDefault {
131
+ return true
132
+ }
133
+ if ! * nodepools [i ].NodepoolInfo .IsDefault && * nodepools [j ].NodepoolInfo .IsDefault {
134
+ return false
135
+ }
136
+ }
137
+
138
+ if nodepools [i ].Status == nil || nodepools [j ].Status == nil || nodepools [i ].Status .HealthyNodes == nil || nodepools [j ].Status .HealthyNodes == nil {
139
+ return false
140
+ }
141
+ return * nodepools [i ].Status .HealthyNodes > * nodepools [j ].Status .HealthyNodes
142
+ })
143
+
144
+ targetNodepool := nodepools [0 ]
145
+ if targetNodepool .NodepoolInfo == nil {
146
+ return nil , fmt .Errorf ("target describe cluster nodepool is empty" )
147
+ }
148
+ return targetNodepool .NodepoolInfo .NodepoolId , nil
149
+ }
150
+
108
151
func (p * DefaultProvider ) GetNodeRegisterScript (ctx context.Context ,
109
152
capacityType string ,
110
153
nodeClaim * karpv1.NodeClaim ,
@@ -114,8 +157,19 @@ func (p *DefaultProvider) GetNodeRegisterScript(ctx context.Context,
114
157
return p .resolveUserData (cachedScript .(string ), labels , nodeClaim , kubeletCfg ), nil
115
158
}
116
159
160
+ nodepoolId , err := p .getDefaultNodePoolId (ctx )
161
+ if err != nil {
162
+ // Don't return here; we can still proceed when no default nodepool ID is available.
163
+ // We need to try to obtain a usable nodepool ID in order to get the cluster attach scripts.
164
+ // One known scenario is on an ACK cluster with version 1.24, where the user deleted the default nodepool and
165
+ // created a nodepool with a containerd runtime. The DescribeClusterAttachScriptsRequest api will use the
166
+ // CRI configuration of the deleted default nodepool, which might be using the Docker runtime.
167
+ // This could result in nodes failing to register to the new cluster.
168
+ log .FromContext (ctx ).Error (err , "Failed to get default nodepool id" )
169
+ }
117
170
reqPara := & ackclient.DescribeClusterAttachScriptsRequest {
118
171
KeepInstanceName : tea .Bool (true ),
172
+ NodepoolId : nodepoolId ,
119
173
}
120
174
resp , err := p .ackClient .DescribeClusterAttachScripts (tea .String (p .clusterID ), reqPara )
121
175
if err != nil {
0 commit comments