
cherry-pick: unused env and select nodepool ID #209

Merged · 2 commits · Mar 24, 2025
2 changes: 0 additions & 2 deletions charts/karpenter/templates/deployment.yaml
@@ -46,8 +46,6 @@ spec:
value: "{{ .Values.controller.healthProbe.port }}"
- name: CLUSTER_ID
value: {{ .Values.controller.settings.clusterID }}
- name: CLUSTER_CNI
value: {{ .Values.controller.settings.clusterCNI }}
- name: TELEMETRY_SHARE
value: {{ .Values.controller.settings.telemetryShare }}
- name: KUBERNETES_MIN_VERSION
41 changes: 41 additions & 0 deletions charts/karpenter/templates/role.yaml
@@ -37,3 +37,44 @@ rules:
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: {{ include "karpenter.fullname" . }}-lease
  namespace: kube-node-lease
  labels:
    {{- include "karpenter.labels" . | nindent 4 }}
  {{- with .Values.additionalAnnotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ include "karpenter.fullname" . }}-lease
subjects:
  - kind: ServiceAccount
    name: {{ template "karpenter.serviceAccountName" . }}
    namespace: {{ .Release.Namespace }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ include "karpenter.fullname" . }}-lease
  namespace: kube-node-lease
  labels:
    {{- include "karpenter.labels" . | nindent 4 }}
  {{- with .Values.additionalAnnotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
rules:
  # Read
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["get", "list", "watch"]
  # Write
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["delete"]
2 changes: 0 additions & 2 deletions charts/karpenter/values.yaml
@@ -59,8 +59,6 @@ controller:
  affinity: {}
  tolerations: []
  settings:
    # -- The network cni used by the cluster.
    clusterCNI: terway-eniip
    # -- The external kubernetes cluster id for new nodes to connect with.
    clusterID: ""
    # -- The VM memory overhead as a percent that will be subtracted from the total memory for all instance types. The value of `0.075` equals to 7.5%.
57 changes: 57 additions & 0 deletions pkg/providers/ack/ack.go
@@ -24,6 +24,7 @@ import (
	"fmt"
	"net/http"
	"regexp"
	"sort"
	"strings"
	"sync"

@@ -103,15 +104,71 @@ func (p *DefaultProvider) GetClusterCNI(_ context.Context) (string, error) {
	return p.clusterCNI, nil
}

// getTargetNodePoolID returns the ID of the nodepool to use for the DescribeClusterAttachScripts request.
// If there is no default nodepool, the nodepool with the most HealthyNodes is selected.
//
//nolint:gocyclo
func (p *DefaultProvider) getTargetNodePoolID(ctx context.Context) (*string, error) {
	resp, err := p.ackClient.DescribeClusterNodePools(tea.String(p.clusterID), &ackclient.DescribeClusterNodePoolsRequest{})
	if err != nil {
		log.FromContext(ctx).Error(err, "Failed to describe cluster nodepools")
		return nil, err
	}
	if resp == nil || resp.Body == nil || resp.Body.Nodepools == nil {
		return nil, fmt.Errorf("empty describe cluster nodepools response")
	}
	if len(resp.Body.Nodepools) == 0 {
		return nil, fmt.Errorf("no nodepool found")
	}

	nodepools := resp.Body.Nodepools
	sort.Slice(nodepools, func(i, j int) bool {
		if nodepools[i].NodepoolInfo == nil || nodepools[j].NodepoolInfo == nil {
			return false
		}

		if nodepools[i].NodepoolInfo.IsDefault != nil && nodepools[j].NodepoolInfo.IsDefault != nil {
			if *nodepools[i].NodepoolInfo.IsDefault && !*nodepools[j].NodepoolInfo.IsDefault {
				return true
			}
			if !*nodepools[i].NodepoolInfo.IsDefault && *nodepools[j].NodepoolInfo.IsDefault {
				return false
			}
		}

		if nodepools[i].Status == nil || nodepools[j].Status == nil || nodepools[i].Status.HealthyNodes == nil || nodepools[j].Status.HealthyNodes == nil {
			return false
		}
		return *nodepools[i].Status.HealthyNodes > *nodepools[j].Status.HealthyNodes
	})

	targetNodepool := nodepools[0]
	if targetNodepool.NodepoolInfo == nil {
		return nil, fmt.Errorf("target describe cluster nodepool is empty")
	}
	return targetNodepool.NodepoolInfo.NodepoolId, nil
}

func (p *DefaultProvider) GetNodeRegisterScript(ctx context.Context,
	labels map[string]string,
	kubeletCfg *v1alpha1.KubeletConfiguration) (string, error) {
	if cachedScript, ok := p.cache.Get(p.clusterID); ok {
		return p.resolveUserData(cachedScript.(string), labels, kubeletCfg), nil
	}

	nodepoolID, err := p.getTargetNodePoolID(ctx)
	if err != nil {
		// Don't return here; we can still proceed when no nodepool ID could be found.
		// We need to try to obtain a usable nodepool ID in order to get the correct cluster attach scripts.
		// One known scenario is an ACK cluster on version 1.24 where the user deleted the default nodepool and
		// created a nodepool with the containerd runtime. The DescribeClusterAttachScripts API would then use the
		// CRI configuration of the deleted default nodepool, which might be the Docker runtime.
		// This could result in nodes failing to register with the cluster.
		log.FromContext(ctx).Error(err, "Failed to get default nodepool id")
	}
	reqPara := &ackclient.DescribeClusterAttachScriptsRequest{
		KeepInstanceName: tea.Bool(true),
		NodepoolId:       nodepoolID,
	}
	resp, err := p.ackClient.DescribeClusterAttachScripts(tea.String(p.clusterID), reqPara)
	if err != nil {
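
For readers skimming the selection logic above, here is a minimal, self-contained sketch of the ordering used by getTargetNodePoolID. It uses simplified stand-in structs and made-up nodepool names rather than the real ackclient response types, so treat it as an illustration of the comparator, not as the provider's code:

```go
package main

import (
	"fmt"
	"sort"
)

// nodepool is a simplified stand-in for the SDK response type used above;
// the real type exposes pointer fields such as NodepoolInfo.IsDefault and Status.HealthyNodes.
type nodepool struct {
	name         string
	isDefault    bool
	healthyNodes int64
}

func main() {
	pools := []nodepool{
		{name: "np-workers", isDefault: false, healthyNodes: 12},
		{name: "np-default", isDefault: true, healthyNodes: 3},
		{name: "np-gpu", isDefault: false, healthyNodes: 20},
	}

	// Same ordering as getTargetNodePoolID: a default nodepool sorts first,
	// otherwise the nodepool with the most healthy nodes is preferred.
	sort.Slice(pools, func(i, j int) bool {
		if pools[i].isDefault != pools[j].isDefault {
			return pools[i].isDefault
		}
		return pools[i].healthyNodes > pools[j].healthyNodes
	})

	fmt.Println("selected nodepool:", pools[0].name)
}
```

Run as written, this prints `selected nodepool: np-default`: the default nodepool wins even though other pools have more healthy nodes, matching the comment on getTargetNodePoolID.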