Skip to content

Commit b3a6424

Browse files
committed
Update Tiltfile with AKS VNet peering and deletion logic
- update aks-as-mgmt scripts with VNet creation and all clusters deletion - update default template - update default aad - update azure-bastion templates - update azure-cni-v1 templates - update edgezone templates - update ephemeral templates - update private templates - update flatcar templates - update nvidia-gpu templates
1 parent 736bfbc commit b3a6424

File tree

18 files changed

+315
-26
lines changed

18 files changed

+315
-26
lines changed

Tiltfile

Lines changed: 82 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ settings = {
2323
"capi_version": "v1.8.5",
2424
"caaph_version": "v0.2.5",
2525
"cert_manager_version": "v1.16.1",
26-
"kubernetes_version": "v1.28.3",
27-
"aks_kubernetes_version": "v1.28.3",
26+
"kubernetes_version": "v1.28.15",
27+
"aks_kubernetes_version": "v1.28.15",
2828
"flatcar_version": "3374.2.1",
2929
"azure_location": "eastus",
3030
"control_plane_machine_count": "1",
@@ -212,10 +212,10 @@ def capz():
212212
yaml = str(kustomizesub("./hack/observability")) # build an observable kind deployment by default
213213

214214
# add extra_args if they are defined
215-
if settings.get("extra_args"):
216-
azure_extra_args = settings.get("extra_args").get("azure")
215+
if settings.get("container_args"):
216+
capz_container_args = settings.get("container_args").get("capz-controller-manager")
217217
yaml_dict = decode_yaml_stream(yaml)
218-
append_arg_for_container_in_deployment(yaml_dict, "capz-controller-manager", "capz-system", "cluster-api-azure-controller", azure_extra_args)
218+
append_arg_for_container_in_deployment(yaml_dict, "capz-controller-manager", "capz-system", "cluster-api-azure-controller", capz_container_args)
219219
yaml = str(encode_yaml_stream(yaml_dict))
220220
yaml = fixup_yaml_empty_arrays(yaml)
221221

@@ -317,9 +317,14 @@ def flavors():
317317
for template in template_list:
318318
deploy_worker_templates(template, substitutions)
319319

320+
delete_all_workload_clusters = kubectl_cmd + " delete clusters --all --wait=false"
321+
322+
if "aks" in settings.get("kustomize_substitutions", {}).get("MGMT_CLUSTER_NAME", ""):
323+
delete_all_workload_clusters += clear_aks_vnet_peerings()
324+
320325
local_resource(
321326
name = "delete-all-workload-clusters",
322-
cmd = kubectl_cmd + " delete clusters --all --wait=false",
327+
cmd = ["sh", "-ec", delete_all_workload_clusters],
323328
auto_init = False,
324329
trigger_mode = TRIGGER_MODE_MANUAL,
325330
labels = ["flavors"],
@@ -382,17 +387,29 @@ def deploy_worker_templates(template, substitutions):
382387

383388
yaml = shlex.quote(yaml)
384389
flavor_name = os.path.basename(flavor)
385-
flavor_cmd = "RANDOM=$(bash -c 'echo $RANDOM'); export CLUSTER_NAME=" + flavor.replace("windows", "win") + "-$RANDOM; make generate-flavors; echo " + yaml + "> ./.tiltbuild/" + flavor + "; cat ./.tiltbuild/" + flavor + " | " + envsubst_cmd + " | " + kubectl_cmd + " apply -f -; echo \"Cluster \'$CLUSTER_NAME\' created, don't forget to delete\""
390+
flavor_cmd = "RANDOM=$(bash -c 'echo $RANDOM')"
391+
flavor_cmd += "; export CLUSTER_NAME=" + flavor.replace("windows", "win") + "-$RANDOM; echo " + yaml + "> ./.tiltbuild/" + flavor + "; cat ./.tiltbuild/" + flavor + " | " + envsubst_cmd + " | " + kubectl_cmd + " apply -f -"
392+
flavor_cmd += "; echo \"Cluster \'$CLUSTER_NAME\' created, don't forget to delete\""
386393

387394
# wait for kubeconfig to be available
388-
flavor_cmd += "; until " + kubectl_cmd + " get secret ${CLUSTER_NAME}-kubeconfig > /dev/null 2>&1; do sleep 5; done; " + kubectl_cmd + " get secret ${CLUSTER_NAME}-kubeconfig -o jsonpath={.data.value} | base64 --decode > ./${CLUSTER_NAME}.kubeconfig; chmod 600 ./${CLUSTER_NAME}.kubeconfig; until " + kubectl_cmd + " --kubeconfig=./${CLUSTER_NAME}.kubeconfig get nodes > /dev/null 2>&1; do sleep 5; done"
395+
flavor_cmd += "; echo \"Waiting for kubeconfig to be available\""
396+
flavor_cmd += "; until " + kubectl_cmd + " get secret ${CLUSTER_NAME}-kubeconfig > /dev/null 2>&1; do sleep 5; done"
397+
flavor_cmd += "; " + kubectl_cmd + " get secret ${CLUSTER_NAME}-kubeconfig -o jsonpath={.data.value} | base64 --decode > ./${CLUSTER_NAME}.kubeconfig"
398+
flavor_cmd += "; chmod 600 ./${CLUSTER_NAME}.kubeconfig"
399+
flavor_cmd += "; echo \"Kubeconfig for $CLUSTER_NAME created and saved in the local\""
400+
flavor_cmd += "; echo \"Waiting for $CLUSTER_NAME API Server to be accessible\""
401+
flavor_cmd += "; until " + kubectl_cmd + " --kubeconfig=./${CLUSTER_NAME}.kubeconfig get nodes > /dev/null 2>&1; do sleep 5; done"
402+
flavor_cmd += "; echo \"API Server of $CLUSTER_NAME is accessible\""
389403

390404
# copy the kubeadm configmap to the calico-system namespace.
391405
# This is a workaround needed for the calico-node-windows daemonset to be able to run in the calico-system namespace.
392406
if "windows" in flavor_name:
393407
flavor_cmd += "; until " + kubectl_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig get configmap kubeadm-config --namespace=kube-system > /dev/null 2>&1; do sleep 5; done"
394408
flavor_cmd += "; " + kubectl_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig create namespace calico-system --dry-run=client -o yaml | " + kubectl_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig apply -f -; " + kubectl_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig get configmap kubeadm-config --namespace=kube-system -o yaml | sed 's/namespace: kube-system/namespace: calico-system/' | " + kubectl_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig apply -f -"
395409

410+
if "aks" in settings.get("kustomize_substitutions", {}).get("MGMT_CLUSTER_NAME", ""):
411+
flavor_cmd += peer_vnets()
412+
396413
flavor_cmd += get_addons(flavor_name)
397414

398415
local_resource(
@@ -454,6 +471,63 @@ def waitforsystem():
454471
local(kubectl_cmd + " wait --for=condition=ready --timeout=300s pod --all -n capi-kubeadm-control-plane-system")
455472
local(kubectl_cmd + " wait --for=condition=ready --timeout=300s pod --all -n capi-system")
456473

474+
def peer_vnets():
475+
# TODO: check for az cli to be installed in local
476+
# wait for AKS VNet to be in the state created
477+
peering_cmd = "; echo \"--------Peering VNETs--------\""
478+
peering_cmd += "; az network vnet wait --resource-group ${AKS_RESOURCE_GROUP} --name ${AKS_MGMT_VNET_NAME} --created --timeout 180"
479+
peering_cmd += "; export MGMT_VNET_ID=$(az network vnet show --resource-group ${AKS_RESOURCE_GROUP} --name ${AKS_MGMT_VNET_NAME} --query id --output tsv)"
480+
peering_cmd += "; echo \" 1/8 ${AKS_MGMT_VNET_NAME} found \""
481+
482+
# wait for workload VNet to be created
483+
peering_cmd += "; az network vnet wait --resource-group ${CLUSTER_NAME} --name ${CLUSTER_NAME}-vnet --created --timeout 180"
484+
peering_cmd += "; export WORKLOAD_VNET_ID=$(az network vnet show --resource-group ${CLUSTER_NAME} --name ${CLUSTER_NAME}-vnet --query id --output tsv)"
485+
peering_cmd += "; echo \" 2/8 ${CLUSTER_NAME}-vnet found \""
486+
487+
# peer mgmt vnet
488+
peering_cmd += "; az network vnet peering create --name mgmt-to-${CLUSTER_NAME} --resource-group ${AKS_RESOURCE_GROUP} --vnet-name ${AKS_MGMT_VNET_NAME} --remote-vnet \"${WORKLOAD_VNET_ID}\" --allow-vnet-access true --allow-forwarded-traffic true --only-show-errors --output none"
489+
peering_cmd += "; az network vnet peering wait --name mgmt-to-${CLUSTER_NAME} --resource-group ${AKS_RESOURCE_GROUP} --vnet-name ${AKS_MGMT_VNET_NAME} --created --timeout 300 --only-show-errors --output none"
490+
peering_cmd += "; echo \" 3/8 mgmt-to-${CLUSTER_NAME} peering created in ${AKS_MGMT_VNET_NAME}\""
491+
492+
# peer workload vnet
493+
peering_cmd += "; az network vnet peering create --name ${CLUSTER_NAME}-to-mgmt --resource-group ${CLUSTER_NAME} --vnet-name ${CLUSTER_NAME}-vnet --remote-vnet \"${MGMT_VNET_ID}\" --allow-vnet-access true --allow-forwarded-traffic true --only-show-errors --output none"
494+
peering_cmd += "; az network vnet peering wait --name ${CLUSTER_NAME}-to-mgmt --resource-group ${CLUSTER_NAME} --vnet-name ${CLUSTER_NAME}-vnet --created --timeout 300 --only-show-errors --output none"
495+
peering_cmd += "; echo \" 4/8 ${CLUSTER_NAME}-to-mgmt peering created in ${CLUSTER_NAME}-vnet\""
496+
497+
# create private DNS zone
498+
peering_cmd += "; az network private-dns zone create --resource-group ${CLUSTER_NAME} --name ${AZURE_LOCATION}.cloudapp.azure.com --only-show-errors --output none"
499+
peering_cmd += "; az network private-dns zone wait --resource-group ${CLUSTER_NAME} --name ${AZURE_LOCATION}.cloudapp.azure.com --created --timeout 300 --only-show-errors --output none"
500+
peering_cmd += "; echo \" 5/8 ${AZURE_LOCATION}.cloudapp.azure.com private DNS zone created in ${CLUSTER_NAME}\""
501+
502+
# link private DNS Zone to workload vnet
503+
peering_cmd += "; az network private-dns link vnet create --resource-group ${CLUSTER_NAME} --zone-name ${AZURE_LOCATION}.cloudapp.azure.com --name ${CLUSTER_NAME}-to-mgmt --virtual-network \"${WORKLOAD_VNET_ID}\" --registration-enabled false --only-show-errors --output none"
504+
peering_cmd += "; az network private-dns link vnet wait --resource-group ${CLUSTER_NAME} --zone-name ${AZURE_LOCATION}.cloudapp.azure.com --name ${CLUSTER_NAME}-to-mgmt --created --timeout 300 --only-show-errors --output none"
505+
peering_cmd += "; echo \" 6/8 workload cluster vnet ${CLUSTER_NAME}-vnet linked with private DNS zone\""
506+
507+
# link private DNS Zone to mgmt vnet
508+
peering_cmd += "; az network private-dns link vnet create --resource-group ${CLUSTER_NAME} --zone-name ${AZURE_LOCATION}.cloudapp.azure.com --name mgmt-to-${CLUSTER_NAME} --virtual-network \"${MGMT_VNET_ID}\" --registration-enabled false --only-show-errors --output none"
509+
peering_cmd += "; az network private-dns link vnet wait --resource-group ${CLUSTER_NAME} --zone-name ${AZURE_LOCATION}.cloudapp.azure.com --name mgmt-to-${CLUSTER_NAME} --created --timeout 300 --only-show-errors --output none"
510+
peering_cmd += "; echo \" 7/8 management cluster vnet ${AKS_MGMT_VNET_NAME} linked with private DNS zone\""
511+
512+
# create private DNS zone record
513+
# TODO: 10.0.0.100 should be customizable
514+
peering_cmd += "; az network private-dns record-set a add-record --resource-group ${CLUSTER_NAME} --zone-name ${AZURE_LOCATION}.cloudapp.azure.com --record-set-name ${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX} --ipv4-address 10.0.0.100 --only-show-errors --output none"
515+
peering_cmd += "; echo \" 8/8 ${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX} private DNS zone record created\n\""
516+
517+
return peering_cmd
518+
519+
def clear_aks_vnet_peerings():
520+
delete_peering_cmd = "; echo \"--------Clearing AKS MGMT VNETs Peerings--------\""
521+
delete_peering_cmd += "; az network vnet wait --resource-group ${AKS_RESOURCE_GROUP} --name ${AKS_MGMT_VNET_NAME} --created --timeout 180"
522+
delete_peering_cmd += "; echo \" ${AKS_MGMT_VNET_NAME} found \""
523+
524+
# List all peering names and store them in an array
525+
delete_peering_cmd += "; PEERING_NAMES=$(az network vnet peering list --resource-group ${AKS_RESOURCE_GROUP} --vnet-name ${AKS_MGMT_VNET_NAME} --query \"[].name\" --output tsv)"
526+
delete_peering_cmd += "; for PEERING_NAME in ${PEERING_NAMES[@]}; do echo \"Deleting peering: ${PEERING_NAME}\"; az network vnet peering delete --name ${PEERING_NAME} --resource-group ${AKS_RESOURCE_GROUP} --vnet-name ${AKS_MGMT_VNET_NAME}; done"
527+
delete_peering_cmd += "; echo \"All VNETs Peerings deleted in ${AKS_MGMT_VNET_NAME}\""
528+
529+
return delete_peering_cmd
530+
457531
##############################
458532
# Actual work happens here
459533
##############################

scripts/aks-as-mgmt.sh

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ make --directory="${REPO_ROOT}" "${KUBECTL##*/}" "${AZWI##*/}"
3030
export MGMT_CLUSTER_NAME="${MGMT_CLUSTER_NAME:-aks-mgmt-capz-${RANDOM_SUFFIX}}" # management cluster name
3131
export AKS_RESOURCE_GROUP="${AKS_RESOURCE_GROUP:-aks-mgmt-capz-${RANDOM_SUFFIX}}" # resource group name
3232
export AKS_NODE_RESOURCE_GROUP="node-${AKS_RESOURCE_GROUP}"
33-
export KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.30.2}"
33+
export AKS_MGMT_KUBERNETES_VERSION="${AKS_MGMT_KUBERNETES_VERSION:-v1.30.2}"
3434
export AZURE_LOCATION="${AZURE_LOCATION:-westus2}"
3535
export AKS_NODE_VM_SIZE="${AKS_NODE_VM_SIZE:-"Standard_B2s"}"
3636
export AKS_NODE_COUNT="${AKS_NODE_COUNT:-1}"
@@ -42,6 +42,13 @@ export AZWI_STORAGE_CONTAINER="\$web"
4242
export SERVICE_ACCOUNT_SIGNING_PUB_FILEPATH="${SERVICE_ACCOUNT_SIGNING_PUB_FILEPATH:-}"
4343
export SERVICE_ACCOUNT_SIGNING_KEY_FILEPATH="${SERVICE_ACCOUNT_SIGNING_KEY_FILEPATH:-}"
4444
export REGISTRY="${REGISTRY:-}"
45+
export AKS_MGMT_VNET_NAME="${AKS_MGMT_VNET_NAME:-"aks-mgmt-vnet-${RANDOM_SUFFIX}"}"
46+
export AKS_MGMT_VNET_CIDR="${AKS_MGMT_VNET_CIDR:-"20.255.0.0/16"}"
47+
export AKS_MGMT_SERVICE_CIDR="${AKS_MGMT_SERVICE_CIDR:-"20.255.254.0/24"}"
48+
export AKS_MGMT_DNS_SERVICE_IP="${AKS_MGMT_DNS_SERVICE_IP:-"20.255.254.100"}"
49+
export AKS_MGMT_SUBNET_NAME="${AKS_MGMT_SUBNET_NAME:-"aks-mgmt-subnet-${RANDOM_SUFFIX}"}"
50+
export AKS_MGMT_SUBNET_CIDR="${AKS_MGMT_SUBNET_CIDR:-"20.255.0.0/24"}"
51+
4552

4653
export AZURE_SUBSCRIPTION_ID="${AZURE_SUBSCRIPTION_ID:-}"
4754
export AZURE_CLIENT_ID="${AZURE_CLIENT_ID:-}"
@@ -63,7 +70,7 @@ main() {
6370
echo "MGMT_CLUSTER_NAME: $MGMT_CLUSTER_NAME"
6471
echo "AKS_RESOURCE_GROUP: $AKS_RESOURCE_GROUP"
6572
echo "AKS_NODE_RESOURCE_GROUP: $AKS_NODE_RESOURCE_GROUP"
66-
echo "KUBERNETES_VERSION: $KUBERNETES_VERSION"
73+
echo "AKS_MGMT_KUBERNETES_VERSION: $AKS_MGMT_KUBERNETES_VERSION"
6774
echo "AZURE_LOCATION: $AZURE_LOCATION"
6875
echo "AKS_NODE_VM_SIZE: $AKS_NODE_VM_SIZE"
6976
echo "AZURE_NODE_MACHINE_TYPE: $AZURE_NODE_MACHINE_TYPE"
@@ -76,6 +83,12 @@ main() {
7683
echo "SERVICE_ACCOUNT_SIGNING_KEY_FILEPATH: $SERVICE_ACCOUNT_SIGNING_KEY_FILEPATH"
7784
echo "REGISTRY: $REGISTRY"
7885
echo "APISERVER_LB_DNS_SUFFIX: $APISERVER_LB_DNS_SUFFIX"
86+
echo "AKS_MGMT_VNET_NAME: $AKS_MGMT_VNET_NAME"
87+
echo "AKS_MGMT_VNET_CIDR: $AKS_MGMT_VNET_CIDR"
88+
echo "AKS_MGMT_SERVICE_CIDR: $AKS_MGMT_SERVICE_CIDR"
89+
echo "AKS_MGMT_DNS_SERVICE_IP: $AKS_MGMT_DNS_SERVICE_IP"
90+
echo "AKS_MGMT_SUBNET_NAME: $AKS_MGMT_SUBNET_NAME"
91+
echo "AKS_MGMT_SUBNET_CIDR: $AKS_MGMT_SUBNET_CIDR"
7992

8093
echo "AZURE_SUBSCRIPTION_ID: $AZURE_SUBSCRIPTION_ID"
8194
echo "AZURE_CLIENT_ID: $AZURE_CLIENT_ID"
@@ -102,6 +115,16 @@ create_aks_cluster() {
102115
--location "${AZURE_LOCATION}" \
103116
--output none --only-show-errors \
104117
--tags creationTimestamp="${TIMESTAMP}" jobName="${JOB_NAME}" buildProvenance="${BUILD_PROVENANCE}"
118+
119+
echo "creating vnet for the resource group ${AKS_RESOURCE_GROUP}"
120+
az network vnet create \
121+
--resource-group "${AKS_RESOURCE_GROUP}"\
122+
--name "${AKS_MGMT_VNET_NAME}" \
123+
--address-prefix "${AKS_MGMT_VNET_CIDR}" \
124+
--subnet-name "${AKS_MGMT_SUBNET_NAME}" \
125+
--subnet-prefix "${AKS_MGMT_SUBNET_CIDR}" \
126+
--output none --only-show-errors \
127+
--tags creationTimestamp="${TIMESTAMP}" jobName="${JOB_NAME}" buildProvenance="${BUILD_PROVENANCE}"
105128
fi
106129

107130
aks_exists=$(az aks show --name "${MGMT_CLUSTER_NAME}" --resource-group "${AKS_RESOURCE_GROUP}" 2>&1 || true) # true because we want to continue if the command fails
@@ -110,13 +133,16 @@ create_aks_cluster() {
110133
az aks create --name "${MGMT_CLUSTER_NAME}" \
111134
--resource-group "${AKS_RESOURCE_GROUP}" \
112135
--location "${AZURE_LOCATION}" \
113-
--kubernetes-version "${KUBERNETES_VERSION}" \
136+
--kubernetes-version "${AKS_MGMT_KUBERNETES_VERSION}" \
114137
--node-count "${AKS_NODE_COUNT}" \
115138
--node-vm-size "${AKS_NODE_VM_SIZE}" \
116139
--node-resource-group "${AKS_NODE_RESOURCE_GROUP}" \
117140
--vm-set-type VirtualMachineScaleSets \
118141
--generate-ssh-keys \
119142
--network-plugin azure \
143+
--vnet-subnet-id "/subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${AKS_RESOURCE_GROUP}/providers/Microsoft.Network/virtualNetworks/${AKS_MGMT_VNET_NAME}/subnets/${AKS_MGMT_SUBNET_NAME}" \
144+
--service-cidr "${AKS_MGMT_SERVICE_CIDR}" \
145+
--dns-service-ip "${AKS_MGMT_DNS_SERVICE_IP}" \
120146
--tags creationTimestamp="${TIMESTAMP}" jobName="${JOB_NAME}" buildProvenance="${BUILD_PROVENANCE}" \
121147
--output none --only-show-errors;
122148
elif echo "$aks_exists" | grep -q "${MGMT_CLUSTER_NAME}"; then
@@ -127,6 +153,7 @@ create_aks_cluster() {
127153
fi
128154

129155
# check and save kubeconfig
156+
echo -e "\n"
130157
echo "saving credentials of cluster ${MGMT_CLUSTER_NAME} in ${REPO_ROOT}/${MGMT_CLUSTER_KUBECONFIG}"
131158
az aks get-credentials --name "${MGMT_CLUSTER_NAME}" --resource-group "${AKS_RESOURCE_GROUP}" \
132159
--file "${REPO_ROOT}/${MGMT_CLUSTER_KUBECONFIG}" --only-show-errors
@@ -179,15 +206,10 @@ create_aks_cluster() {
179206
set_env_varaibles(){
180207
cat <<EOF > tilt-settings-temp.yaml
181208
kustomize_substitutions:
182-
MGMT_CLUSTER_NAME: "${MGMT_CLUSTER_NAME}"
183209
AKS_RESOURCE_GROUP: "${AKS_RESOURCE_GROUP}"
184210
AKS_NODE_RESOURCE_GROUP: "${AKS_NODE_RESOURCE_GROUP}"
185-
MGMT_CLUSTER_KUBECONFIG: "${MGMT_CLUSTER_KUBECONFIG}"
186-
AKS_MI_CLIENT_ID: "${AKS_MI_CLIENT_ID}"
187-
AKS_MI_OBJECT_ID: "${AKS_MI_OBJECT_ID}"
188-
AKS_MI_RESOURCE_ID: "${AKS_MI_RESOURCE_ID}"
189-
MANAGED_IDENTITY_NAME: "${MANAGED_IDENTITY_NAME}"
190-
MANAGED_IDENTITY_RG: "${MANAGED_IDENTITY_RG}"
211+
AKS_MGMT_VNET_NAME: "${AKS_MGMT_VNET_NAME}"
212+
MGMT_CLUSTER_NAME: "${MGMT_CLUSTER_NAME}"
191213
AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY: "${AKS_MI_CLIENT_ID}"
192214
CI_RG: "${MANAGED_IDENTITY_RG}"
193215
USER_IDENTITY: "${MANAGED_IDENTITY_NAME}"

templates/cluster-template-private.yaml

Lines changed: 18 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)