From 111b923c4c1fd229cd79e8392eaf17f66eaa6443 Mon Sep 17 00:00:00 2001 From: Vipin Kumar Date: Wed, 9 Jul 2025 16:52:59 +0530 Subject: [PATCH 1/7] SKIP UPGRADE TEST --- README.md | 1 + ibm_catalog.json | 3 +++ main.tf | 9 +++++++++ solutions/fully-configurable/main.tf | 1 + solutions/fully-configurable/variables.tf | 6 ++++++ tests/resources/main.tf | 2 +- variables.tf | 6 ++++++ 7 files changed, 27 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bb53a0a..8419e15 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,7 @@ No modules. | [name](#input\_name) | Cloud Monitoring agent name. Used for naming all kubernetes and helm resources on the cluster. | `string` | `"sysdig-agent"` | no | | [namespace](#input\_namespace) | Namespace where to deploy the Cloud Monitoring agent. Default value is 'ibm-observe' | `string` | `"ibm-observe"` | no | | [tolerations](#input\_tolerations) | List of tolerations to apply to Cloud Monitoring agent. |
list(object({
key = optional(string)
operator = optional(string)
value = optional(string)
effect = optional(string)
tolerationSeconds = optional(number)
}))
|
[
{
"operator": "Exists"
},
{
"effect": "NoSchedule",
"key": "node-role.kubernetes.io/master",
"operator": "Exists"
}
]
| no | +| [universal\_ebpf](#input\_universal\_ebpf) | Deploy sysdig agent with universal eBPF enabled. It requires kernel version 5.8+. | `bool` | `true` | no | | [wait\_till](#input\_wait\_till) | To avoid long wait times when you run your Terraform code, you can specify the stage when you want Terraform to mark the cluster resource creation as completed. Depending on what stage you choose, the cluster creation might not be fully completed and continues to run in the background. However, your Terraform code can continue to run without waiting for the cluster to be fully created. Supported args are `MasterNodeReady`, `OneWorkerNodeReady`, `IngressReady` and `Normal` | `string` | `"Normal"` | no | | [wait\_till\_timeout](#input\_wait\_till\_timeout) | Timeout for wait\_till in minutes. | `number` | `90` | no | diff --git a/ibm_catalog.json b/ibm_catalog.json index 2635530..d1ef595 100644 --- a/ibm_catalog.json +++ b/ibm_catalog.json @@ -259,6 +259,9 @@ }, { "key": "wait_till_timeout" + }, + { + "key": "universal_ebpf" } ], "install_type": "fullstack" diff --git a/main.tf b/main.tf index 221d5cd..2eb7959 100644 --- a/main.tf +++ b/main.tf @@ -131,6 +131,15 @@ resource "helm_release" "cloud_monitoring_agent" { type = "string" value = regex("@(.*)", var.kernel_module_image_tag_digest)[0] } + set { + name = "agent.ebpf.enabled" + value = var.universal_ebpf + } + + set { + name = "agent.ebpf.kind" + value = "universal_ebpf" + } # Specific to SCC WP, enabled by default set { name = "nodeAnalyzer.enabled" diff --git a/solutions/fully-configurable/main.tf b/solutions/fully-configurable/main.tf index 20554b6..58d18a1 100644 --- a/solutions/fully-configurable/main.tf +++ b/solutions/fully-configurable/main.tf @@ -42,4 +42,5 @@ module "monitoring_agent" { agent_limits_memory = var.agent_limits_memory agent_requests_cpu = var.agent_requests_cpu agent_requests_memory = var.agent_requests_memory + universal_ebpf = var.universal_ebpf } diff --git a/solutions/fully-configurable/variables.tf b/solutions/fully-configurable/variables.tf index ad6be59..e21dfa6 100644 --- a/solutions/fully-configurable/variables.tf +++ b/solutions/fully-configurable/variables.tf @@ -232,3 +232,9 @@ variable "agent_limits_memory" { description = "Specifies the memory limit for the agent." default = "1024Mi" } + +variable "universal_ebpf" { + type = bool + description = "Deploy sysdig agent with universal eBPF enabled. It requires kernel version 5.8+." + default = true +} diff --git a/tests/resources/main.tf b/tests/resources/main.tf index 1ad6f54..eb27725 100644 --- a/tests/resources/main.tf +++ b/tests/resources/main.tf @@ -55,7 +55,7 @@ locals { pool_name = "default" # ibm_container_vpc_cluster automatically names default pool "default" (See https://github.com/IBM-Cloud/terraform-provider-ibm/issues/2849) machine_type = "bx2.4x16" workers_per_zone = 2 # minimum of 2 is allowed when using single zone - operating_system = "REDHAT_8_64" + operating_system = "RHCOS" } ] } diff --git a/variables.tf b/variables.tf index b7d8742..65bf62b 100644 --- a/variables.tf +++ b/variables.tf @@ -256,3 +256,9 @@ variable "agent_limits_memory" { description = "Specifies the memory limit for the agent." default = "1024Mi" } + +variable "universal_ebpf" { + type = bool + description = "Deploy sysdig agent with universal eBPF enabled. It requires kernel version 5.8+." + default = true +} From 737b4de96aa8a97894244ab53ea22c1093b546e8 Mon Sep 17 00:00:00 2001 From: Vipin Kumar Date: Wed, 9 Jul 2025 19:47:28 +0530 Subject: [PATCH 2/7] changed var name --- README.md | 2 +- ibm_catalog.json | 2 +- main.tf | 2 +- solutions/fully-configurable/main.tf | 2 +- solutions/fully-configurable/variables.tf | 2 +- variables.tf | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 8419e15..5236a52 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,7 @@ No modules. | [cluster\_id](#input\_cluster\_id) | The ID of the cluster you wish to deploy the agent in | `string` | n/a | yes | | [cluster\_resource\_group\_id](#input\_cluster\_resource\_group\_id) | The Resource Group ID of the cluster | `string` | n/a | yes | | [container\_filter](#input\_container\_filter) | To filter custom containers, specify which containers to include or exclude from metrics collection for the cloud monitoring agent. See https://cloud.ibm.com/docs/monitoring?topic=monitoring-change_kube_agent#change_kube_agent_filter_data. |
list(object({
type = string
parameter = string
name = string
}))
| `[]` | no | +| [enable\_universal\_ebpf](#input\_enable\_universal\_ebpf) | Deploy sysdig agent with universal eBPF enabled. It requires kernel version 5.8+. | `bool` | `true` | no | | [existing\_access\_key\_secret\_name](#input\_existing\_access\_key\_secret\_name) | An alternative to using the Sysdig Agent `access_key`. Specify the name of a Kubernetes secret containing an access-key entry. Either `access_key` or `existing_access_key_secret_name` is required. | `string` | `null` | no | | [image\_registry\_base\_url](#input\_image\_registry\_base\_url) | The image registry base URL to pull the Cloud Monitoring agent images from. For example `icr.io`, `quay.io`, etc. | `string` | `"icr.io"` | no | | [image\_registry\_namespace](#input\_image\_registry\_namespace) | The namespace within the image registry to pull the Cloud Monitoring agent images from. | `string` | `"ext/sysdig"` | no | @@ -131,7 +132,6 @@ No modules. | [name](#input\_name) | Cloud Monitoring agent name. Used for naming all kubernetes and helm resources on the cluster. | `string` | `"sysdig-agent"` | no | | [namespace](#input\_namespace) | Namespace where to deploy the Cloud Monitoring agent. Default value is 'ibm-observe' | `string` | `"ibm-observe"` | no | | [tolerations](#input\_tolerations) | List of tolerations to apply to Cloud Monitoring agent. |
list(object({
key = optional(string)
operator = optional(string)
value = optional(string)
effect = optional(string)
tolerationSeconds = optional(number)
}))
|
[
{
"operator": "Exists"
},
{
"effect": "NoSchedule",
"key": "node-role.kubernetes.io/master",
"operator": "Exists"
}
]
| no | -| [universal\_ebpf](#input\_universal\_ebpf) | Deploy sysdig agent with universal eBPF enabled. It requires kernel version 5.8+. | `bool` | `true` | no | | [wait\_till](#input\_wait\_till) | To avoid long wait times when you run your Terraform code, you can specify the stage when you want Terraform to mark the cluster resource creation as completed. Depending on what stage you choose, the cluster creation might not be fully completed and continues to run in the background. However, your Terraform code can continue to run without waiting for the cluster to be fully created. Supported args are `MasterNodeReady`, `OneWorkerNodeReady`, `IngressReady` and `Normal` | `string` | `"Normal"` | no | | [wait\_till\_timeout](#input\_wait\_till\_timeout) | Timeout for wait\_till in minutes. | `number` | `90` | no | diff --git a/ibm_catalog.json b/ibm_catalog.json index d1ef595..e5d8f85 100644 --- a/ibm_catalog.json +++ b/ibm_catalog.json @@ -261,7 +261,7 @@ "key": "wait_till_timeout" }, { - "key": "universal_ebpf" + "key": "enable_universal_ebpf" } ], "install_type": "fullstack" diff --git a/main.tf b/main.tf index 2eb7959..5e29207 100644 --- a/main.tf +++ b/main.tf @@ -133,7 +133,7 @@ resource "helm_release" "cloud_monitoring_agent" { } set { name = "agent.ebpf.enabled" - value = var.universal_ebpf + value = var.enable_universal_ebpf } set { diff --git a/solutions/fully-configurable/main.tf b/solutions/fully-configurable/main.tf index 58d18a1..bf545aa 100644 --- a/solutions/fully-configurable/main.tf +++ b/solutions/fully-configurable/main.tf @@ -42,5 +42,5 @@ module "monitoring_agent" { agent_limits_memory = var.agent_limits_memory agent_requests_cpu = var.agent_requests_cpu agent_requests_memory = var.agent_requests_memory - universal_ebpf = var.universal_ebpf + enable_universal_ebpf = var.enable_universal_ebpf } diff --git a/solutions/fully-configurable/variables.tf b/solutions/fully-configurable/variables.tf index e21dfa6..7374b04 100644 --- a/solutions/fully-configurable/variables.tf +++ b/solutions/fully-configurable/variables.tf @@ -233,7 +233,7 @@ variable "agent_limits_memory" { default = "1024Mi" } -variable "universal_ebpf" { +variable "enable_universal_ebpf" { type = bool description = "Deploy sysdig agent with universal eBPF enabled. It requires kernel version 5.8+." default = true diff --git a/variables.tf b/variables.tf index 65bf62b..7185757 100644 --- a/variables.tf +++ b/variables.tf @@ -257,7 +257,7 @@ variable "agent_limits_memory" { default = "1024Mi" } -variable "universal_ebpf" { +variable "enable_universal_ebpf" { type = bool description = "Deploy sysdig agent with universal eBPF enabled. It requires kernel version 5.8+." default = true From 3ed5c61367a2a1b5e0c34e6e1c0a437d80b69a96 Mon Sep 17 00:00:00 2001 From: Vipin Kumar Date: Wed, 9 Jul 2025 20:47:24 +0530 Subject: [PATCH 3/7] changed OS in examples --- examples/obs-agent-iks/main.tf | 1 + examples/obs-agent-ocp/main.tf | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/obs-agent-iks/main.tf b/examples/obs-agent-iks/main.tf index fcc1c0f..9992d85 100644 --- a/examples/obs-agent-iks/main.tf +++ b/examples/obs-agent-iks/main.tf @@ -110,4 +110,5 @@ module "monitoring_agents" { # # Monitoring agent access_key = module.cloud_monitoring.access_key cloud_monitoring_instance_region = var.region + enable_universal_ebpf = true } diff --git a/examples/obs-agent-ocp/main.tf b/examples/obs-agent-ocp/main.tf index be816b1..7a8e058 100644 --- a/examples/obs-agent-ocp/main.tf +++ b/examples/obs-agent-ocp/main.tf @@ -54,7 +54,7 @@ locals { subnet_prefix = "default" pool_name = "default" # ibm_container_vpc_cluster automatically names default pool "default" (See https://github.com/IBM-Cloud/terraform-provider-ibm/issues/2849) machine_type = "bx2.4x16" - operating_system = "REDHAT_8_64" + operating_system = "RHEL_9_64" workers_per_zone = 2 # minimum of 2 is allowed when using single zone } ] @@ -107,7 +107,8 @@ module "monitoring_agents" { access_key = module.cloud_monitoring.access_key cloud_monitoring_instance_region = var.region # example of how to include / exclude metrics - more info https://cloud.ibm.com/docs/monitoring?topic=monitoring-change_kube_agent#change_kube_agent_log_metrics - metrics_filter = [{ exclude = "metricA.*" }, { include = "metricB.*" }] - container_filter = [{ type = "exclude", parameter = "kubernetes.namespace.name", name = "kube-system" }] - blacklisted_ports = [22, 2379, 3306] + metrics_filter = [{ exclude = "metricA.*" }, { include = "metricB.*" }] + container_filter = [{ type = "exclude", parameter = "kubernetes.namespace.name", name = "kube-system" }] + blacklisted_ports = [22, 2379, 3306] + enable_universal_ebpf = true } From 7624397413801d60174a611c53f683bbb2d1d60d Mon Sep 17 00:00:00 2001 From: Vipin Kumar Date: Thu, 10 Jul 2025 14:55:45 +0530 Subject: [PATCH 4/7] added doc for enabling ebpf --- solutions/fully-configurable/DA-docs.md | 12 ++++++++++++ solutions/fully-configurable/variables.tf | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 solutions/fully-configurable/DA-docs.md diff --git a/solutions/fully-configurable/DA-docs.md b/solutions/fully-configurable/DA-docs.md new file mode 100644 index 0000000..42ca10a --- /dev/null +++ b/solutions/fully-configurable/DA-docs.md @@ -0,0 +1,12 @@ +When attempting to deploy the agents to cluster nodes on RH CoreOS that have no public gateways enabled (and/or have outbound traffic disabled), the pods fail to come up with the error: +``` +Download of sysdigcloud-probe for version 13.9.2 failed. +curl: (28) Failed to connect to download.sysdig.com port 443: Connection timed out +Cannot load the probe +``` + +This happens because the agent tries to connect to the kernel and for that it needs a kernel module (default behaviour): +- If not available in the machine already, it tries to build it with the kernel headers +- if kernel headers not available, it tries to download it + +To fix this, we need the ability to set the helm values `agent.ebpf.enabled` and `agent.ebpf.kind` if cluster is using nodes based on RHCOS by setting the terraform boolean input variable called `enable_universal_ebpf` to true. Enabling universal ebpf needs kernel version to be `5.8` or higher. RHEL8 already has the kernel headers and enabling `ebpf` will not cause any impact even though kernel version is `4.18`. diff --git a/solutions/fully-configurable/variables.tf b/solutions/fully-configurable/variables.tf index 7374b04..15a42f3 100644 --- a/solutions/fully-configurable/variables.tf +++ b/solutions/fully-configurable/variables.tf @@ -235,6 +235,6 @@ variable "agent_limits_memory" { variable "enable_universal_ebpf" { type = bool - description = "Deploy sysdig agent with universal eBPF enabled. It requires kernel version 5.8+." + description = "Deploy sysdig agent with universal eBPF enabled. It requires kernel version 5.8+. [Learn more](https://github.com/terraform-ibm-modules/terraform-ibm-monitoring-agent/tree/main/solutions/fully-configurable/DA-docs.md)." default = true } From 6d0e8ba8d9f6cb054a33ea5a3459c58b46199fb3 Mon Sep 17 00:00:00 2001 From: Vipin Kumar Date: Thu, 10 Jul 2025 17:20:53 +0530 Subject: [PATCH 5/7] addressed review comments --- solutions/fully-configurable/DA-docs.md | 30 ++++++++++++++--------- solutions/fully-configurable/variables.tf | 2 +- variables.tf | 2 +- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/solutions/fully-configurable/DA-docs.md b/solutions/fully-configurable/DA-docs.md index 42ca10a..e244811 100644 --- a/solutions/fully-configurable/DA-docs.md +++ b/solutions/fully-configurable/DA-docs.md @@ -1,12 +1,18 @@ -When attempting to deploy the agents to cluster nodes on RH CoreOS that have no public gateways enabled (and/or have outbound traffic disabled), the pods fail to come up with the error: -``` -Download of sysdigcloud-probe for version 13.9.2 failed. -curl: (28) Failed to connect to download.sysdig.com port 443: Connection timed out -Cannot load the probe -``` - -This happens because the agent tries to connect to the kernel and for that it needs a kernel module (default behaviour): -- If not available in the machine already, it tries to build it with the kernel headers -- if kernel headers not available, it tries to download it - -To fix this, we need the ability to set the helm values `agent.ebpf.enabled` and `agent.ebpf.kind` if cluster is using nodes based on RHCOS by setting the terraform boolean input variable called `enable_universal_ebpf` to true. Enabling universal ebpf needs kernel version to be `5.8` or higher. RHEL8 already has the kernel headers and enabling `ebpf` will not cause any impact even though kernel version is `4.18`. +## When to Enable `enable_universal_ebpf` + +For Clusters using Red Hat CoreOS (RHCOS) or RHEL 9 nodes with restricted outbound internet access, the monitoring agent pods may fail to start due to the inability to retrieve kernel modules which are necessary for the agent to connect with kernel. + +Setting the Terraform variable `enable_universal_ebpf` to `true` ensures the agent uses eBPF-based instrumentation, which avoids the need for external downloads and allows successful deployment in restricted environments. + +### When Should You Enable It? + +Set `enable_universal_ebpf` to true if: + +- Your cluster nodes run on RHCOS or RHEL 9 and do not have public or outbound internet access. +- You want to avoid relying on dynamic downloads for kernel modules. + +### Kernel Compatibility + +- **RHCOS and RHEL9**: Since kernel version **5.14** is used. Default value for variable has been set to true. +- **RHEL 8**: Although it uses kernel version **4.18**, the necessary kernel headers are pre-installed, so enabling eBPF is safe and has no impact. + diff --git a/solutions/fully-configurable/variables.tf b/solutions/fully-configurable/variables.tf index 15a42f3..5113642 100644 --- a/solutions/fully-configurable/variables.tf +++ b/solutions/fully-configurable/variables.tf @@ -235,6 +235,6 @@ variable "agent_limits_memory" { variable "enable_universal_ebpf" { type = bool - description = "Deploy sysdig agent with universal eBPF enabled. It requires kernel version 5.8+. [Learn more](https://github.com/terraform-ibm-modules/terraform-ibm-monitoring-agent/tree/main/solutions/fully-configurable/DA-docs.md)." + description = "Deploy monitoring agent with universal eBPF enabled. It requires kernel version 5.8+. [Learn more](https://github.com/terraform-ibm-modules/terraform-ibm-monitoring-agent/tree/main/solutions/fully-configurable/DA-docs.md)." default = true } diff --git a/variables.tf b/variables.tf index 7185757..6343415 100644 --- a/variables.tf +++ b/variables.tf @@ -259,6 +259,6 @@ variable "agent_limits_memory" { variable "enable_universal_ebpf" { type = bool - description = "Deploy sysdig agent with universal eBPF enabled. It requires kernel version 5.8+." + description = "Deploy monitoring agent with universal eBPF enabled. It requires kernel version 5.8+. [Learn more](https://github.com/terraform-ibm-modules/terraform-ibm-monitoring-agent/tree/main/solutions/fully-configurable/DA-docs.md)." default = true } From f440f659347399ea99b1b4a4f1b4919de36109fa Mon Sep 17 00:00:00 2001 From: Vipin Kumar Date: Thu, 10 Jul 2025 17:28:46 +0530 Subject: [PATCH 6/7] addressed review comments --- solutions/fully-configurable/DA-docs.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solutions/fully-configurable/DA-docs.md b/solutions/fully-configurable/DA-docs.md index e244811..e621348 100644 --- a/solutions/fully-configurable/DA-docs.md +++ b/solutions/fully-configurable/DA-docs.md @@ -2,7 +2,7 @@ For Clusters using Red Hat CoreOS (RHCOS) or RHEL 9 nodes with restricted outbound internet access, the monitoring agent pods may fail to start due to the inability to retrieve kernel modules which are necessary for the agent to connect with kernel. -Setting the Terraform variable `enable_universal_ebpf` to `true` ensures the agent uses eBPF-based instrumentation, which avoids the need for external downloads and allows successful deployment in restricted environments. +Setting the input variable `enable_universal_ebpf` to `true` ensures the agent uses eBPF-based instrumentation, which avoids the need for external downloads and allows successful deployment in restricted environments. ### When Should You Enable It? @@ -13,6 +13,6 @@ Set `enable_universal_ebpf` to true if: ### Kernel Compatibility -- **RHCOS and RHEL9**: Since kernel version **5.14** is used. Default value for variable has been set to true. +- **RHCOS and RHEL9**: Since kernel version **5.14 or later** is used. Default value for variable has been set to true. - **RHEL 8**: Although it uses kernel version **4.18**, the necessary kernel headers are pre-installed, so enabling eBPF is safe and has no impact. From d32a1ea4b95e9bf935f017c4c27d3bac8907da89 Mon Sep 17 00:00:00 2001 From: Vipin Kumar Date: Thu, 10 Jul 2025 17:57:12 +0530 Subject: [PATCH 7/7] fixed precommit --- README.md | 2 +- solutions/fully-configurable/DA-docs.md | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 5236a52..4c8079e 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ No modules. | [cluster\_id](#input\_cluster\_id) | The ID of the cluster you wish to deploy the agent in | `string` | n/a | yes | | [cluster\_resource\_group\_id](#input\_cluster\_resource\_group\_id) | The Resource Group ID of the cluster | `string` | n/a | yes | | [container\_filter](#input\_container\_filter) | To filter custom containers, specify which containers to include or exclude from metrics collection for the cloud monitoring agent. See https://cloud.ibm.com/docs/monitoring?topic=monitoring-change_kube_agent#change_kube_agent_filter_data. |
list(object({
type = string
parameter = string
name = string
}))
| `[]` | no | -| [enable\_universal\_ebpf](#input\_enable\_universal\_ebpf) | Deploy sysdig agent with universal eBPF enabled. It requires kernel version 5.8+. | `bool` | `true` | no | +| [enable\_universal\_ebpf](#input\_enable\_universal\_ebpf) | Deploy monitoring agent with universal eBPF enabled. It requires kernel version 5.8+. [Learn more](https://github.com/terraform-ibm-modules/terraform-ibm-monitoring-agent/tree/main/solutions/fully-configurable/DA-docs.md). | `bool` | `true` | no | | [existing\_access\_key\_secret\_name](#input\_existing\_access\_key\_secret\_name) | An alternative to using the Sysdig Agent `access_key`. Specify the name of a Kubernetes secret containing an access-key entry. Either `access_key` or `existing_access_key_secret_name` is required. | `string` | `null` | no | | [image\_registry\_base\_url](#input\_image\_registry\_base\_url) | The image registry base URL to pull the Cloud Monitoring agent images from. For example `icr.io`, `quay.io`, etc. | `string` | `"icr.io"` | no | | [image\_registry\_namespace](#input\_image\_registry\_namespace) | The namespace within the image registry to pull the Cloud Monitoring agent images from. | `string` | `"ext/sysdig"` | no | diff --git a/solutions/fully-configurable/DA-docs.md b/solutions/fully-configurable/DA-docs.md index e621348..5e6fff2 100644 --- a/solutions/fully-configurable/DA-docs.md +++ b/solutions/fully-configurable/DA-docs.md @@ -15,4 +15,3 @@ Set `enable_universal_ebpf` to true if: - **RHCOS and RHEL9**: Since kernel version **5.14 or later** is used. Default value for variable has been set to true. - **RHEL 8**: Although it uses kernel version **4.18**, the necessary kernel headers are pre-installed, so enabling eBPF is safe and has no impact. -