From b2876e27a97f291fde95b24c1ef8625cdcf2501b Mon Sep 17 00:00:00 2001 From: Louies-Jhony Date: Tue, 1 Jul 2025 22:45:56 +0530 Subject: [PATCH] DA Scale --- datasource.tf | 13 +- ibm_catalog.json | 894 +++++++++++------- locals.tf | 120 ++- main.tf | 292 ++++-- .../tasks/mgmt-cloudmonitoring-configure.yml | 52 +- .../roles/cloudmonitoring/vars/main.yml | 1 + .../tasks/login_node_configuration.yml | 80 ++ .../roles/lsf_login_config/tasks/main.yml | 4 + .../roles/lsf_login_config/vars/main.yml | 8 + .../tasks/app_center_configure.yml | 73 ++ .../configure_dynamic_nodes_templates.yml | 10 - .../tasks/configure_management_nodes.yml | 18 + .../tasks/hosts_file_update.yml | 2 +- .../lsf_mgmt_config/tasks/hyperthreading.yml | 22 - .../roles/lsf_mgmt_config/tasks/main.yml | 2 + .../templates/ibmcloudgen2_templates.json.j2 | 1 + .../lsf_mgmt_config/templates/user_data.sh | 368 +++---- .../roles/lsf_mgmt_config/vars/main.yml | 3 +- .../tasks/cluster_validation.yml | 43 + .../tasks/configure_shared_folders.yml | 116 +++ .../roles/lsf_post_config/tasks/main.yml | 13 + .../tasks/permissions_setup.yml | 19 + .../lsf_post_config/tasks/reload_services.yml | 14 + .../roles/lsf_post_config/vars/main.yml | 6 + .../tasks/hyperthreading.yml | 22 + .../tasks/lsf_tunables.yml | 0 .../tasks/lsfadmin_creation.yml | 1 + .../tasks/lsfadmin_password_less_auth.yml | 0 .../tasks/main.yml | 6 + .../tasks/mtu_configuration.yml | 0 .../vars/main.yml | 1 + .../tasks/lsf_inventory.yml | 16 +- .../tasks/lsf_prepare.yml | 0 .../tasks/main.yml | 3 - .../tasks/python_installation.yml | 0 .../templates/fp14-config.j2 | 0 .../templates/fp14-inventory.j2 | 2 + .../templates/fp15-config.j2 | 0 .../templates/fp15-inventory.j2 | 0 .../vars/main.yml | 0 .../handlers/main.yml | 0 .../tasks/main.yml | 0 .../tasks/vpc_fileshare_configure.yml | 8 + modules/baremetal/datasource.tf | 11 +- modules/baremetal/locals.tf | 54 +- modules/baremetal/main.tf | 51 +- modules/baremetal/outputs.tf | 5 + modules/baremetal/template_files.tf | 4 +- modules/baremetal/templates/cloud_init.yml | 188 ++++ .../baremetal/templates/storage_user_data.tpl | 3 + modules/baremetal/variables.tf | 20 + .../encryption_configuration.tf | 54 ++ .../encryption_configuration/variables.tf | 16 + modules/deployer/datasource.tf | 10 +- modules/deployer/image_map.tf | 30 + modules/deployer/locals.tf | 45 +- modules/deployer/main.tf | 39 +- modules/deployer/outputs.tf | 7 +- modules/deployer/template_files.tf | 8 +- modules/deployer/variables.tf | 93 +- modules/inventory/main.tf | 11 +- modules/inventory/variables.tf | 6 + modules/key_protect/datasource.tf | 4 + modules/key_protect/main.tf | 48 + modules/key_protect/outputs.tf | 0 modules/key_protect/variables.tf | 6 + modules/key_protect/version.tf | 14 + modules/landing_zone/locals.tf | 32 +- modules/landing_zone/main.tf | 4 +- modules/landing_zone/outputs.tf | 15 +- modules/landing_zone/variables.tf | 33 +- modules/landing_zone_vsi/datasource.tf | 9 +- modules/landing_zone_vsi/image_map.tf | 73 +- modules/landing_zone_vsi/locals.tf | 96 +- modules/landing_zone_vsi/main.tf | 136 ++- modules/landing_zone_vsi/outputs.tf | 20 + modules/landing_zone_vsi/template_files.tf | 57 +- .../templates/compute_user_data.tpl | 203 ---- .../templates/gklm_user_data.tpl | 2 + .../templates/login_user_data.tpl | 62 ++ .../templates/lsf_compute_user_data.tpl | 62 ++ .../templates/management_user_data.tpl | 57 +- .../templates/scale_compute_user_data.tpl | 115 +++ modules/landing_zone_vsi/variables.tf | 79 +- modules/playbook/main.tf | 453 
+++++++-- modules/playbook/variables.tf | 46 +- modules/prepare_tf_input/locals.tf | 88 +- modules/prepare_tf_input/main.tf | 20 +- modules/prepare_tf_input/variables.tf | 108 ++- modules/resource_provisioner/locals.tf | 13 +- modules/resource_provisioner/main.tf | 26 +- modules/resource_provisioner/variables.tf | 27 +- modules/write_inventory/datasource.tf | 3 +- modules/write_inventory/image_map.tf | 52 + modules/write_inventory/locals.tf | 18 +- modules/write_inventory/main.tf | 5 +- modules/write_inventory/variables.tf | 25 + modules/write_scale_inventory/variables.tf | 2 +- solutions/scale/locals.tf | 82 +- solutions/scale/main.tf | 50 +- solutions/scale/variables.tf | 107 ++- variables.tf | 220 +++-- 102 files changed, 3747 insertions(+), 1613 deletions(-) create mode 100644 modules/ansible-roles/roles/lsf_login_config/tasks/login_node_configuration.yml create mode 100644 modules/ansible-roles/roles/lsf_login_config/tasks/main.yml create mode 100644 modules/ansible-roles/roles/lsf_login_config/vars/main.yml create mode 100644 modules/ansible-roles/roles/lsf_mgmt_config/tasks/app_center_configure.yml create mode 100644 modules/ansible-roles/roles/lsf_post_config/tasks/cluster_validation.yml create mode 100644 modules/ansible-roles/roles/lsf_post_config/tasks/configure_shared_folders.yml create mode 100644 modules/ansible-roles/roles/lsf_post_config/tasks/main.yml create mode 100644 modules/ansible-roles/roles/lsf_post_config/tasks/permissions_setup.yml create mode 100644 modules/ansible-roles/roles/lsf_post_config/tasks/reload_services.yml create mode 100644 modules/ansible-roles/roles/lsf_post_config/vars/main.yml create mode 100644 modules/ansible-roles/roles/lsf_prereq_config/tasks/hyperthreading.yml rename modules/ansible-roles/roles/{lsf_server_config => lsf_prereq_config}/tasks/lsf_tunables.yml (100%) rename modules/ansible-roles/roles/{lsf => lsf_prereq_config}/tasks/lsfadmin_creation.yml (98%) rename modules/ansible-roles/roles/{lsf_server_config => lsf_prereq_config}/tasks/lsfadmin_password_less_auth.yml (100%) rename modules/ansible-roles/roles/{lsf_server_config => lsf_prereq_config}/tasks/main.yml (65%) rename modules/ansible-roles/roles/{lsf_server_config => lsf_prereq_config}/tasks/mtu_configuration.yml (100%) rename modules/ansible-roles/roles/{lsf_server_config => lsf_prereq_config}/vars/main.yml (76%) rename modules/ansible-roles/roles/{lsf => lsf_template_config}/tasks/lsf_inventory.yml (70%) rename modules/ansible-roles/roles/{lsf => lsf_template_config}/tasks/lsf_prepare.yml (100%) rename modules/ansible-roles/roles/{lsf => lsf_template_config}/tasks/main.yml (79%) rename modules/ansible-roles/roles/{lsf => lsf_template_config}/tasks/python_installation.yml (100%) rename modules/ansible-roles/roles/{lsf => lsf_template_config}/templates/fp14-config.j2 (100%) rename modules/ansible-roles/roles/{lsf => lsf_template_config}/templates/fp14-inventory.j2 (99%) rename modules/ansible-roles/roles/{lsf => lsf_template_config}/templates/fp15-config.j2 (100%) rename modules/ansible-roles/roles/{lsf => lsf_template_config}/templates/fp15-inventory.j2 (100%) rename modules/ansible-roles/roles/{lsf => lsf_template_config}/vars/main.yml (100%) rename modules/ansible-roles/roles/{vpc_fileshare_configure => vpc_fileshare_config}/handlers/main.yml (100%) rename modules/ansible-roles/roles/{vpc_fileshare_configure => vpc_fileshare_config}/tasks/main.yml (100%) rename modules/ansible-roles/roles/{vpc_fileshare_configure => vpc_fileshare_config}/tasks/vpc_fileshare_configure.yml (87%) 
create mode 100644 modules/baremetal/templates/cloud_init.yml create mode 100644 modules/common/encryption_configuration/encryption_configuration.tf create mode 100644 modules/common/encryption_configuration/variables.tf create mode 100644 modules/deployer/image_map.tf create mode 100644 modules/key_protect/datasource.tf create mode 100644 modules/key_protect/main.tf create mode 100644 modules/key_protect/outputs.tf create mode 100644 modules/key_protect/variables.tf create mode 100644 modules/key_protect/version.tf delete mode 100644 modules/landing_zone_vsi/templates/compute_user_data.tpl create mode 100644 modules/landing_zone_vsi/templates/login_user_data.tpl create mode 100644 modules/landing_zone_vsi/templates/lsf_compute_user_data.tpl create mode 100644 modules/landing_zone_vsi/templates/scale_compute_user_data.tpl create mode 100644 modules/write_inventory/image_map.tf diff --git a/datasource.tf b/datasource.tf index 8d1716a6..5bb205f1 100644 --- a/datasource.tf +++ b/datasource.tf @@ -10,11 +10,17 @@ data "ibm_is_zone" "zone" { } */ +#Fetching Existing VPC CIDR for Security rules: data "ibm_is_vpc" "existing_vpc" { count = var.vpc_name != null ? 1 : 0 name = var.vpc_name } +data "ibm_is_vpc_address_prefixes" "existing_vpc_cidr" { + count = var.vpc_name != null ? 1 : 0 + vpc = data.ibm_is_vpc.existing_vpc[0].id +} + /* data "ibm_is_subnet" "subnet" { count = length(local.subnets) @@ -28,8 +34,8 @@ data "ibm_is_subnet" "subnet" { # } data "ibm_is_subnet" "existing_cluster_subnets" { - count = var.vpc_name != null && var.cluster_subnet_ids != null ? 1 : 0 - identifier = var.cluster_subnet_ids + count = var.vpc_name != null && var.cluster_subnet_id != null ? 1 : 0 + identifier = var.cluster_subnet_id } data "ibm_is_subnet" "existing_storage_subnets" { @@ -58,6 +64,7 @@ data "ibm_is_ssh_key" "ssh_keys" { } data "ibm_is_subnet" "compute_subnet_crn" { + count = var.vpc_name != null && var.cluster_subnet_id != null ? 1 : 0 identifier = local.compute_subnet_id } @@ -83,7 +90,7 @@ data "ibm_is_instance_profile" "protocol_profile" { } data "ibm_is_subnet_reserved_ips" "protocol_subnet_reserved_ips" { - count = local.scale_ces_enabled == true ? 1 : 0 + count = var.enable_deployer == false && local.scale_ces_enabled == true ? 1 : 0 subnet = local.protocol_subnet_id } diff --git a/ibm_catalog.json b/ibm_catalog.json index 753706fc..b8e4bd0b 100644 --- a/ibm_catalog.json +++ b/ibm_catalog.json @@ -1,14 +1,14 @@ { "products": [ { - "name": "terraform-ibm-modules-terraform-ibm-hpc-scale", - "label": "IBM Storage Scale (BYOL)", + "name": "deploy-arch-ibm-hpc-lsf", + "label": "IBM Spectrum LSF", "product_kind": "solution", "tags": [ "Deployable Architecture", "DA", - "HPC", - "IBM Cloud HPC", + "LSF", + "Spectrum LSF", "ibm_created", "target_terraform", "terraform", @@ -16,7 +16,7 @@ "solution" ], "keywords": [ - "HPC", + "LSF", "vpc", "DA", "Deployable Architecture", @@ -24,14 +24,10 @@ "solution" ], "short_description": "Deploy your high performance computing (HPC) cluster with IBM scheduling software for compute intensive workloads.", - "long_description": "**Before you begin deploying IBM Cloud HPC, make sure that you meet the prerequisites listed in [the step-by-step guide](https://cloud.ibm.com/docs/allowlist/hpc-service?topic=hpc-service-before-you-begin-deploying).**\n\nIBM Cloud HPC is a deployable architecture where you can deploy both cloud compute resources (vCPU resources) and HPC scheduling software for your compute-intensive HPC workloads. 
You can reserve capacity on a recurring hourly basis from a dedicated IBM Cloud HPC resource pool.", + "long_description": "**Before you begin deploying IBM Spectrum LSF, make sure that you meet the prerequisites listed in [the step-by-step guide](https://cloud.ibm.com/docs/allowlist/hpc-service?topic=hpc-service-before-you-begin-deploying).**\n\nIBM Spectrum LSF is a deployable architecture where you can deploy both cloud compute resources (vCPU resources) and HPC scheduling software for your compute-intensive HPC workloads.", "provider_name": "IBM", - "offering_docs_url": "https://cloud.ibm.com/docs/allowlist/hpc-service?topic=hpc-service-overview", + "offering_docs_url": "https://cloud.ibm.com/docs/hpc-ibm-spectrumlsf?topic=hpc-ibm-spectrumlsf-about-spectrum-lsf", "features": [ - { - "title": "Reserve HPC compute on an hourly basis", - "description": "Reserve HPC compute capacity on a recurring hourly basis. This reserved time slot and capacity cannot be preempted by IBM." - }, { "title": "HPC infrastructure bundled with LSF scheduling software", "description": "Create a cluster with IBM Spectrum LSF in its own subnet and security group for HPC administrators to log in, submit, and manage HPC jobs." @@ -39,415 +35,618 @@ { "title": "Automatic compute scaling with LSF resource connector", "description": "Automatically scale up compute resources based on workload demand. Compute resources are automatically scaled down when no longer required." + }, + { + "title": "Shared file system support through IBM Storage Scale", + "description": "IBM Storage Scale is a high performance, highly available, clustered file system with advanced features like File Audit Logging for security and Active File Management for hybrid cloud connectivity. IBM Storage Scale provides more performance and scalability than standard file storage solutions." 
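As a concrete reading of the shared-storage choice this feature describes, here is a terraform.tfvars sketch of the custom_file_shares input defined later in this catalog; it is not authoritative, it simply mirrors the catalog's own default_value (two File Storage for VPC shares plus one external NFS/Scale mount whose export path is intentionally left empty):

custom_file_shares = [
  {
    mount_path = "/mnt/vpcstorage/tools" # new VPC file share: 100 GB at 2000 IOPS
    size       = 100
    iops       = 2000
  },
  {
    mount_path = "/mnt/vpcstorage/data" # new VPC file share: 100 GB at a higher 6000 IOPS tier
    size       = 100
    iops       = 6000
  },
  {
    mount_path = "/mnt/scale/tools" # mount an existing NFS/Scale export instead of creating a share
    nfs_share  = ""                 # empty in the catalog default; point it at a real export to use it
  },
]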
} ], "flavors": [ { - "label": "IBM Storage Scale (BYOL)", - "name": "scale", + "label": "Cluster with LSF v10.1.0.15", + "name": "Cluster-with-LSF", "install_type": "fullstack", - "working_directory": "solutions/scale", + "working_directory": "solutions/lsf", "compliance": { "authority": "scc-v3", "profiles": [ { - "profile_name": "IBM Cloud Framework for Financial Services", - "profile_version": "1.5.0" + "profile_name": "CIS IBM Cloud Foundations Benchmark v1.1.0", + "profile_version": "1.1.0" } ] }, - "release_notes_url": "https://cloud.ibm.com/docs/allowlist/hpc-service?topic=hpc-service-release-notes", + "release_notes_url": "https://cloud.ibm.com/docs/hpc-ibm-spectrumlsf?topic=hpc-ibm-spectrumlsf-my-service-relnotes", "configuration": [ - ], - "iam_permissions": [ { - "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "appid" + "key": "ibmcloud_api_key" }, { - "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "cloud-object-storage" + "key": "existing_resource_group", + "required": true }, { - "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "hs-crypto" + "key": "ssh_keys", + "required": true }, { - "role_crns": [ - "crn:v1:bluemix:public:iam::::role:Administrator" - ], - "service_name": "iam-identity" + "key": "remote_allowed_ips" }, { - "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "kms" + "key": "lsf_version", + "default_value": "fixpack_15", + "required": true, + "options": [ + { + "displayname": "fixpack_15", + "value": "fixpack_15" + }, + { + "displayname": "fixpack_14", + "value": "fixpack_14" + } + ] }, { - "role_crns": [ - "crn:v1:bluemix:public:iam::::role:Administrator" - ], - "service_name": "is.vpc" - } - ], - "architecture": { - "descriptions": "", - "features": [ - { - "title": "Separate VPC for HPC workloads", - "description": "Yes" - }, - { - "title": "Virtual Server Instances for every subnet", - "description": "Yes" - }, - { - "title": "Increases security with Key Management", - "description": "Yes" - }, - { - "title": "Reduces failure events by using multizone regions", - "description": "Yes" - }, - { - "title": "Collects and stores Internet Protocol (IP) traffic information with Activity Tracker and Flow Logs", - "description": "Yes" - }, - { - "title": "Securely connects to multiple networks with a site-to-site virtual private network", - "description": "Yes" - }, - { - "title": "Simplifies risk management and demonstrates regulatory compliance with Financial Services", - "description": "Yes" - }, - { - "title": "Uses Floating IP address for access through the public internet", - "description": "No" - } - ], - "diagrams": [ - { - "diagram": { - "caption": "IBM Storage Scale", - "url": "https://raw.githubusercontent.com/terraform-ibm-modules/terraform-ibm-landing-zone/main/reference-architectures/vsi-vsi.drawio.svg", - "type": "image/svg+xml" + "key": "zones", + "required": true, + "default_value": ["us-east-1"], + "options": [ + { + "displayname": "Washington DC 1", + "value": ["us-east-1"] + }, + { + "displayname": "Washington DC 2", + "value": ["us-east-2"] + }, + { + "displayname": "Washington DC 3", + "value": ["us-east-3"] + }, + { + "displayname": "Frankfurt 1", + "value": ["eu-de-1"] + }, + { + "displayname": "Frankfurt 2", + "value": ["eu-de-2"] + }, + { + "displayname": "Frankfurt 3", + "value": ["eu-de-3"] + }, + { + "displayname": "Dallas 1", + "value": ["us-south-1"] + }, + { + 
"displayname": "Dallas 2", + "value": ["us-south-2"] + }, + { + "displayname": "Dallas 3", + "value": ["us-south-3"] + }, + { + "displayname": "Toronto 1", + "value": ["ca-tor-1"] + }, + { + "displayname": "Toronto 2", + "value": ["ca-tor-2"] + }, + { + "displayname": "Toronto 3", + "value": ["ca-tor-3"] + }, + { + "displayname": "Tokyo 1", + "value": ["jp-tok-1"] + }, + { + "displayname": "Tokyo 2", + "value": ["jp-tok-2"] + }, + { + "displayname": "Tokyo 3", + "value": ["jp-tok-3"] + }, + { + "displayname": "London 1", + "value": ["eu-gb-1"] }, - "description": "The HPC variation of the deployable architecture is based on the IBM Cloud for Financial Services reference architecture. The architecture creates a customizable and secure infrastructure, with virtual servers, to run your workloads with a Virtual Private Cloud (VPC) in multizone regions." + { + "displayname": "London 2", + "value": ["eu-gb-2"] + }, + { + "displayname": "London 3", + "value": ["eu-gb-3"] + }, + { + "displayname": "Sydney 1", + "value": ["au-syd-1"] + }, + { + "displayname": "Sydney 2", + "value": ["au-syd-2"] + }, + { + "displayname": "Syndney 3", + "value": ["au-syd-3"] + }, + { + "displayname": "Osaka 1", + "value": ["jp-osa-1"] + }, + { + "displayname": "Osaka 2", + "value": ["jp-osa-2"] + }, + { + "displayname": "Osaka 3", + "value": ["jp-osa-3"] + }, + { + "displayname": "Sao Paulo 1", + "value": ["br-sao-1"] + }, + { + "displayname": "Sao Paulo 2", + "value": ["br-sao-2"] + }, + { + "displayname": "Sao Paulo 3", + "value": ["br-sao-3"] + } + ] + }, + { + "key": "app_center_gui_password", + "required": true + }, + { + "key": "cluster_prefix" + }, + { + "key": "observability_atracker_enable" + }, + { + "key": "observability_atracker_target_type", + "default_value": "cloudlogs", + "options": [ + { + "displayname": "cloudlogs", + "value": "cloudlogs" + }, + { + "displayname": "cos", + "value": "cos" + } + ] + }, + { + "key": "observability_monitoring_enable" + }, + { + "key": "observability_monitoring_on_compute_nodes_enable" + }, + { + "key": "observability_enable_metrics_routing" + }, + { + "key": "observability_enable_platform_logs" + }, + { + "key": "observability_logs_enable_for_compute" + }, + { + "key": "observability_logs_enable_for_management" + }, + { + "key": "observability_logs_retention_period", + "default_value": 7, + "options": [ + { + "displayname": 7, + "value": 7 + }, + { + "displayname": 14, + "value": 14 + }, + { + "displayname": 30, + "value": 30 + }, + { + "displayname": 60, + "value": 60 + }, + { + "displayname": 90, + "value": 90 + } + ] + }, + { + "key": "observability_monitoring_plan", + "default_value": "graduated-tier", + "options": [ + { + "displayname": "graduated-tier", + "value": "graduated-tier" + }, + { + "displayname": "lite", + "value": "lite" + } + ] + }, + { + "key": "enable_vpc_flow_logs" + }, + { + "key": "skip_flowlogs_s2s_auth_policy" + }, + { + "key": "skip_kms_s2s_auth_policy" + }, + { + "key": "scc_enable" + }, + { + "key": "scc_profile", + "default_value": "CIS IBM Cloud Foundations Benchmark v1.1.0", + "options": [ + { + "displayname": "CIS IBM Cloud Foundations Benchmark v1.1.0", + "value": "CIS IBM Cloud Foundations Benchmark v1.1.0" + }, + { + "displayname": "IBM Cloud Framework for Financial Services", + "value": "IBM Cloud Framework for Financial Services" + } + ] + }, + { + "key": "scc_location", + "default_value": "us-south", + "options": [ + { + "displayname": "us-south", + "value": "us-south" + }, + { + "displayname": "eu-de", + "value": "eu-de" + }, + { + 
"displayname": "ca-tor", + "value": "ca-tor" + }, + { + "displayname": "eu-es", + "value": "eu-es" + } + ] + }, + { + "key": "scc_event_notification_plan", + "default_value": "lite", + "options": [ + { + "displayname": "lite", + "value": "lite" + }, + { + "displayname": "standard", + "value": "standard" + } + ] + }, + { + "key": "bastion_instance", + "type": "object", + "default_value": "{\n \"image\": \"ibm-ubuntu-22-04-5-minimal-amd64-3\",\n \"profile\": \"cx2-4x8\"\n}", + "required": false, + "custom_config": { + "type": "json_editor", + "grouping": "deployment", + "original_grouping": "deployment", + "config_constraints": { + "type": "mixed" + } } - ] - } - } - ] - }, - { - "name": "terraform-ibm-modules-terraform-ibm-hpc-lsf", - "label": "IBM Spectrum LSF (BYOL)", - "product_kind": "solution", - "tags": [ - "Deployable Architecture", - "DA", - "HPC", - "IBM Cloud HPC", - "ibm_created", - "target_terraform", - "terraform", - "reference_architecture", - "solution" - ], - "keywords": [ - "HPC", - "vpc", - "DA", - "Deployable Architecture", - "terraform", - "solution" - ], - "short_description": "Deploy your high performance computing (HPC) cluster with IBM scheduling software for compute intensive workloads.", - "long_description": "**Before you begin deploying IBM Cloud HPC, make sure that you meet the prerequisites listed in [the step-by-step guide](https://cloud.ibm.com/docs/allowlist/hpc-service?topic=hpc-service-before-you-begin-deploying).**\n\nIBM Cloud HPC is a deployable architecture where you can deploy both cloud compute resources (vCPU resources) and HPC scheduling software for your compute-intensive HPC workloads. You can reserve capacity on a recurring hourly basis from a dedicated IBM Cloud HPC resource pool.", - "provider_name": "IBM", - "offering_docs_url": "https://cloud.ibm.com/docs/allowlist/hpc-service?topic=hpc-service-overview", - "features": [ - { - "title": "Reserve HPC compute on an hourly basis", - "description": "Reserve HPC compute capacity on a recurring hourly basis. This reserved time slot and capacity cannot be preempted by IBM." - }, - { - "title": "HPC infrastructure bundled with LSF scheduling software", - "description": "Create a cluster with IBM Spectrum LSF in its own subnet and security group for HPC administrators to log in, submit, and manage HPC jobs." - }, - { - "title": "Automatic compute scaling with LSF resource connector", - "description": "Automatically scale up compute resources based on workload demand. Compute resources are automatically scaled down when no longer required." 
- } - ], - "flavors": [ - { - "label": "IBM Spectrum LSF (BYOL) with VPC File Storage", - "name": "lsf-with-vpc-file-storage", - "install_type": "fullstack", - "working_directory": "solutions/lsf", - "compliance": { - "authority": "scc-v3", - "profiles": [ - { - "profile_name": "IBM Cloud Framework for Financial Services", - "profile_version": "1.5.0" + }, + { + "key": "deployer_instance", + "type": "object", + "default_value": "{\n \"image\": \"hpc-lsf-fp15-deployer-rhel810-v1\",\n \"profile\": \"bx2-8x32\"\n}", + "required": false, + "custom_config": { + "type": "json_editor", + "grouping": "deployment", + "original_grouping": "deployment", + "config_constraints": { + "type": "mixed" + } } - ] - }, - "release_notes_url": "https://cloud.ibm.com/docs/allowlist/hpc-service?topic=hpc-service-release-notes", - "configuration": [ - ], - "iam_permissions": [ + }, { - "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "appid" + "key": "vpc_cidr" }, { - "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "cloud-object-storage" + "key": "vpc_cluster_private_subnets_cidr_blocks" }, { - "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "hs-crypto" + "key": "vpc_cluster_login_private_subnets_cidr_blocks" }, { - "role_crns": [ - "crn:v1:bluemix:public:iam::::role:Administrator" - ], - "service_name": "iam-identity" + "key": "vpc_name" }, { - "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "kms" + "key": "cluster_subnet_id" }, { - "role_crns": [ - "crn:v1:bluemix:public:iam::::role:Administrator" - ], - "service_name": "is.vpc" - } - ], - "architecture": { - "descriptions": "", - "features": [ - { - "title": "Separate VPC for HPC workloads", - "description": "Yes" - }, - { - "title": "Virtual Server Instances for every subnet", - "description": "Yes" - }, - { - "title": "Increases security with Key Management", - "description": "Yes" - }, - { - "title": "Reduces failure events by using multizone regions", - "description": "Yes" - }, - { - "title": "Collects and stores Internet Protocol (IP) traffic information with Activity Tracker and Flow Logs", - "description": "Yes" - }, - { - "title": "Securely connects to multiple networks with a site-to-site virtual private network", - "description": "Yes" - }, - { - "title": "Simplifies risk management and demonstrates regulatory compliance with Financial Services", - "description": "Yes" - }, - { - "title": "Uses Floating IP address for access through the public internet", - "description": "No" + "key": "login_subnet_id" + }, + { + "key": "login_instance", + "type": "array", + "default_value": "[\n {\n \"profile\": \"bx2-2x8\",\n \"image\": \"hpc-lsf-fp15-compute-rhel810-v1\"\n }\n]", + "required": false, + "custom_config": { + "type": "json_editor", + "grouping": "deployment", + "original_grouping": "deployment", + "config_constraints": { + "type": "mixed" + } } - ], - "diagrams": [ - { - "diagram": { - "caption": "HPC variation", - "url": "https://raw.githubusercontent.com/terraform-ibm-modules/terraform-ibm-landing-zone/main/reference-architectures/vsi-vsi.drawio.svg", - "type": "image/svg+xml" - }, - "description": "The HPC variation of the deployable architecture is based on the IBM Cloud for Financial Services reference architecture. 
The architecture creates a customizable and secure infrastructure, with virtual servers, to run your workloads with a Virtual Private Cloud (VPC) in multizone regions." + }, + { + "key": "management_instances", + "type": "array", + "default_value": "[\n {\n \"count\": 2,\n \"profile\": \"bx2-16x64\",\n \"image\": \"hpc-lsf-fp15-rhel810-v1\"\n }\n]", + "required": false, + "custom_config": { + "type": "json_editor", + "grouping": "deployment", + "original_grouping": "deployment", + "config_constraints": { + "type": "mixed" + } } - ] - } - }, - { - "label": "IBM Spectrum LSF (BYOL) with NFS Scale", - "name": "lsf-with-nfs-scale", - "install_type": "fullstack", - "working_directory": "solutions/lsf", - "compliance": { - "authority": "scc-v3", - "profiles": [ - { - "profile_name": "IBM Cloud Framework for Financial Services", - "profile_version": "1.5.0" + }, + { + "key": "static_compute_instances", + "type": "array", + "default_value": "[\n {\n \"count\": 0,\n \"profile\": \"bx2-16x64\",\n \"image\": \"hpc-lsf-fp15-compute-rhel810-v1\"\n }\n]", + "required": false, + "custom_config": { + "type": "json_editor", + "grouping": "deployment", + "original_grouping": "deployment", + "config_constraints": { + "type": "mixed" + } } - ] - }, - "release_notes_url": "https://cloud.ibm.com/docs/allowlist/hpc-service?topic=hpc-service-release-notes", - "configuration": [ + }, + { + "key": "dynamic_compute_instances", + "type": "array", + "default_value": "[\n {\n \"count\": 500,\n \"profile\": \"bx2-16x64\",\n \"image\": \"hpc-lsf-fp15-compute-rhel810-v1\"\n }\n]", + "required": false, + "custom_config": { + "type": "json_editor", + "grouping": "deployment", + "original_grouping": "deployment", + "config_constraints": { + "type": "mixed" + } + } + }, + { + "key": "enable_dedicated_host" + }, + { + "key": "enable_hyperthreading" + }, + { + "key": "custom_file_shares", + "type": "array", + "default_value": "[\n {\n \"mount_path\": \"/mnt/vpcstorage/tools\",\n \"size\": 100,\n \"iops\": 2000\n },\n {\n \"mount_path\": \"/mnt/vpcstorage/data\",\n \"size\": 100,\n \"iops\": 6000\n },\n {\n \"mount_path\": \"/mnt/scale/tools\",\n \"nfs_share\": \"\"\n }\n]\n", + "required": false, + "custom_config": { + "type": "json_editor", + "grouping": "deployment", + "original_grouping": "deployment", + "config_constraints": { + "type": "mixed" + } + } + }, + { + "key": "storage_security_group_id" + }, + { + "key": "dns_instance_id" + }, + { + "key": "dns_domain_name" + }, + { + "key": "dns_custom_resolver_id" + }, + { + "key": "enable_cos_integration" + }, + { + "key": "cos_instance_name" + }, + { + "key": "key_management" + }, + { + "key": "kms_instance_name" + }, + { + "key": "kms_key_name" + }, + { + "key": "skip_iam_block_storage_authorization_policy" + }, + { + "key": "skip_iam_share_authorization_policy" + }, + { + "key": "vpn_enabled" + }, + { + "key": "enable_ldap" + }, + { + "key": "ldap_basedns" + }, + { + "key": "ldap_server" + }, + { + "key": "ldap_admin_password" + }, + { + "key": "ldap_user_name" + }, + { + "key": "ldap_user_password" + }, + { + "key": "ldap_instance", + "type": "array", + "default_value": "[\n {\n \"profile\": \"cx2-2x4\",\n \"image\": \"ibm-ubuntu-22-04-5-minimal-amd64-3\"\n }\n]", + "required": false, + "custom_config": { + "type": "json_editor", + "grouping": "deployment", + "original_grouping": "deployment", + "config_constraints": { + "type": "mixed" + } + } + }, + { + "key": "ldap_server_cert" + }, + { + "key": "existing_bastion_instance_name" + }, + { + "key": 
"existing_bastion_instance_public_ip" + }, + { + "key": "existing_bastion_security_group_id" + }, + { + "key": "existing_bastion_ssh_private_key", + "type": "multiline_secure_value", + "required": false, + "custom_config": { + "grouping": "deployment", + "original_grouping": "deployment", + "type": "multiline_secure_value" + } + }, + { + "hidden": true, + "key": "TF_VERSION" + }, + { + "hidden": true, + "key": "TF_PARALLELISM" + }, + { + "hidden": true, + "key": "override" + }, + { + "hidden": true, + "key": "override_json_string" + } ], "iam_permissions": [ { + "service_name": "cloud-object-storage", "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "appid" + "crn:v1:bluemix:public:iam::::serviceRole:Writer", + "crn:v1:bluemix:public:iam::::role:ConfigReader" + ] }, { "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" + "crn:v1:bluemix:public:iam::::serviceRole:Manager", + "crn:v1:bluemix:public:iam::::role:Editor" ], - "service_name": "cloud-object-storage" + "service_name": "dns-svcs" }, { + "service_name": "kms", "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "hs-crypto" + "crn:v1:bluemix:public:iam::::serviceRole:Manager", + "crn:v1:bluemix:public:iam::::role:ConfigReader" + ] }, { + "service_name": "compliance", "role_crns": [ + "crn:v1:bluemix:public:iam::::serviceRole:Manager", "crn:v1:bluemix:public:iam::::role:Administrator" - ], - "service_name": "iam-identity" - }, - { - "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "kms" + ] }, { + "service_name": "secrets-manager", "role_crns": [ + "crn:v1:bluemix:public:iam::::serviceRole:Manager", "crn:v1:bluemix:public:iam::::role:Administrator" - ], - "service_name": "is.vpc" - } - ], - "architecture": { - "descriptions": "", - "features": [ - { - "title": "Separate VPC for HPC workloads", - "description": "Yes" - }, - { - "title": "Virtual Server Instances for every subnet", - "description": "Yes" - }, - { - "title": "Increases security with Key Management", - "description": "Yes" - }, - { - "title": "Reduces failure events by using multizone regions", - "description": "Yes" - }, - { - "title": "Collects and stores Internet Protocol (IP) traffic information with Activity Tracker and Flow Logs", - "description": "Yes" - }, - { - "title": "Securely connects to multiple networks with a site-to-site virtual private network", - "description": "Yes" - }, - { - "title": "Simplifies risk management and demonstrates regulatory compliance with Financial Services", - "description": "Yes" - }, - { - "title": "Uses Floating IP address for access through the public internet", - "description": "No" - } - ], - "diagrams": [ - { - "diagram": { - "caption": "HPC variation", - "url": "https://raw.githubusercontent.com/terraform-ibm-modules/terraform-ibm-landing-zone/main/reference-architectures/vsi-vsi.drawio.svg", - "type": "image/svg+xml" - }, - "description": "The HPC variation of the deployable architecture is based on the IBM Cloud for Financial Services reference architecture. The architecture creates a customizable and secure infrastructure, with virtual servers, to run your workloads with a Virtual Private Cloud (VPC) in multizone regions." 
- } - ] - } - }, - { - "label": "IBM Spectrum LSF (BYOL) with native Scale", - "name": "lsf-with-native-scale", - "install_type": "fullstack", - "working_directory": "solutions/lsf", - "compliance": { - "authority": "scc-v3", - "profiles": [ - { - "profile_name": "IBM Cloud Framework for Financial Services", - "profile_version": "1.5.0" - } - ] - }, - "release_notes_url": "https://cloud.ibm.com/docs/allowlist/hpc-service?topic=hpc-service-release-notes", - "configuration": [ - ], - "iam_permissions": [ + ] + }, { + "service_name": "is.share", "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "appid" + "crn:v1:bluemix:public:iam::::role:Editor" + ] }, { + "service_name": "iam-identity", "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "cloud-object-storage" + "crn:v1:bluemix:public:iam::::role:Administrator" + ] }, { + "service_name": "databases-for-mysql", "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "hs-crypto" + "crn:v1:bluemix:public:iam::::role:Editor" + ] }, { "role_crns": [ - "crn:v1:bluemix:public:iam::::role:Administrator" + "crn:v1:bluemix:public:iam::::role:Editor" ], - "service_name": "iam-identity" + "service_name": "is.vpc" }, { + "service_name": "is.flow-log-collector", "role_crns": [ - "crn:v1:bluemix:public:iam::::serviceRole:Manager" - ], - "service_name": "kms" + "crn:v1:bluemix:public:iam::::role:Editor" + ] }, { + "service_name": "sysdig-monitor", "role_crns": [ + "crn:v1:bluemix:public:iam::::serviceRole:Manager", "crn:v1:bluemix:public:iam::::role:Administrator" - ], - "service_name": "is.vpc" + ] } ], "architecture": { @@ -467,7 +666,7 @@ }, { "title": "Reduces failure events by using multizone regions", - "description": "Yes" + "description": "No" }, { "title": "Collects and stores Internet Protocol (IP) traffic information with Activity Tracker and Flow Logs", @@ -478,28 +677,27 @@ "description": "Yes" }, { - "title": "Simplifies risk management and demonstrates regulatory compliance with Financial Services", + "title": "Simplifies risk management and demonstrates regulatory compliance with the CIS IBM Cloud Foundations Benchmark", "description": "Yes" }, { "title": "Uses Floating IP address for access through the public internet", - "description": "No" + "description": "Yes" } ], "diagrams": [ { "diagram": { - "caption": "HPC variation", - "url": "https://raw.githubusercontent.com/terraform-ibm-modules/terraform-ibm-landing-zone/main/reference-architectures/vsi-vsi.drawio.svg", + "caption": "IBM Spectrum LSF", + "url": "https://raw.githubusercontent.com/terraform-ibm-modules/terraform-ibm-hpc/refs/heads/schematics-testing/LSF_DA_New.drawio.svg", "type": "image/svg+xml" }, - "description": "The HPC variation of the deployable architecture is based on the IBM Cloud for Financial Services reference architecture. The architecture creates a customizable and secure infrastructure, with virtual servers, to run your workloads with a Virtual Private Cloud (VPC) in multizone regions." + "description": "This deployable architecture sets up a VPC on IBM Cloud to run HPC workloads within a single zone. A login node is deployed in a dedicated subnet and security group to facilitate secure access to the HPC environment.
The HPC management nodes and static compute nodes reside in a separate subnet and security group.\nClusters of virtual server instances are provisioned with the IBM Spectrum LSF scheduler pre-installed for HPC workload job management. The LSF scheduler dynamically provisions compute nodes as needed and removes them once jobs are completed.\nThe solution supports either IBM Cloud File Storage for VPC or a dedicated clustered shared file system using IBM Storage Scale, which is a high performance, highly available, clustered file system with advanced features like File Audit Logging for security and Active File Management for hybrid cloud connectivity. IBM Storage Scale provides more performance and scalability than standard file storage solutions." } ] } } ] } - ] } diff --git a/locals.tf b/locals.tf index 7e7a0d81..70a4ef4a 100644 --- a/locals.tf +++ b/locals.tf @@ -32,6 +32,7 @@ locals { comp_mgmt_instances = var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].compute_management_vsi_data]) storage_instances = var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].storage_vsi_data]) storage_servers = var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].storage_bms_data]) + storage_tie_brkr_bm = var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].storage_tie_breaker_bms_data]) protocol_instances = var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].protocol_vsi_data]) gklm_instances = var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].gklm_vsi_data]) client_instances = var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].client_vsi_data]) @@ -39,6 +40,10 @@ locals { ldap_instances = var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].ldap_vsi_data]) tie_brkr_instances = var.enable_deployer ? [] : flatten(module.landing_zone_vsi[0].storage_cluster_tie_breaker_vsi_data) strg_mgmt_instances = var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].storage_cluster_management_vsi]) + login_instance = var.enable_deployer ? [] : flatten(module.landing_zone_vsi[0].login_vsi_data) + + storage_bm_name_with_vol_mapping = var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].storage_bm_name_with_vol_mapping]) + storage_tie_breaker_bms_name_with_vol_mapping = var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].storage_tie_breaker_bms_name_with_vol_mapping]) management_instance_count = sum(var.management_instances[*]["count"]) storage_instance_count = var.storage_type == "persistent" ? sum(var.storage_servers[*]["count"]) : sum(var.storage_instances[*]["count"]) @@ -51,10 +56,9 @@ # locals needed for landing_zone_vsi locals { # dependency: landing_zone -> deployer -> landing_zone_vsi - bastion_security_group_id = module.deployer.bastion_security_group_id - bastion_public_key_content = module.deployer.bastion_public_key_content - bastion_private_key_content = module.deployer.bastion_private_key_content - bastion_security_group_id_for_ref = module.deployer.bastion_security_group_id_for_ref + bastion_security_group_id = module.deployer.bastion_security_group_id + bastion_public_key_content = module.deployer.bastion_public_key_content + bastion_private_key_content = module.deployer.bastion_private_key_content deployer_hostname = var.enable_deployer ?
flatten(module.deployer.deployer_vsi_data[*].list)[0].name : "" deployer_ip = module.deployer.deployer_ip @@ -112,14 +116,14 @@ locals { # dependency: landing_zone -> landing_zone_vsi client_subnets = var.vpc_name != null && var.client_subnets != null ? local.existing_client_subnets : module.landing_zone.client_subnets - cluster_subnets = var.vpc_name != null && var.cluster_subnet_ids != null ? local.existing_cluster_subnets : module.landing_zone.compute_subnets + cluster_subnets = var.vpc_name != null && var.cluster_subnet_id != null ? local.existing_cluster_subnets : module.landing_zone.compute_subnets storage_subnets = var.vpc_name != null && var.storage_subnets != null ? local.existing_storage_subnets : module.landing_zone.storage_subnets protocol_subnets = var.vpc_name != null && var.protocol_subnets != null ? local.existing_protocol_subnets : module.landing_zone.protocol_subnets login_subnets = var.vpc_name != null && var.login_subnet_id != null ? local.existing_login_subnets : module.landing_zone.bastion_subnets storage_subnet = [for subnet in local.storage_subnets : subnet.name] protocol_subnet = [for subnet in local.protocol_subnets : subnet.name] - protocol_subnet_id = local.protocol_instance_count > 0 ? [for subnet in local.protocol_subnets : subnet.id][0] : "" + protocol_subnet_id = var.enable_deployer ? "" : local.protocol_instance_count > 0 ? [for subnet in local.protocol_subnets : subnet.id][0] : "" cluster_subnet = [for subnet in local.cluster_subnets : subnet.id][0] client_subnet = [for subnet in local.client_subnets : subnet.name] login_subnet = [for subnet in local.login_subnets : subnet.id][0] @@ -131,9 +135,9 @@ locals { # locals needed for file-storage locals { # dependency: landing_zone_vsi -> file-share - compute_subnet_id = var.vpc_name == null && var.cluster_subnet_ids == null ? local.cluster_subnets[0].id : [for subnet in data.ibm_is_subnet.existing_cluster_subnets : subnet.id][0] + compute_subnet_id = (var.vpc_name == null && var.cluster_subnet_id == null ? local.cluster_subnets[0].id : (var.vpc_name != null && var.cluster_subnet_id != null ? [for subnet in data.ibm_is_subnet.existing_cluster_subnets : subnet.id][0] : (var.vpc_name != null && var.cluster_subnet_id == null ? local.cluster_subnets[0].id : ""))) bastion_subnet_id = (var.enable_deployer && var.vpc_name != null && var.login_subnet_id != null) ? local.existing_login_subnets[0].id : "" - subnet_id = (var.enable_deployer && var.vpc_name != null && var.cluster_subnet_ids != null) ? local.existing_cluster_subnets[0].id : "" + subnet_id = (var.enable_deployer && var.vpc_name != null && var.cluster_subnet_id != null) ? local.existing_cluster_subnets[0].id : "" compute_security_group_id = var.enable_deployer ? [] : module.landing_zone_vsi[0].compute_sg_id nfs_shares_map = { @@ -248,7 +252,7 @@ locals { existing_client_subnet_crns = [for subnet in data.ibm_is_subnet.existing_client_subnets : subnet.crn] existing_bastion_subnet_crns = [for subnet in data.ibm_is_subnet.existing_login_subnets : subnet.crn] subnets_crn = concat(local.existing_compute_subnet_crns, local.existing_storage_subnet_crns, local.existing_protocol_subnet_crns, local.existing_client_subnet_crns, local.existing_bastion_subnet_crns) - # subnets_crn = var.vpc_name == null && var.cluster_subnet_ids == null ? module.landing_zone.subnets_crn : concat(local.existing_subnet_crns, module.landing_zone.subnets_crn) + # subnets_crn = var.vpc_name == null && var.cluster_subnet_id == null ? 
module.landing_zone.subnets_crn : concat(local.existing_subnet_crns, module.landing_zone.subnets_crn) # subnets = flatten([local.cluster_subnets, local.storage_subnets, local.protocol_subnets]) # subnets_crns = data.ibm_is_subnet.itself[*].crn # subnets_crn = module.landing_zone.subnets_crn @@ -287,15 +291,33 @@ locals { } ] + raw_bm_storage_servers_dns_record_details = [ + for server in local.storage_servers : { + id = server.id + ipv4_address = server.ipv4_address[0] + name = server.name + vni_id = server.vni_id + } + ] + + raw_bm_tie_breaker_dns_record_details = [ + for server in local.storage_tie_brkr_bm : { + id = server.id + ipv4_address = server.ipv4_address[0] + name = server.name + vni_id = server.vni_id + } + ] + compute_dns_records = [ - for instance in concat(local.compute_instances, local.comp_mgmt_instances, local.deployer_instances) : + for instance in concat(local.compute_instances, local.comp_mgmt_instances, local.deployer_instances, local.login_instance) : { name = instance["name"] rdata = instance["ipv4_address"] } ] storage_dns_records = [ - for instance in concat(local.storage_instances, local.protocol_instances, local.afm_instances, local.tie_brkr_instances, local.strg_mgmt_instances, local.storage_servers) : + for instance in concat(local.storage_instances, local.protocol_instances, local.afm_instances, local.tie_brkr_instances, local.strg_mgmt_instances, local.raw_bm_storage_servers_dns_record_details, local.raw_bm_tie_breaker_dns_record_details) : { name = instance["name"] rdata = instance["ipv4_address"] @@ -321,25 +343,27 @@ locals { locals { compute_hosts = try([for name in local.compute_instances[*]["name"] : "${name}.${var.dns_domain_names["compute"]}"], []) # storage_hosts = try([for name in local.storage_instances[*]["name"] : "${name}.${var.dns_domain_names["storage"]}"], []) - ldap_hosts = try([for instance in local.ldap_instances : instance["ipv4_address"]], []) - compute_inventory_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/compute.ini" : "${path.root}/modules/ansible-roles/compute.ini" - compute_hosts_inventory_path = var.enable_bastion ? "${path.root}/../../solutions/lsf/compute_hosts.ini" : "${path.root}/solutions/lsf/compute_hosts.ini" - mgmt_hosts_inventory_path = var.enable_bastion ? "${path.root}/../../solutions/lsf/mgmt_hosts.ini" : "${path.root}/solutions/lsf/mgmt_hosts.ini" - bastion_hosts_inventory_path = var.enable_bastion ? "${path.root}/../../solutions/lsf/bastion_hosts.ini" : "${path.root}/solutions/lsf/bastion_hosts.ini" - ldap_hosts_inventory_path = var.enable_bastion ? "${path.root}/../../solutions/lsf/ldap_hosts.ini" : "${path.root}/solutions/lsf/ldap_hosts.ini" - # storage_inventory_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/storage.ini" : "${path.root}/modules/ansible-roles/storage.ini" + ldap_hosts = try([for instance in local.ldap_instances : instance["ipv4_address"]], []) + login_host_ip = try([for instance in local.login_instance : instance["ipv4_address"]], []) + compute_inventory_path = var.enable_deployer ? "${path.root}/../../modules/ansible-roles/compute.ini" : "${path.root}/modules/ansible-roles/compute.ini" + compute_hosts_inventory_path = var.enable_deployer ? "${path.root}/../../solutions/lsf/compute_hosts.ini" : "${path.root}/solutions/lsf/compute_hosts.ini" + mgmt_hosts_inventory_path = var.enable_deployer ? "${path.root}/../../solutions/lsf/mgmt_hosts.ini" : "${path.root}/solutions/lsf/mgmt_hosts.ini" + bastion_hosts_inventory_path = var.enable_deployer ? 
"${path.root}/../../solutions/lsf/bastion_hosts.ini" : "${path.root}/solutions/lsf/bastion_hosts.ini" + deployer_hosts_inventory_path = var.enable_deployer ? "${path.root}/../../solutions/lsf/deployer_hosts.ini" : "${path.root}/solutions/lsf/deployer_hosts.ini" + ldap_hosts_inventory_path = var.enable_deployer ? "${path.root}/../../solutions/lsf/ldap_hosts.ini" : "${path.root}/solutions/lsf/ldap_hosts.ini" + login_host_inventory_path = var.enable_deployer ? "${path.root}/../../solutions/lsf/login_host.ini" : "${path.root}/solutions/lsf/login_host.ini" + # storage_inventory_path = var.enable_deployer ? "${path.root}/../../modules/ansible-roles/storage.ini" : "${path.root}/modules/ansible-roles/storage.ini" } # locals needed for playbook locals { bastion_fip = module.deployer.bastion_fip - compute_private_key_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/compute_id_rsa" : "${path.root}/modules/ansible-roles/compute_id_rsa" #checkov:skip=CKV_SECRET_6 - # storage_private_key_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/storage_id_rsa" : "${path.root}/modules/ansible-roles/storage_id_rsa" #checkov:skip=CKV_SECRET_6 - compute_playbook_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/compute_ssh.yaml" : "${path.root}/modules/ansible-roles/compute_ssh.yaml" - observability_playbook_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/observability.yaml" : "${path.root}/modules/ansible-roles/observability.yaml" - lsf_mgmt_playbooks_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/lsf_mgmt_config.yml" : "${path.root}/modules/ansible-roles/lsf_mgmt_config.yml" - playbooks_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/" : "${path.root}/modules/ansible-roles" - # storage_playbook_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/storage_ssh.yaml" : "${path.root}/modules/ansible-roles/storage_ssh.yaml" + compute_private_key_path = var.enable_deployer ? "${path.root}/../../modules/ansible-roles/compute_id_rsa" : "${path.root}/modules/ansible-roles/compute_id_rsa" #checkov:skip=CKV_SECRET_6 + # storage_private_key_path = var.enable_deployer ? "${path.root}/../../modules/ansible-roles/storage_id_rsa" : "${path.root}/modules/ansible-roles/storage_id_rsa" #checkov:skip=CKV_SECRET_6 + observability_playbook_path = var.enable_deployer ? "${path.root}/../../modules/ansible-roles/observability.yaml" : "${path.root}/modules/ansible-roles/observability.yaml" + lsf_mgmt_playbooks_path = var.enable_deployer ? "${path.root}/../../modules/ansible-roles/lsf_mgmt_config.yml" : "${path.root}/modules/ansible-roles/lsf_mgmt_config.yml" + playbooks_path = var.enable_deployer ? "${path.root}/../../modules/ansible-roles/" : "${path.root}/modules/ansible-roles" + # storage_playbook_path = var.enable_deployer ? "${path.root}/../../modules/ansible-roles/storage_ssh.yaml" : "${path.root}/modules/ansible-roles/storage_ssh.yaml" } # file Share OutPut @@ -353,10 +377,12 @@ locals { compute_hosts_ips = var.scheduler == "LSF" ? var.enable_deployer ? [] : local.compute_instances_data[*]["ipv4_address"] : [] # bastion_instances_data = var.scheduler == "LSF" ? var.enable_deployer ? flatten([module.deployer.bastion_vsi_data]) : [] : [] bastion_hosts_ips = var.scheduler == "LSF" ? var.enable_deployer ? [module.deployer.bastion_fip] : [] : [] + deployer_hosts_ips = var.scheduler == "LSF" ? var.enable_deployer ? 
[module.deployer.deployer_ip] : [] : [] mgmt_instances_data = var.scheduler == "LSF" ? var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].management_vsi_data]) : [] mgmt_hosts_ips = var.scheduler == "LSF" ? var.enable_deployer ? [] : local.mgmt_instances_data[*]["ipv4_address"] : [] - json_inventory_path = var.enable_bastion ? "${path.root}/../../modules/ansible-roles/all.json" : "${path.root}/modules/ansible-roles/all.json" + json_inventory_path = var.enable_deployer ? "${path.root}/../../modules/ansible-roles/all.json" : "${path.root}/modules/ansible-roles/all.json" management_nodes = var.scheduler == "LSF" ? var.enable_deployer ? [] : (flatten([module.landing_zone_vsi[0].management_vsi_data]))[*]["name"] : [] + login_host = var.scheduler == "LSF" ? var.enable_deployer ? [] : (flatten([module.landing_zone_vsi[0].login_vsi_data]))[*]["name"] : [] compute_nodes = var.scheduler == "LSF" ? ( var.enable_deployer ? [] : flatten([module.landing_zone_vsi[0].compute_vsi_data])[*]["name"] ) : [] @@ -399,10 +425,15 @@ locals { scc_cos_bucket = length(module.landing_zone.cos_buckets_names) > 0 && var.scc_enable ? [for name in module.landing_zone.cos_buckets_names : name if strcontains(name, "scc-bucket")][0] : "" scc_cos_instance_crn = length(module.landing_zone.cos_instance_crns) > 0 && var.scc_enable ? module.landing_zone.cos_instance_crns[0] : "" - compute_subnet_crn = var.enable_deployer ? "" : data.ibm_is_subnet.compute_subnet_crn.crn + compute_subnet_crn = var.enable_deployer ? "" : data.ibm_is_subnet.compute_subnet_crn[0].crn ssh_keys_ids = var.enable_deployer ? [] : [for name in var.ssh_keys : data.ibm_is_ssh_key.ssh_keys[name].id] compute_public_key_content = var.enable_deployer ? "" : jsonencode(base64encode(join("", flatten([module.landing_zone_vsi[0].compute_public_key_content])))) compute_private_key_content = var.enable_deployer ? "" : jsonencode(base64encode(join("", flatten([module.landing_zone_vsi[0].compute_private_key_content])))) + + mgmnt_host_entry = var.scheduler == "LSF" ? { for vsi in flatten([module.landing_zone_vsi[*].management_vsi_data]) : vsi.ipv4_address => vsi.name } : {} + comp_host_entry = var.scheduler == "LSF" ? { for vsi in flatten([module.landing_zone_vsi[*].compute_vsi_data]) : vsi.ipv4_address => vsi.name } : {} + login_host_entry = var.scheduler == "LSF" ? { for vsi in flatten([module.landing_zone_vsi[*].login_vsi_data]) : vsi.ipv4_address => vsi.name } : {} + deployer_host_entry = var.scheduler == "LSF" ? 
{ for inst in local.deployer_instances : inst.ipv4_address => inst.name if inst.ipv4_address != null } : {} } locals { @@ -442,6 +473,10 @@ locals { strg_servers_ids = flatten(local.storage_servers[*]["id"]) strg_servers_names = try(tolist([for name_details in flatten(local.storage_servers[*]["name"]) : "${name_details}.${var.dns_domain_names["storage"]}"]), []) + bm_tie_breaker_private_ips = flatten(local.storage_tie_brkr_bm[*]["ipv4_address"]) + bm_tie_breaker_ids = flatten(local.storage_tie_brkr_bm[*]["id"]) + bm_tie_breaker_names = try(tolist([for name_details in flatten(local.storage_tie_brkr_bm[*]["name"]) : "${name_details}.${var.dns_domain_names["storage"]}"]), []) + strg_mgmt_instance_private_ips = flatten(local.strg_mgmt_instances[*]["ipv4_address"]) strg_mgmtt_instance_ids = flatten(local.strg_mgmt_instances[*]["id"]) strg_mgmt_instance_names = try(tolist([for name_details in flatten(local.strg_mgmt_instances[*]["name"]) : "${name_details}.${var.dns_domain_names["storage"]}"]), []) @@ -493,7 +528,7 @@ locals { storage_instance_private_ips = var.storage_type != "persistent" ? local.enable_afm == true ? concat(local.strg_instance_private_ips, local.afm_instance_private_ips) : local.strg_instance_private_ips : [] storage_instance_ids = var.storage_type != "persistent" ? local.enable_afm == true ? concat(local.strg_instance_ids, local.afm_instance_ids) : local.strg_instance_ids : [] storage_instance_names = var.storage_type != "persistent" ? local.enable_afm == true ? concat(local.strg_instance_names, local.afm_instance_names) : local.strg_instance_names : [] - storage_ips_with_vol_mapping = module.landing_zone_vsi[*].instance_ips_with_vol_mapping + storage_ips_with_vol_mapping = var.storage_type != "persistent" ? module.landing_zone_vsi[*].instance_ips_with_vol_mapping : local.storage_bm_name_with_vol_mapping storage_cluster_instance_private_ips = local.scale_ces_enabled == false ? local.storage_instance_private_ips : concat(local.storage_instance_private_ips, local.protocol_instance_private_ips) storage_cluster_instance_ids = local.scale_ces_enabled == false ? local.storage_instance_ids : concat(local.storage_instance_ids, local.protocol_instance_ids) @@ -507,13 +542,13 @@ locals { baremetal_cluster_instance_ids = var.storage_type == "persistent" && local.scale_ces_enabled == false ? local.baremetal_instance_ids : concat(local.baremetal_instance_ids, local.protocol_instance_ids) baremetal_cluster_instance_names = var.storage_type == "persistent" && local.scale_ces_enabled == false ? local.baremetal_instance_names : concat(local.baremetal_instance_names, local.protocol_instance_names) - tie_breaker_storage_instance_private_ips = var.storage_type != "persistent" ? local.strg_tie_breaker_private_ips : local.baremetal_instance_private_ips - tie_breaker_storage_instance_ids = var.storage_type != "persistent" ? local.strg_tie_breaker_instance_ids : local.baremetal_instance_ids - tie_breaker_storage_instance_names = var.storage_type != "persistent" ? local.strg_tie_breaker_instance_names : local.baremetal_instance_names - tie_breaker_ips_with_vol_mapping = module.landing_zone_vsi[*].instance_ips_with_vol_mapping_tie_breaker + tie_breaker_storage_instance_private_ips = var.storage_type != "persistent" ? local.strg_tie_breaker_private_ips : local.bm_tie_breaker_private_ips + tie_breaker_storage_instance_ids = var.storage_type != "persistent" ? local.strg_tie_breaker_instance_ids : local.bm_tie_breaker_ids + tie_breaker_storage_instance_names = var.storage_type != "persistent" ? 
local.strg_tie_breaker_instance_names : local.bm_tie_breaker_names + tie_breaker_ips_with_vol_mapping = var.storage_type != "persistent" ? module.landing_zone_vsi[*].instance_ips_with_vol_mapping_tie_breaker : local.storage_tie_breaker_bms_name_with_vol_mapping storage_subnet_cidr = var.enable_deployer ? "" : local.storage_instance_count > 0 ? jsonencode((data.ibm_is_subnet.existing_storage_subnets[*].ipv4_cidr_block)[0]) : "" - cluster_subnet_cidr = var.enable_deployer ? "" : jsonencode((data.ibm_is_subnet.existing_cluster_subnets[*].ipv4_cidr_block)[0]) + cluster_subnet_cidr = var.enable_deployer ? "" : local.static_compute_instance_count > 0 ? jsonencode((data.ibm_is_subnet.existing_cluster_subnets[*].ipv4_cidr_block)[0]) : "" client_subnet_cidr = var.enable_deployer ? "" : local.client_instance_count > 0 ? jsonencode((data.ibm_is_subnet.existing_client_subnets[*].ipv4_cidr_block)[0]) : "" compute_memory = data.ibm_is_instance_profile.compute_profile.memory[0].value @@ -527,25 +562,32 @@ locals { storage_desc_bandwidth = data.ibm_is_instance_profile.storage_profile.bandwidth[0].value storage_memory = var.storage_type == "persistent" ? data.ibm_is_bare_metal_server_profile.storage_bms_profile.memory[0].value : data.ibm_is_instance_profile.storage_profile.memory[0].value storage_vcpus_count = var.storage_type == "persistent" ? data.ibm_is_bare_metal_server_profile.storage_bms_profile.cpu_core_count[0].value : data.ibm_is_instance_profile.storage_profile.vcpu_count[0].value - storage_bandwidth = var.storage_type == "persistent" ? data.ibm_is_bare_metal_server_profile.storage_bms_profile.bandwidth[0].value : data.ibm_is_instance_profile.storage_profile.bandwidth[0].value + storage_bandwidth = var.storage_type == "persistent" ? local.sapphire_rapids_profile_check == true ? 200000 : 100000 : data.ibm_is_instance_profile.storage_profile.bandwidth[0].value protocol_memory = (local.scale_ces_enabled == true && var.colocate_protocol_instances == false) ? local.ces_server_type == false ? data.ibm_is_instance_profile.protocol_profile[0].memory[0].value : jsonencode(0) : jsonencode(0) protocol_vcpus_count = (local.scale_ces_enabled == true && var.colocate_protocol_instances == false) ? local.ces_server_type == false ? data.ibm_is_instance_profile.protocol_profile[0].vcpu_count[0].value : jsonencode(0) : jsonencode(0) protocol_bandwidth = (local.scale_ces_enabled == true && var.colocate_protocol_instances == false) ? local.ces_server_type == false ? data.ibm_is_instance_profile.protocol_profile[0].bandwidth[0].value : jsonencode(0) : jsonencode(0) storage_protocol_memory = var.storage_type == "persistent" ? data.ibm_is_bare_metal_server_profile.storage_bms_profile.memory[0].value : data.ibm_is_instance_profile.storage_profile.memory[0].value storage_protocol_vcpus_count = var.storage_type == "persistent" ? data.ibm_is_bare_metal_server_profile.storage_bms_profile.cpu_core_count[0].value : data.ibm_is_instance_profile.storage_profile.vcpu_count[0].value - storage_protocol_bandwidth = var.storage_type == "persistent" ? data.ibm_is_bare_metal_server_profile.storage_bms_profile.bandwidth[0].value : data.ibm_is_instance_profile.storage_profile.bandwidth[0].value + storage_protocol_bandwidth = var.storage_type == "persistent" ? local.sapphire_rapids_profile_check == true ? 200000 : 100000 : data.ibm_is_instance_profile.storage_profile.bandwidth[0].value afm_memory = local.afm_server_type == true ? 
jsonencode("") : data.ibm_is_instance_profile.afm_server_profile[0].memory[0].value afm_vcpus_count = local.afm_server_type == true ? jsonencode("") : data.ibm_is_instance_profile.afm_server_profile[0].vcpu_count[0].value afm_bandwidth = local.afm_server_type == true ? jsonencode("") : data.ibm_is_instance_profile.afm_server_profile[0].bandwidth[0].value protocol_reserved_name_ips_map = try({ for details in data.ibm_is_subnet_reserved_ips.protocol_subnet_reserved_ips[0].reserved_ips : details.name => details.address }, {}) - protocol_subnet_gateway_ip = local.scale_ces_enabled == true ? local.protocol_reserved_name_ips_map.ibm-default-gateway : "" + protocol_subnet_gateway_ip = var.enable_deployer ? "" : local.scale_ces_enabled == true ? local.protocol_reserved_name_ips_map.ibm-default-gateway : "" } # Existing bastion Variables locals { - # bastion_instance_name = var.bastion_instance_name != null ? var.bastion_instance_name : null bastion_instance_public_ip = var.existing_bastion_instance_name != null ? var.existing_bastion_instance_public_ip : null - # bastion_security_group_id = var.bastion_instance_name != null ? var.bastion_security_group_id : module.bootstrap.bastion_security_group_id - bastion_ssh_private_key = var.existing_bastion_instance_name != null ? var.existing_bastion_ssh_private_key : null + bastion_ssh_private_key = var.existing_bastion_instance_name != null ? var.existing_bastion_ssh_private_key : null + sapphire_rapids_profile_check = [ + for server in var.storage_servers : + strcontains(server.profile, "3-metal") || strcontains(server.profile, "3d-metal") + ] +} + +locals { + existing_vpc_cidr = var.vpc_name != null ? data.ibm_is_vpc_address_prefixes.existing_vpc_cidr[0].address_prefixes[0].cidr : null + cluster_cidr = var.vpc_name == null ? var.vpc_cidr : local.existing_vpc_cidr } diff --git a/main.tf b/main.tf index 85a554d9..d398aa9c 100644 --- a/main.tf +++ b/main.tf @@ -1,7 +1,6 @@ module "landing_zone" { source = "./modules/landing_zone" enable_landing_zone = var.enable_landing_zone - allowed_cidr = var.remote_allowed_ips vpc_cluster_private_subnets_cidr_blocks = [var.vpc_cluster_private_subnets_cidr_blocks] cos_instance_name = var.cos_instance_name bastion_subnet_id = local.bastion_subnet_id @@ -16,7 +15,7 @@ module "landing_zone" { vpc_cluster_login_private_subnets_cidr_blocks = var.vpc_cluster_login_private_subnets_cidr_blocks management_instances = var.management_instances compute_instances = var.static_compute_instances - network_cidr = var.vpc_cidr + cluster_cidr = local.cluster_cidr placement_strategy = var.placement_strategy prefix = var.cluster_prefix protocol_instances = var.protocol_instances @@ -35,80 +34,89 @@ module "landing_zone" { skip_flowlogs_s2s_auth_policy = var.skip_flowlogs_s2s_auth_policy skip_kms_s2s_auth_policy = var.skip_kms_s2s_auth_policy observability_logs_enable = var.observability_logs_enable_for_management || var.observability_logs_enable_for_compute || (var.observability_atracker_enable && var.observability_atracker_target_type == "cloudlogs") ? 
true : false + scale_encryption_type = var.scale_encryption_type + scale_encryption_enabled = var.scale_encryption_enabled + key_protect_instance_id = var.key_protect_instance_id # hpcs_instance_name = var.hpcs_instance_name # clusters = var.clusters } module "deployer" { - source = "./modules/deployer" - scheduler = var.scheduler - resource_group = local.resource_group_ids["workload_rg"] - prefix = var.cluster_prefix - vpc_id = local.vpc_id - network_cidr = var.vpc_cidr - enable_bastion = var.enable_bastion - bastion_subnets = local.login_subnets - bastion_image = var.bastion_image - bastion_instance_profile = var.bastion_instance_profile - enable_deployer = var.enable_deployer - deployer_image = var.deployer_image - deployer_instance_profile = var.deployer_instance_profile - ssh_keys = var.ssh_keys - allowed_cidr = var.remote_allowed_ips - kms_encryption_enabled = local.kms_encryption_enabled - boot_volume_encryption_key = local.boot_volume_encryption_key - existing_kms_instance_guid = local.existing_kms_instance_guid - dns_domain_names = var.dns_domain_names - skip_iam_authorization_policy = var.skip_iam_authorization_policy - bastion_instance_name = var.existing_bastion_instance_name - bastion_instance_public_ip = local.bastion_instance_public_ip - bastion_security_group_id = var.existing_bastion_instance_name != null ? var.existing_bastion_security_group_id : null + source = "./modules/deployer" + scheduler = var.scheduler + resource_group = local.resource_group_ids["workload_rg"] + prefix = var.cluster_prefix + vpc_id = local.vpc_id + zones = var.zones + cluster_cidr = local.cluster_cidr + ext_login_subnet_id = var.login_subnet_id + bastion_subnets = local.login_subnets + ext_cluster_subnet_id = var.cluster_subnet_id + cluster_subnets = local.cluster_subnets + bastion_instance = var.bastion_instance + enable_deployer = var.enable_deployer + deployer_instance = var.deployer_instance + ssh_keys = var.ssh_keys + allowed_cidr = var.remote_allowed_ips + kms_encryption_enabled = local.kms_encryption_enabled + boot_volume_encryption_key = local.boot_volume_encryption_key + existing_kms_instance_guid = local.existing_kms_instance_guid + dns_domain_names = var.dns_domain_names + skip_iam_authorization_policy = var.skip_iam_block_storage_authorization_policy + ext_vpc_name = var.vpc_name + bastion_instance_name = var.existing_bastion_instance_name + bastion_instance_public_ip = local.bastion_instance_public_ip + existing_bastion_security_group_id = var.existing_bastion_instance_name != null ? var.existing_bastion_security_group_id : null } module "landing_zone_vsi" { - count = var.enable_deployer == false ? 
1 : 0 - source = "./modules/landing_zone_vsi" - resource_group = var.resource_group_ids["workload_rg"] - prefix = var.cluster_prefix - vpc_id = local.vpc_id - zones = var.zones - bastion_security_group_id = var.bastion_security_group_id - bastion_security_group_id_for_ref = var.bastion_security_group_id_for_ref - bastion_public_key_content = local.bastion_public_key_content - ssh_keys = var.ssh_keys - client_subnets = local.client_subnets - client_instances = var.client_instances - cluster_subnet_ids = local.cluster_subnets - management_instances = var.management_instances - static_compute_instances = var.static_compute_instances - dynamic_compute_instances = var.dynamic_compute_instances - storage_subnets = local.storage_subnets - storage_instances = var.storage_instances - storage_servers = var.storage_servers - storage_type = var.storage_type - protocol_subnets = local.protocol_subnets - protocol_instances = var.protocol_instances - nsd_details = var.nsd_details - dns_domain_names = var.dns_domain_names - kms_encryption_enabled = local.kms_encryption_enabled - boot_volume_encryption_key = var.boot_volume_encryption_key - existing_kms_instance_guid = var.existing_kms_instance_guid - enable_bastion = var.enable_bastion - afm_instances = var.afm_instances - enable_dedicated_host = var.enable_dedicated_host - enable_ldap = var.enable_ldap - ldap_instances = var.ldap_instances - ldap_server = local.ldap_server - ldap_instance_key_pair = local.ldap_instance_key_pair - scale_encryption_enabled = var.scale_encryption_enabled - scale_encryption_type = var.scale_encryption_type - gklm_instance_key_pair = local.gklm_instance_key_pair - gklm_instances = var.gklm_instances - vpc_region = local.region - scheduler = var.scheduler - ibm_customer_number = var.ibm_customer_number - colocate_protocol_instances = var.colocate_protocol_instances - storage_security_group_id = var.storage_security_group_id + count = var.enable_deployer == false ? 
1 : 0 + source = "./modules/landing_zone_vsi" + resource_group = var.resource_group_ids["workload_rg"] + prefix = var.cluster_prefix + vpc_id = local.vpc_id + zones = var.zones + bastion_security_group_id = var.bastion_security_group_id + bastion_public_key_content = local.bastion_public_key_content + ssh_keys = var.ssh_keys + client_subnets = local.client_subnets + client_instances = var.client_instances + cluster_subnet_id = local.cluster_subnets + management_instances = var.management_instances + static_compute_instances = var.static_compute_instances + dynamic_compute_instances = var.dynamic_compute_instances + storage_subnets = local.storage_subnets + storage_instances = var.storage_instances + storage_servers = var.storage_servers + storage_type = var.storage_type + protocol_subnets = local.protocol_subnets + protocol_instances = var.protocol_instances + nsd_details = var.nsd_details + dns_domain_names = var.dns_domain_names + kms_encryption_enabled = local.kms_encryption_enabled + boot_volume_encryption_key = var.boot_volume_encryption_key + existing_kms_instance_guid = var.existing_kms_instance_guid + enable_deployer = var.enable_deployer + afm_instances = var.afm_instances + enable_dedicated_host = var.enable_dedicated_host + enable_ldap = var.enable_ldap + ldap_instances = var.ldap_instance + ldap_server = local.ldap_server + ldap_instance_key_pair = local.ldap_instance_key_pair + scale_encryption_enabled = var.scale_encryption_enabled + scale_encryption_type = var.scale_encryption_type + gklm_instance_key_pair = local.gklm_instance_key_pair + gklm_instances = var.gklm_instances + vpc_region = local.region + scheduler = var.scheduler + ibm_customer_number = var.ibm_customer_number + colocate_protocol_instances = var.colocate_protocol_instances + storage_security_group_id = var.storage_security_group_id + login_instance = var.login_instance + bastion_subnets = local.login_subnets + cluster_cidr = local.cluster_cidr + bms_boot_drive_encryption = var.bms_boot_drive_encryption + tie_breaker_bm_server = var.tie_breaker_bm_server } module "prepare_tf_input" { @@ -118,6 +126,7 @@ module "prepare_tf_input" { deployer_ip = local.deployer_ip bastion_fip = local.bastion_fip ibmcloud_api_key = var.ibmcloud_api_key + app_center_gui_password = var.app_center_gui_password lsf_version = var.lsf_version resource_group_ids = local.resource_group_ids cluster_prefix = var.cluster_prefix @@ -141,9 +150,10 @@ module "prepare_tf_input" { vpc_name = local.vpc_name storage_subnets = local.storage_subnet protocol_subnets = local.protocol_subnet - cluster_subnet_ids = local.cluster_subnet + cluster_subnet_id = local.cluster_subnet client_subnets = local.client_subnet login_subnet_id = local.login_subnet + login_instance = var.login_instance dns_domain_names = var.dns_domain_names key_management = local.key_management kms_instance_name = var.kms_instance_name @@ -154,7 +164,6 @@ module "prepare_tf_input" { dns_custom_resolver_id = var.dns_custom_resolver_id dns_instance_id = var.dns_instance_id bastion_security_group_id = local.bastion_security_group_id - bastion_security_group_id_for_ref = local.bastion_security_group_id_for_ref deployer_hostname = local.deployer_hostname enable_hyperthreading = var.enable_hyperthreading scc_enable = var.scc_enable @@ -176,7 +185,7 @@ module "prepare_tf_input" { observability_atracker_enable = var.observability_atracker_enable observability_atracker_target_type = var.observability_atracker_target_type enable_ldap = var.enable_ldap - ldap_instances = var.ldap_instances + 
ldap_instance = var.ldap_instance ldap_server = local.ldap_server ldap_basedns = var.ldap_basedns ldap_server_cert = local.ldap_server_cert @@ -190,12 +199,18 @@ module "prepare_tf_input" { gklm_instances = var.gklm_instances scale_encryption_type = var.scale_encryption_type filesystem_config = var.filesystem_config + scale_encryption_admin_default_password = var.scale_encryption_admin_default_password + scale_encryption_admin_username = var.scale_encryption_admin_username scale_encryption_admin_password = var.scale_encryption_admin_password scale_encryption_enabled = var.scale_encryption_enabled - github_token = var.github_token + key_protect_instance_id = var.key_protect_instance_id storage_security_group_id = var.storage_security_group_id custom_file_shares = var.custom_file_shares existing_bastion_instance_name = var.existing_bastion_instance_name + existing_bastion_security_group_id = var.existing_bastion_security_group_id + vpc_cluster_private_subnets_cidr_blocks = var.vpc_cluster_private_subnets_cidr_blocks + bms_boot_drive_encryption = var.bms_boot_drive_encryption + tie_breaker_bm_server = var.tie_breaker_bm_server depends_on = [module.deployer] } @@ -210,15 +225,17 @@ module "validate_ldap_server_connection" { } module "resource_provisioner" { - source = "./modules/resource_provisioner" - ibmcloud_api_key = var.ibmcloud_api_key - enable_deployer = var.enable_deployer - bastion_fip = local.bastion_fip - bastion_private_key_content = local.bastion_ssh_private_key != null ? local.bastion_ssh_private_key : local.bastion_private_key_content - deployer_ip = local.deployer_ip - scheduler = var.scheduler - github_token = var.github_token - depends_on = [module.deployer, module.prepare_tf_input, module.validate_ldap_server_connection] + source = "./modules/resource_provisioner" + ibmcloud_api_key = var.ibmcloud_api_key + enable_deployer = var.enable_deployer + cluster_prefix = var.cluster_prefix + bastion_fip = local.bastion_fip + bastion_private_key_content = local.bastion_ssh_private_key != null ? local.bastion_ssh_private_key : local.bastion_private_key_content + deployer_ip = local.deployer_ip + scheduler = var.scheduler + existing_bastion_instance_name = var.existing_bastion_instance_name + bastion_public_key_content = local.bastion_public_key_content + depends_on = [module.deployer, module.prepare_tf_input, module.validate_ldap_server_connection] } module "cos" { @@ -262,7 +279,7 @@ module "dns" { subnets_crn = local.subnets_crn dns_instance_id = var.dns_instance_id dns_custom_resolver_id = var.dns_custom_resolver_id - dns_domain_names = values(var.dns_domain_names) + dns_domain_names = compact(values(var.dns_domain_names)) } module "compute_dns_records" { @@ -275,7 +292,7 @@ module "compute_dns_records" { } module "storage_dns_records" { - count = var.enable_deployer == false ? 1 : 0 + count = var.enable_deployer == false && length(var.storage_instances) > 0 ? 1 : 0 source = "./modules/dns_record" dns_instance_id = local.dns_instance_id dns_zone_id = local.storage_dns_zone_id @@ -296,7 +313,7 @@ module "protocol_reserved_ip" { } module "client_dns_records" { - count = var.enable_deployer == false ? 1 : 0 + count = var.enable_deployer == false && length(var.client_instances) > 0 ? 1 : 0 source = "./modules/dns_record" dns_instance_id = local.dns_instance_id dns_zone_id = local.client_dns_zone_id @@ -305,7 +322,7 @@ module "client_dns_records" { } module "gklm_dns_records" { - count = var.enable_deployer == false ? 
1 : 0 + count = var.enable_deployer == false && length(var.gklm_instances) > 0 ? 1 : 0 source = "./modules/dns_record" dns_instance_id = local.dns_instance_id dns_zone_id = local.gklm_dns_zone_id @@ -327,12 +344,14 @@ module "write_compute_cluster_inventory" { lsf_clients = local.client_nodes gui_hosts = local.gui_hosts db_hosts = local.db_hosts + login_host = local.login_host prefix = var.cluster_prefix ha_shared_dir = local.ha_shared_dir nfs_install_dir = local.nfs_install_dir enable_monitoring = local.enable_monitoring lsf_deployer_hostname = local.lsf_deployer_hostname ibmcloud_api_key = var.ibmcloud_api_key + app_center_gui_password = var.app_center_gui_password lsf_version = var.lsf_version dns_domain_names = var.dns_domain_names compute_public_key_content = local.compute_public_key_content @@ -348,6 +367,8 @@ module "write_compute_cluster_inventory" { compute_security_group_id = local.compute_security_group_id compute_ssh_keys_ids = local.ssh_keys_ids compute_subnet_crn = local.compute_subnet_crn + kms_encryption_enabled = local.kms_encryption_enabled + boot_volume_encryption_key = var.boot_volume_encryption_key depends_on = [time_sleep.wait_60_seconds, module.landing_zone_vsi] } @@ -363,7 +384,7 @@ module "write_compute_scale_cluster_inventory" { vpc_region = jsonencode(local.region) vpc_availability_zones = var.zones scale_version = jsonencode(local.scale_version) - compute_cluster_filesystem_mountpoint = jsonencode(var.static_compute_instances[0]["filesystem"]) + compute_cluster_filesystem_mountpoint = jsonencode(var.scale_compute_cluster_filesystem_mountpoint) storage_cluster_filesystem_mountpoint = jsonencode("None") filesystem_block_size = jsonencode("None") compute_cluster_instance_private_ips = concat((local.enable_sec_interface_compute ? local.secondary_compute_instance_private_ips : local.compute_instance_private_ips), local.compute_mgmt_instance_private_ips) @@ -420,9 +441,9 @@ module "write_storage_scale_cluster_inventory" { compute_cluster_instance_private_ips = [] compute_cluster_instance_private_dns_ip_map = {} compute_cluster_instance_names = [] - storage_cluster_instance_ids = var.storage_type == "persistent" ? concat(local.baremetal_cluster_instance_ids, local.strg_mgmtt_instance_ids, local.tie_breaker_storage_instance_ids) : concat(local.storage_cluster_instance_ids, local.strg_mgmtt_instance_ids, local.tie_breaker_storage_instance_ids) - storage_cluster_instance_private_ips = var.storage_type == "persistent" ? concat(local.baremetal_cluster_instance_private_ips, local.strg_mgmt_instance_private_ips, local.tie_breaker_storage_instance_private_ips) : concat(local.storage_cluster_instance_private_ips, local.strg_mgmt_instance_private_ips, local.tie_breaker_storage_instance_private_ips) - storage_cluster_instance_names = var.storage_type == "persistent" ? concat(local.baremetal_cluster_instance_names, local.strg_mgmt_instance_names, local.tie_breaker_storage_instance_names) : concat(local.storage_cluster_instance_names, local.strg_mgmt_instance_names, local.tie_breaker_storage_instance_names) + storage_cluster_instance_ids = var.storage_type == "persistent" ? concat(local.baremetal_cluster_instance_ids, local.strg_mgmtt_instance_ids, local.bm_tie_breaker_ids) : concat(local.storage_cluster_instance_ids, local.strg_mgmtt_instance_ids, local.tie_breaker_storage_instance_ids) + storage_cluster_instance_private_ips = var.storage_type == "persistent" ? 
concat(local.baremetal_cluster_instance_private_ips, local.strg_mgmt_instance_private_ips, local.bm_tie_breaker_private_ips) : concat(local.storage_cluster_instance_private_ips, local.strg_mgmt_instance_private_ips, local.tie_breaker_storage_instance_private_ips) + storage_cluster_instance_names = var.storage_type == "persistent" ? concat(local.baremetal_cluster_instance_names, local.strg_mgmt_instance_names, local.bm_tie_breaker_names) : concat(local.storage_cluster_instance_names, local.strg_mgmt_instance_names, local.tie_breaker_storage_instance_names) storage_cluster_with_data_volume_mapping = local.storage_ips_with_vol_mapping[0] storage_cluster_instance_private_dns_ip_map = {} storage_cluster_desc_instance_private_ips = local.strg_tie_breaker_private_ips @@ -500,6 +521,17 @@ module "write_client_scale_cluster_inventory" { filesystem_mountpoint = jsonencode("") } +module "key_protect_scale" { + count = var.scale_encryption_enabled == true && var.scale_encryption_type == "key_protect" ? 1 : 0 + source = "./modules/key_protect" + key_protect_instance_id = var.key_protect_instance_id != null ? var.key_protect_instance_id : module.landing_zone.key_management_instance_id + resource_prefix = var.cluster_prefix + vpc_region = local.region + resource_group_id = var.resource_group_ids["service_rg"] + key_protect_path = format("%s/scale_key_protect", var.scale_ansible_repo_clone_path) + vpc_storage_cluster_dns_domain = var.dns_domain_names["storage"] +} + module "compute_cluster_configuration" { count = var.scheduler == "Scale" && var.enable_deployer == false ? 1 : 0 source = "./modules/common/compute_configuration" @@ -533,8 +565,8 @@ module "compute_cluster_configuration" { ldap_basedns = var.ldap_basedns ldap_server = var.enable_ldap ? local.ldap_instance_private_ips[0] : null ldap_admin_password = local.ldap_admin_password == "" ? jsonencode(null) : local.ldap_admin_password - enable_key_protect = var.scale_encryption_type - depends_on = [module.write_compute_scale_cluster_inventory] + enable_key_protect = var.scale_encryption_type == "key_protect" ? "True" : "False" + depends_on = [module.write_compute_scale_cluster_inventory, module.key_protect_scale] } module "storage_cluster_configuration" { @@ -594,8 +626,8 @@ module "storage_cluster_configuration" { ldap_server = var.enable_ldap ? local.ldap_instance_private_ips[0] : null ldap_admin_password = local.ldap_admin_password == "" ? jsonencode(null) : local.ldap_admin_password ldap_server_cert = local.ldap_server_cert - enable_key_protect = var.scale_encryption_type - depends_on = [module.write_storage_scale_cluster_inventory] + enable_key_protect = var.scale_encryption_type == "key_protect" ? "True" : "False" + depends_on = [module.write_storage_scale_cluster_inventory, module.key_protect_scale] } module "client_configuration" { @@ -643,11 +675,56 @@ module "remote_mount_configuration" { depends_on = [module.compute_cluster_configuration, module.storage_cluster_configuration] } +module "encryption_configuration" { + source = "./modules/common/encryption_configuration" + count = var.scheduler == "Scale" && var.enable_deployer == false && var.scale_encryption_enabled && var.scale_encryption_type == "gklm" ? 1 : 0 + turn_on = (var.create_separate_namespaces == true && local.storage_instance_count > 0) ? 
true : false + clone_path = var.scale_ansible_repo_clone_path + create_scale_cluster = var.create_scale_cluster + meta_private_key = module.landing_zone_vsi[0].storage_private_key_content + scale_encryption_type = var.scale_encryption_type != null ? var.scale_encryption_type : null + scale_encryption_admin_password = var.scale_encryption_admin_password + scale_encryption_servers = var.scale_encryption_enabled && var.scale_encryption_type == "gklm" ? local.gklm_instance_private_ips : [] + scale_encryption_servers_dns = var.scale_encryption_type == "gklm" ? jsonencode([for instance in local.gklm_instances : instance.name]) : jsonencode([]) + scale_cluster_clustername = var.cluster_prefix + scale_encryption_admin_default_password = var.scale_encryption_admin_default_password + scale_encryption_admin_username = var.scale_encryption_admin_username + compute_cluster_create_complete = module.compute_cluster_configuration[0].compute_cluster_create_complete + storage_cluster_create_complete = module.storage_cluster_configuration[0].storage_cluster_create_complete + remote_mount_create_complete = module.remote_mount_configuration[0].remote_mount_create_complete + compute_cluster_encryption = (var.create_separate_namespaces == true && local.static_compute_instance_count > 0) ? true : false + storage_cluster_encryption = (var.create_separate_namespaces == true && local.storage_instance_count > 0) ? true : false + depends_on = [module.client_configuration, module.compute_cluster_configuration, module.storage_cluster_configuration] +} + +module "ldap_configuration" { + count = var.scheduler == "Scale" && var.enable_deployer == false ? 1 : 0 + source = "./modules/common/ldap_configuration" + turn_on = var.enable_ldap && var.ldap_server == "null" + clone_path = var.scale_ansible_repo_clone_path + create_scale_cluster = var.create_scale_cluster + bastion_user = jsonencode(var.bastion_user) + write_inventory_complete = module.write_storage_scale_cluster_inventory[0].write_scale_inventory_complete + ldap_cluster_prefix = var.cluster_prefix + script_path = format("%s/modules/common/scripts/prepare_ldap_inv.py", var.scale_ansible_repo_clone_path) + using_jumphost_connection = var.using_jumphost_connection + bastion_instance_public_ip = local.bastion_fip + bastion_ssh_private_key = local.bastion_ssh_private_key != null ? local.bastion_ssh_private_key : local.bastion_private_key_content + ldap_basedns = var.ldap_basedns + ldap_admin_password = var.ldap_admin_password + ldap_user_name = var.ldap_user_name + ldap_user_password = var.ldap_user_password + ldap_server = local.ldap_server + meta_private_key = module.landing_zone_vsi[0].storage_private_key_content + depends_on = [module.validate_ldap_server_connection] +} + module "compute_inventory" { count = var.enable_deployer == false ? 1 : 0 source = "./modules/inventory" scheduler = var.scheduler hosts = local.compute_hosts + login_host = local.login_host inventory_path = local.compute_inventory_path name_mount_path_map = local.fileshare_name_mount_path_map logs_enable_for_management = var.observability_logs_enable_for_management @@ -703,6 +780,13 @@ module "compute_inventory_hosts" { inventory_path = local.compute_hosts_inventory_path } +module "login_inventory_host" { + count = var.enable_deployer == false ? 1 : 0 + source = "./modules/inventory_hosts" + hosts = local.login_host_ip + inventory_path = local.login_host_inventory_path +} + module "bastion_inventory_hosts" { count = var.enable_deployer == true ? 
1 : 0 source = "./modules/inventory_hosts" @@ -710,6 +794,13 @@ module "bastion_inventory_hosts" { inventory_path = local.bastion_hosts_inventory_path } +module "deployer_inventory_hosts" { + count = var.enable_deployer == true ? 1 : 0 + source = "./modules/inventory_hosts" + hosts = local.deployer_hosts_ips + inventory_path = local.deployer_hosts_inventory_path +} + module "ldap_inventory_hosts" { count = var.enable_deployer == false && var.enable_ldap == true ? 1 : 0 source = "./modules/inventory_hosts" @@ -724,8 +815,7 @@ module "compute_playbook" { bastion_fip = local.bastion_fip private_key_path = local.compute_private_key_path inventory_path = local.compute_inventory_path - playbook_path = local.compute_playbook_path - enable_bastion = var.enable_bastion + enable_deployer = var.enable_deployer ibmcloud_api_key = var.ibmcloud_api_key observability_provision = var.observability_logs_enable_for_management || var.observability_logs_enable_for_compute || var.observability_monitoring_enable ? true : false cloudlogs_provision = var.observability_logs_enable_for_management || var.observability_logs_enable_for_compute ? true : false @@ -734,7 +824,13 @@ module "compute_playbook" { enable_ldap = var.enable_ldap ldap_server = local.ldap_server playbooks_path = local.playbooks_path - depends_on = [module.compute_inventory] + mgmnt_hosts = local.mgmnt_host_entry + comp_hosts = local.comp_host_entry + login_host = local.login_host_entry + deployer_host = local.deployer_host_entry + domain_name = var.dns_domain_names["compute"] + enable_dedicated_host = var.enable_dedicated_host + depends_on = [module.compute_inventory, module.landing_zone_vsi] } ################################################### @@ -758,7 +854,7 @@ module "cloud_monitoring_instance_creation" { cloud_logs_as_atracker_target = var.observability_atracker_enable && (var.observability_atracker_target_type == "cloudlogs") ? true : false cloud_logs_data_bucket = var.cloud_logs_data_bucket cloud_metrics_data_bucket = var.cloud_metrics_data_bucket - tags = ["hpc", var.cluster_prefix] + tags = ["lsf", var.cluster_prefix] } # Code for SCC Instance @@ -769,7 +865,7 @@ module "scc_instance_and_profile" { rg = var.resource_group_ids["service_rg"] scc_profile = var.scc_enable ? 
var.scc_profile : "" event_notification_plan = var.scc_event_notification_plan - tags = ["hpc", var.cluster_prefix] + tags = ["lsf", var.cluster_prefix] prefix = var.cluster_prefix cos_bucket = var.scc_cos_bucket cos_instance_crn = var.scc_cos_instance_crn diff --git a/modules/ansible-roles/roles/cloudmonitoring/tasks/mgmt-cloudmonitoring-configure.yml b/modules/ansible-roles/roles/cloudmonitoring/tasks/mgmt-cloudmonitoring-configure.yml index 284fa3bd..48f7b26b 100644 --- a/modules/ansible-roles/roles/cloudmonitoring/tasks/mgmt-cloudmonitoring-configure.yml +++ b/modules/ansible-roles/roles/cloudmonitoring/tasks/mgmt-cloudmonitoring-configure.yml @@ -71,8 +71,14 @@ line: "{{ item.line }}" create: yes loop: - - { regexp: "==ACCESSKEY==", line: "customerid: {{ cloud_monitoring_access_key }}" } - - { regexp: "==COLLECTOR==", line: "collector: {{ cloud_monitoring_ingestion_url }}" } + - { + regexp: "==ACCESSKEY==", + line: "customerid: {{ cloud_monitoring_access_key }}", + } + - { + regexp: "==COLLECTOR==", + line: "collector: {{ cloud_monitoring_ingestion_url }}", + } - { regexp: "^tags:", line: "tags: type:management,lsf:true" } when: monitoring_enable_for_management | bool @@ -113,14 +119,11 @@ executable: /bin/bash when: monitoring_enable_for_management | bool -- name: Install LSF Prometheus Exporter if not already installed - ansible.builtin.shell: | - echo "Exporter not found. Cloning and installing..."; - rm -rf /tmp/lsf_prometheus_exporter - git clone --branch wheel_pkg https://89088669eefb301f1aaa7046c9e567a372c51fe2@github.ibm.com/platformcomputing/lsf_prometheus_exporter /tmp/lsf_prometheus_exporter - {{ pip_executable }} install /tmp/lsf_prometheus_exporter/lsf_prometheus_exporter-1.0.0-py3-none-any.whl - args: - executable: /bin/bash +- name: Install LSF Prometheus Exporter using pip + ansible.builtin.pip: + name: lsf_prometheus_exporter + executable: /usr/local/bin/pip3.11 + extra_args: --no-cache-dir --force-reinstall when: - monitoring_enable_for_management | bool - not exporter_installed.stat.exists @@ -163,7 +166,7 @@ exec >> /var/log/lsf_prometheus_exporter.log 2>&1 source /opt/ibm/lsfsuite/lsf/conf/profile.lsf exec /usr/bin/python3 -m lsf_prometheus_exporter - mode: '0755' + mode: "0755" owner: lsfadmin group: lsfadmin when: @@ -172,7 +175,7 @@ - name: Create systemd service for Prometheus Agent ansible.builtin.copy: dest: /etc/systemd/system/prometheus.service - mode: '0644' + mode: "0644" content: | [Unit] Description=Prometheus Agent @@ -198,6 +201,24 @@ - monitoring_enable_for_management | bool - not exporter_installed.stat.exists +- name: Enable LSF scheduler metrics for Prometheus + ansible.builtin.lineinfile: + path: "{{ LSF_CONF }}/lsbatch/{{ prefix }}/configdir/lsb.params" + insertbefore: "^End Parameters" + line: "SCHED_METRIC_ENABLE=Y" + state: present + backup: yes + when: + - monitoring_enable_for_management | bool + +- name: Restart lsfd service to apply scheduler metric changes + ansible.builtin.systemd: + name: lsfd + state: restarted + enabled: yes + when: + - monitoring_enable_for_management | bool + - name: Reload systemd and start Prometheus Agent ansible.builtin.systemd: daemon_reload: yes @@ -226,7 +247,7 @@ [Install] WantedBy=multi-user.target - mode: '0644' + mode: "0644" when: monitoring_enable_for_management | bool - name: Reload systemd and start Prometheus Agent @@ -240,7 +261,7 @@ - name: Ensure start script has correct permissions ansible.builtin.file: path: /opt/ibm/lsfsuite/lsf/start_lsf_prometheus_exporter.sh - mode: '0755' + mode: "0755" 
owner: lsfadmin group: lsfadmin when: monitoring_enable_for_management | bool @@ -262,10 +283,11 @@ RestartSec=10 User=lsfadmin Group=lsfadmin + Restart=always [Install] WantedBy=multi-user.target - mode: '0644' + mode: "0644" when: monitoring_enable_for_management | bool - name: Reload systemd and start LSF Prometheus Exporter diff --git a/modules/ansible-roles/roles/cloudmonitoring/vars/main.yml b/modules/ansible-roles/roles/cloudmonitoring/vars/main.yml index 15a1b6b7..d32d3330 100644 --- a/modules/ansible-roles/roles/cloudmonitoring/vars/main.yml +++ b/modules/ansible-roles/roles/cloudmonitoring/vars/main.yml @@ -1,3 +1,4 @@ sysdig_config_file: "/opt/draios/etc/dragent.yaml" prometheus_config_file: "/opt/prometheus/prometheus.yml" +LSF_CONF: "/opt/ibm/lsfsuite/lsf/conf" PROMETHEUS_VERSION: "2.51.1" diff --git a/modules/ansible-roles/roles/lsf_login_config/tasks/login_node_configuration.yml b/modules/ansible-roles/roles/lsf_login_config/tasks/login_node_configuration.yml new file mode 100644 index 00000000..6fa39b96 --- /dev/null +++ b/modules/ansible-roles/roles/lsf_login_config/tasks/login_node_configuration.yml @@ -0,0 +1,80 @@ +--- + +- name: Check if LSF logs directory exists + stat: + path: "{{ LSF_LOGS }}" + register: logs_dir_stat + +- name: Ensure LSF logs directory exists (recurse only on first creation) + file: + path: "{{ LSF_LOGS }}" + state: directory + owner: lsfadmin + group: lsfadmin + mode: '0755' + recurse: "{{ not logs_dir_stat.stat.exists }}" + +- name: Ensure LSF conf and work are symlinks via shell + shell: | + [ -L "{{ LSF_TOP }}/{{ item }}" ] && echo "Symlink exists, skipping." || { \ + [ -d "{{ LSF_TOP }}/{{ item }}" ] && rm -rf "{{ LSF_TOP }}/{{ item }}"; \ + ln -s /mnt/lsf/lsf/{{ item }} "{{ LSF_TOP }}/{{ item }}"; } + loop: + - conf + - work + +- name: Ensure correct ownership and permissions of /opt/ibm/lsfsuite + file: + path: "{{ LSF_SUITE }}" + owner: lsfadmin + group: lsfadmin + mode: '0777' + recurse: yes + +- name: Set login_node_host to first host in login_node group + set_fact: + login_node_host: "{{ groups['login_node'][0] }}" + +- name: Get IPv4 address of the current host + shell: "getent ahostsv4 {{ inventory_hostname }} | awk '{ print $1; exit }'" + register: ip_result + changed_when: false + +- name: Ensure login node entry exists in LSF hosts file + lineinfile: + path: /mnt/lsf/lsf/conf/hosts + line: "{{ ip_result.stdout }} {{ login_node_host }}.{{ dns_domain_names }}" + state: present + insertafter: EOF + create: yes + +- name: Insert hostname line after "#prune" only once + lineinfile: + path: "{{ LSF_CLUSTER_FILE }}" + insertafter: "^#prune" + line: "{{ login_node_host }}.{{ dns_domain_names }} Intel_E5 X86_64 0 ()" + state: present + +- name: Ensure LSF profile is sourced in root's .bashrc + lineinfile: + path: "/root/.bashrc" + line: "source {{ LSF_CONF }}/profile.lsf" + state: present + +- name: Ensure LSF profile is sourced in lsfadmin's .bashrc + lineinfile: + path: "{{ LSFADMIN_DIR }}/.bashrc" + line: "source {{ LSF_CONF }}/profile.lsf" + state: present + +- name: Source current user's .bashrc (only if updated) + shell: | + grep -q "source {{ LSF_CONF }}/profile.lsf" /root/.bashrc && source /root/.bashrc || true + args: + executable: /bin/bash + +- name: Source lsfadmin's .bashrc (only if updated) + shell: | + grep -q "source {{ LSF_CONF }}/profile.lsf" "{{ LSFADMIN_DIR }}/.bashrc" && source "{{ LSFADMIN_DIR }}/.bashrc" || true + args: + executable: /bin/bash diff --git a/modules/ansible-roles/roles/lsf_login_config/tasks/main.yml 
b/modules/ansible-roles/roles/lsf_login_config/tasks/main.yml new file mode 100644 index 00000000..167fd89a --- /dev/null +++ b/modules/ansible-roles/roles/lsf_login_config/tasks/main.yml @@ -0,0 +1,4 @@ +--- + +# Configure Login node +- import_tasks: login_node_configuration.yml diff --git a/modules/ansible-roles/roles/lsf_login_config/vars/main.yml b/modules/ansible-roles/roles/lsf_login_config/vars/main.yml new file mode 100644 index 00000000..9328fea8 --- /dev/null +++ b/modules/ansible-roles/roles/lsf_login_config/vars/main.yml @@ -0,0 +1,8 @@ +LSF_SUITE: "/opt/ibm/lsfsuite" +LSF_TOP: "{{ LSF_SUITE }}/lsf" +LSF_CONF: "{{ LSF_TOP }}/conf" +LSF_WORK: "{{ LSF_TOP }}/work" +LSF_LOGS: "/opt/ibm/lsflogs" +LSF_HOSTS_FILE: "{{ LSF_CONF }}/hosts" +LSF_CLUSTER_FILE: "{{ LSF_CONF }}/lsf.cluster.{{ prefix }}" +LSFADMIN_DIR: "/home/lsfadmin" diff --git a/modules/ansible-roles/roles/lsf_mgmt_config/tasks/app_center_configure.yml b/modules/ansible-roles/roles/lsf_mgmt_config/tasks/app_center_configure.yml new file mode 100644 index 00000000..5f72237a --- /dev/null +++ b/modules/ansible-roles/roles/lsf_mgmt_config/tasks/app_center_configure.yml @@ -0,0 +1,73 @@ +--- +# AppCenter HTTPS Configuration + +- name: PAC | Check if HTTPS is already enabled + ansible.builtin.command: pmcadmin https enable + register: https_check + changed_when: "'was already enabled' not in https_check.stdout" + failed_when: false + run_once: true + +- name: PAC | Debug HTTPS status + ansible.builtin.debug: + msg: "HTTPS is already enabled" + when: "'was already enabled' in https_check.stdout" + run_once: true + +- name: PAC | Configure HTTPS for AppCenter + block: + + - name: PAC | Set GUI password for lsfadmin + ansible.builtin.command: passwd --stdin lsfadmin + args: + stdin: "{{ app_center_gui_password }}" + + - name: PAC | Enable HTTPS access for AppCenter + ansible.builtin.command: > + pmcadmin https enable + --password {{ app_center_gui_password }} + --validhosts localhost + + - name: PAC | Stop pmcadmin service + ansible.builtin.command: pmcadmin stop + + - name: PAC | Pause before restarting pmcadmin + ansible.builtin.pause: + seconds: 5 + + - name: PAC | Start pmcadmin service + ansible.builtin.command: pmcadmin start + + - name: PAC | Update JS_PAC_SERVER_URL in js.conf + ansible.builtin.lineinfile: + path: "{{ JS_PAC_SERVER_URL }}" + regexp: '^JS_PAC_SERVER_URL=' + line: "JS_PAC_SERVER_URL=https://{{ lsf_masters[0] }}:8443" + backrefs: true + + - name: PAC | Stop ACD (Application Center Daemon) service + ansible.builtin.service: + name: acd + state: stopped + + - name: PAC | Pause before restarting ACD + ansible.builtin.pause: + seconds: 5 + + - name: PAC | Start ACD (Application Center Daemon) service + ansible.builtin.service: + name: acd + state: started + + rescue: + - name: PAC | Log error if AppCenter HTTPS configuration fails + ansible.builtin.debug: + msg: "AppCenter HTTPS configuration block failed. Check previous task results." + + always: + - name: PAC | Always log final status of AppCenter HTTPS configuration + ansible.builtin.debug: + msg: "AppCenter HTTPS configuration block completed (success or failure)." 
+ + when: "'was already enabled' not in https_check.stdout" + run_once: true diff --git a/modules/ansible-roles/roles/lsf_mgmt_config/tasks/configure_dynamic_nodes_templates.yml b/modules/ansible-roles/roles/lsf_mgmt_config/tasks/configure_dynamic_nodes_templates.yml index 53f83555..8fa72065 100644 --- a/modules/ansible-roles/roles/lsf_mgmt_config/tasks/configure_dynamic_nodes_templates.yml +++ b/modules/ansible-roles/roles/lsf_mgmt_config/tasks/configure_dynamic_nodes_templates.yml @@ -33,13 +33,3 @@ dest: "{{ LSF_RC_IC_CONF }}/user_data.sh" mode: '0644' run_once: true - -- name: Management Config Templates | Restart lsfd service - service: - name: lsfd - state: restarted - -- name: Management Config Templates | Restart NetworkManager - service: - name: NetworkManager - state: restarted diff --git a/modules/ansible-roles/roles/lsf_mgmt_config/tasks/configure_management_nodes.yml b/modules/ansible-roles/roles/lsf_mgmt_config/tasks/configure_management_nodes.yml index f3dfb39c..670fc5f3 100644 --- a/modules/ansible-roles/roles/lsf_mgmt_config/tasks/configure_management_nodes.yml +++ b/modules/ansible-roles/roles/lsf_mgmt_config/tasks/configure_management_nodes.yml @@ -103,3 +103,21 @@ path: "{{ LSF_CLUSTER_FILE }}" regexp: '^lsfservers' state: absent + run_once: true + +# Temporary: Remove after new image build includes cleanup +- name: Temporary Cleanup | Delete all 'sagar-fp-15-new1' folders + ansible.builtin.shell: | + find "{{ LSF_EXT_CONF }}" -type d -name "sagar-fp-15-new1" -exec rm -rf {} + + args: + warn: false + ignore_errors: true + when: inventory_hostname in groups['management_nodes'] + +# Temporary: Remove after new image build includes cleanup +- name: Temporary Cleanup | Replace 'sagar-fp-15-new1' with 'lsfservers' + ansible.builtin.shell: | + grep -rl 'sagar-fp-15-new1' "{{ LSF_EXT_CONF }}" | xargs sed -i 's/sagar-fp-15-new1/lsfservers/g' || true + args: + warn: false + when: inventory_hostname in groups['management_nodes'] diff --git a/modules/ansible-roles/roles/lsf_mgmt_config/tasks/hosts_file_update.yml b/modules/ansible-roles/roles/lsf_mgmt_config/tasks/hosts_file_update.yml index 9a106e22..ca3c63fa 100644 --- a/modules/ansible-roles/roles/lsf_mgmt_config/tasks/hosts_file_update.yml +++ b/modules/ansible-roles/roles/lsf_mgmt_config/tasks/hosts_file_update.yml @@ -31,7 +31,7 @@ - name: Management Config | Aggregate all IPs from all hosts set_fact: - all_ips: "{{ groups['all'] | map('extract', hostvars, 'host_ip') | list }}" + all_ips: "{{ groups['mgmt_compute_nodes'] | map('extract', hostvars, 'host_ip') | list }}" run_once: true - name: Management Config | Display all resolved IP addresses diff --git a/modules/ansible-roles/roles/lsf_mgmt_config/tasks/hyperthreading.yml b/modules/ansible-roles/roles/lsf_mgmt_config/tasks/hyperthreading.yml index 181732ff..87c96cd8 100644 --- a/modules/ansible-roles/roles/lsf_mgmt_config/tasks/hyperthreading.yml +++ b/modules/ansible-roles/roles/lsf_mgmt_config/tasks/hyperthreading.yml @@ -11,28 +11,6 @@ msg: "EGO_DEFINE_NCPUS is set to {{ ego_define_ncpus }}" run_once: true -- name: Hyperthreading | Create LSF hyperthreading script for disabling threads if hyperthreading is false - copy: - dest: "{{ hyperthreading_file }}" - content: | - #!/bin/sh - for vcpu in $(cat /sys/devices/system/cpu/cpu*/topology/thread_siblings_list | cut -s -d- -f2 | cut -d- -f2 | uniq); do - echo "0" > "/sys/devices/system/cpu/cpu"$vcpu"/online" - done - mode: '0755' - when: not enable_hyperthreading - -- name: Hyperthreading | Run the hyperthreading script 
and add to cron if hyperthreading is false - shell: "{{ hyperthreading_file }}" - when: not enable_hyperthreading - -- name: Hyperthreading | Add script to cron for reboot if hyperthreading is false - cron: - name: "Disable Hyperthreading" - special_time: reboot - job: "{{ hyperthreading_file }}" - when: not enable_hyperthreading - - name: Hyperthreading | Set the EGO_DEFINE_NCPUS in LSF config file lineinfile: path: "{{ LSF_CONF_FILE }}" diff --git a/modules/ansible-roles/roles/lsf_mgmt_config/tasks/main.yml b/modules/ansible-roles/roles/lsf_mgmt_config/tasks/main.yml index 0a47f17c..1f745d31 100644 --- a/modules/ansible-roles/roles/lsf_mgmt_config/tasks/main.yml +++ b/modules/ansible-roles/roles/lsf_mgmt_config/tasks/main.yml @@ -6,4 +6,6 @@ - import_tasks: hosts_file_update.yml +- import_tasks: app_center_configure.yml + - import_tasks: configure_dynamic_nodes_templates.yml diff --git a/modules/ansible-roles/roles/lsf_mgmt_config/templates/ibmcloudgen2_templates.json.j2 b/modules/ansible-roles/roles/lsf_mgmt_config/templates/ibmcloudgen2_templates.json.j2 index 0278931b..358f5b11 100644 --- a/modules/ansible-roles/roles/lsf_mgmt_config/templates/ibmcloudgen2_templates.json.j2 +++ b/modules/ansible-roles/roles/lsf_mgmt_config/templates/ibmcloudgen2_templates.json.j2 @@ -10,6 +10,7 @@ "mem": ["Numeric", "{{ rc_mem_in_mb }}"], "icgen2host": ["Boolean", "1"] }, + "crn": "{{ boot_volume_encryption_key }}", "imageId": "{{ image_id }}", "subnetId": "{{ compute_subnet_crn }}", "vpcId": "{{ vpc_id }}", diff --git a/modules/ansible-roles/roles/lsf_mgmt_config/templates/user_data.sh b/modules/ansible-roles/roles/lsf_mgmt_config/templates/user_data.sh index 2b5faac8..4d5336e1 100644 --- a/modules/ansible-roles/roles/lsf_mgmt_config/templates/user_data.sh +++ b/modules/ansible-roles/roles/lsf_mgmt_config/templates/user_data.sh @@ -1,10 +1,11 @@ #!/bin/bash logfile="/tmp/user_data.log" -echo "START $(date '+%Y-%m-%d %H:%M:%S')" >> $logfile +echo "START $(date '+%Y-%m-%d %H:%M:%S')" >>$logfile # Initialize variables cluster_prefix="{{ prefix }}" +default_cluster_name="myCluster" nfs_server_with_mount_path="{{ mount_paths_map['/mnt/lsf'] }}" cloud_monitoring_access_key="{{ cloud_monitoring_access_key }}" cloud_monitoring_ingestion_url="{{ cloud_monitoring_ingestion_url }}" @@ -16,7 +17,6 @@ custom_file_shares="{% for key, value in mount_paths_map.items() if key != '/mnt custom_mount_paths="{% for key in mount_paths_map.keys() if key != '/mnt/lsf' %}{{ key }}{% if not loop.last %} {% endif %}{% endfor %}" hyperthreading="{{ enable_hyperthreading }}" ManagementHostNames="{{ lsf_masters | join(' ') }}" -# rc_cidr_block="{{ compute_subnets_cidr | first }}" dns_domain="{{ dns_domain_names }}" network_interface="eth0" @@ -29,16 +29,12 @@ ldap_basedns="{{ ldap_basedns }}" HostIP=$(hostname -I | awk '{print $1}') hostname=${cluster_prefix}-${HostIP//./-} hostnamectl set-hostname "${hostname}" -systemctl stop firewalld -systemctl disable firewalld # Setup vpcuser to login -if grep -E -q "CentOS|Red Hat" /etc/os-release -then - USER=vpcuser -elif grep -q "Ubuntu" /etc/os-release -then - USER=ubuntu +if grep -E -q "CentOS|Red Hat" /etc/os-release; then + USER=vpcuser +elif grep -q "Ubuntu" /etc/os-release; then + USER=ubuntu fi sed -i -e "s/^/no-port-forwarding,no-agent-forwarding,no-X11-forwarding,command=\"echo \'Please login as the user \\\\\"$USER\\\\\" rather than the user \\\\\"root\\\\\".\';echo;sleep 5; exit 142\" /" /root/.ssh/authorized_keys @@ -48,18 +44,14 @@ chage -I -1 -m 0 -M 99999 -E -1 -W 14 
lsfadmin # Setup Network configuration if grep -q "NAME=\"Red Hat Enterprise Linux" /etc/os-release; then - echo "MTU=9000" >> "/etc/sysconfig/network-scripts/ifcfg-${network_interface}" - echo "DOMAIN=${dns_domain}" >> "/etc/sysconfig/network-scripts/ifcfg-${network_interface}" - gateway_ip=$(ip route | grep default | awk '{print $3}' | head -n 1) - cidr_range=$(ip route show | grep "kernel" | awk '{print $1}' | head -n 1) - echo "$cidr_range via $gateway_ip dev ${network_interface} metric 0 mtu 9000" >> /etc/sysconfig/network-scripts/route-${network_interface} - systemctl restart NetworkManager + echo "MTU=9000" >>"/etc/sysconfig/network-scripts/ifcfg-${network_interface}" + echo "DOMAIN=${dns_domain}" >>"/etc/sysconfig/network-scripts/ifcfg-${network_interface}" + gateway_ip=$(ip route | grep default | awk '{print $3}' | head -n 1) + cidr_range=$(ip route show | grep "kernel" | awk '{print $1}' | head -n 1) + echo "$cidr_range via $gateway_ip dev ${network_interface} metric 0 mtu 9000" >>/etc/sysconfig/network-scripts/route-${network_interface} + systemctl restart NetworkManager fi -# Setup VPC FileShare | NFS Mount -LSF_TOP="/opt/ibm/lsf" -echo "Initiating LSF share mount" >> $logfile - # Function to attempt NFS mount with retries mount_nfs_with_retries() { local server_path=$1 @@ -70,58 +62,54 @@ mount_nfs_with_retries() { rm -rf "${client_path}" mkdir -p "${client_path}" - for (( j=0; j> $logfile + for ((j = 0; j < retries; j++)); do + mount -t nfs -o sec=sys "$server_path" "$client_path" -v >>$logfile if mount | grep -q "${client_path}"; then - echo "Mount successful for ${server_path} on ${client_path}" >> $logfile + echo "Mount successful for ${server_path} on ${client_path}" >>$logfile success=true break else - echo "Attempt $((j+1)) of $retries failed for ${server_path} on ${client_path}" >> $logfile + echo "Attempt $((j + 1)) of $retries failed for ${server_path} on ${client_path}" >>$logfile sleep 2 fi done if [ "$success" = true ]; then chmod 777 "${client_path}" - echo "${server_path} ${client_path} nfs rw,sec=sys,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,_netdev 0 0" >> /etc/fstab + echo "${server_path} ${client_path} nfs rw,sec=sys,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,_netdev 0 0" >>/etc/fstab else - echo "Mount not found for ${server_path} on ${client_path} after $retries attempts." >> $logfile + echo "Mount not found for ${server_path} on ${client_path} after $retries attempts." >>$logfile rm -rf "${client_path}" fi } # Setup LSF share if [ -n "${nfs_server_with_mount_path}" ]; then - echo "File share ${nfs_server_with_mount_path} found" >> $logfile + echo "File share ${nfs_server_with_mount_path} found" >>$logfile nfs_client_mount_path="/mnt/lsf" if mount_nfs_with_retries "${nfs_server_with_mount_path}" "${nfs_client_mount_path}"; then - for dir in conf work; do - rm -rf "${LSF_TOP:?}/$dir" - ln -fs "${nfs_client_mount_path}/lsf/$dir" "${LSF_TOP}/$dir" - done - chown -R lsfadmin:root "${LSF_TOP}" + echo "Mount completed successfully with ${nfs_client_mount_path}" >>$logfile else - echo "Mount not found for ${nfs_server_with_mount_path}, Exiting !!" >> $logfile + echo "Mount not found for ${nfs_server_with_mount_path}, Exiting !!" >>$logfile exit 1 fi fi -echo "Setting LSF share is completed." >> $logfile +echo "Setting LSF share is completed." >>$logfile echo '{% raw %}' # Setup Custom file shares -echo "Setting custom file shares." >> "$logfile" +echo "Setting custom file shares." 
>>"$logfile" if [ -n "${custom_file_shares}" ]; then - echo "Custom file share ${custom_file_shares} found" >> "$logfile" - read -ra file_share_array <<< "${custom_file_shares}" - read -ra mount_path_array <<< "${custom_mount_paths}" + echo "Custom file share ${custom_file_shares} found" >>"$logfile" + read -ra file_share_array <<<"${custom_file_shares}" + read -ra mount_path_array <<<"${custom_mount_paths}" length=${#file_share_array[@]} - for (( i=0; i> "$logfile" +echo "Setting custom file shares is completed." >>"$logfile" echo '{% endraw %}' # Setup SSH @@ -129,50 +117,45 @@ LDAP_DIR="/home/lsfadmin" SSH_DIR="$LDAP_DIR/.ssh" mkdir -p "$SSH_DIR" cp /home/vpcuser/.ssh/authorized_keys "$SSH_DIR/authorized_keys" -cat "{{ ha_shared_dir }}/ssh/id_rsa.pub" >> "$SSH_DIR/authorized_keys" +cat "{{ ha_shared_dir }}/ssh/id_rsa.pub" >>"$SSH_DIR/authorized_keys" cp "{{ ha_shared_dir }}/ssh/id_rsa" "$SSH_DIR/id_rsa" -echo "StrictHostKeyChecking no" >> "$SSH_DIR/config" +echo "StrictHostKeyChecking no" >>"$SSH_DIR/config" chmod 600 "$SSH_DIR/authorized_keys" chmod 400 "$SSH_DIR/id_rsa" chmod 700 "$SSH_DIR" chown -R lsfadmin:lsfadmin "$SSH_DIR" # Setup LSF environment variables -LSF_TOP="/opt/ibm/lsf_worker" -LSF_TOP_VERSION=10.1 -LSF_CONF=$LSF_TOP/conf -LSF_CONF_FILE=$LSF_CONF/lsf.conf -{ - . "$LSF_CONF/profile.lsf" - echo "Logging environment variables" - env -} >> "$logfile" -echo "source ${LSF_CONF}/profile.lsf" >> ~/.bashrc -echo "source ${LSF_CONF}/profile.lsf" >> "$LDAP_DIR"/.bashrc -source "$HOME/.bashrc" -source "$LDAP_DIR/.bashrc" - -# DNS Setup -echo "search ${dns_domain}" >> /etc/resolv.conf - -# Defining ncpus based on hyper-threading -if [ "$hyperthreading" == "True" ]; then - ego_define_ncpus="threads" +LSF_TOP="/opt/ibm/lsfsuite/lsf" +LSF_CONF="$LSF_TOP/conf" +LSF_WORK="$LSF_TOP/work" +LSF_CONF_FILE="$LSF_CONF/lsf.conf" +LSF_LOGS="/opt/ibm/lsflogs" +SHARED_HOSTS="/mnt/lsf/lsf/conf/hosts" +LSF_HOSTS_FILE="${LSF_CONF}/hosts" +SYSTEM_HOSTS_FILE="/etc/hosts" + +# Create a logs folder +mkdir -p $LSF_LOGS +chown -R lsfadmin $LSF_LOGS +chown -R 755 $LSF_LOGS + +# Append the line only if the exact search line is not already present +if ! grep -Fxq "search ${dns_domain}" /etc/resolv.conf; then + echo "search ${dns_domain}" >>/etc/resolv.conf + echo "Appended DNS entry: search ${dns_domain}" >>"$logfile" else - ego_define_ncpus="cores" - cat << 'EOT' > /root/lsf_hyperthreading -#!/bin/sh -for vcpu in $(cat /sys/devices/system/cpu/cpu*/topology/thread_siblings_list | cut -s -d- -f2 | cut -d- -f2 | uniq); do - echo "0" > "/sys/devices/system/cpu/cpu"$vcpu"/online" -done -EOT - chmod 755 /root/lsf_hyperthreading - command="/root/lsf_hyperthreading" - sh $command && (crontab -l 2>/dev/null; echo "@reboot $command") | crontab - + echo "DNS entry 'search ${dns_domain}' is already present." >>"$logfile" fi -echo "EGO_DEFINE_NCPUS=${ego_define_ncpus}" >> $LSF_CONF_FILE -cat /opt/ibm/lsf/conf/hosts >> /etc/hosts +# Check if source file exists +if [[ -f "$SHARED_HOSTS" ]]; then + cp -p "$SHARED_HOSTS" "$LSF_HOSTS_FILE" + cp -p "$SHARED_HOSTS" "$SYSTEM_HOSTS_FILE" +else + echo "Error: Source file '$SHARED_HOSTS' does not exist." 
>&2 >>"$logfile" + exit 1 +fi # Apply system tuning parameters LSF_TUNABLES="/etc/sysctl.conf" @@ -184,78 +167,99 @@ LSF_TUNABLES="/etc/sysctl.conf" echo 'net.core.wmem_default=26214400' echo 'net.ipv4.tcp_fin_timeout = 5' echo 'net.core.somaxconn = 8000' -} >> "$LSF_TUNABLES" +} >>"$LSF_TUNABLES" sudo sysctl -p $LSF_TUNABLES -# Update lsf configuration -echo 'LSB_MC_DISABLE_HOST_LOOKUP=Y' >> $LSF_CONF_FILE -echo "LSF_RSH=\"ssh -o 'PasswordAuthentication no' -o 'StrictHostKeyChecking no'\"" >> $LSF_CONF_FILE -sed -i "s/LSF_SERVER_HOSTS=.*/LSF_SERVER_HOSTS=\"$ManagementHostNames\"/g" $LSF_CONF_FILE -sed -i "s/LSF_ENABLE_EGO=N/LSF_ENABLE_EGO=Y/g" $LSF_CONF_FILE - -# TODO: Understand usage -# Support rc_account resource to enable RC_ACCOUNT policy -if [ -n "${rc_account}" ]; then -sed -i "s/\(LSF_LOCAL_RESOURCES=.*\)\"/\1 [resourcemap ${rc_account}*rc_account]\"/" $LSF_CONF_FILE -echo "Update LSF_LOCAL_RESOURCES lsf.conf successfully, add [resourcemap ${rc_account}*rc_account]" -fi - -# Add additional local resources if needed -instance_id=$(dmidecode | grep Family | cut -d ' ' -f 2 |head -1) -if [ -n "$instance_id" ]; then - sed -i "s/\(LSF_LOCAL_RESOURCES=.*\)\"/\1 [resourcemap $instance_id*instanceID]\"/" $LSF_CONF_FILE - echo "Update LSF_LOCAL_RESOURCES in $LSF_CONF_FILE successfully, add [resourcemap ${instance_id}*instanceID]" +# Defining ncpus based on hyper-threading +if [ "$hyperthreading" == "True" ]; then + ego_define_ncpus="threads" else - echo "Can not get instance ID" >> $logfile + ego_define_ncpus="cores" + cat <<'EOT' >/root/lsf_hyperthreading +#!/bin/sh +for vcpu in $(cat /sys/devices/system/cpu/cpu*/topology/thread_siblings_list | cut -s -d- -f2 | cut -d- -f2 | uniq); do + echo "0" > "/sys/devices/system/cpu/cpu"$vcpu"/online" +done +EOT + chmod 755 /root/lsf_hyperthreading + command="/root/lsf_hyperthreading" + sh $command && ( + crontab -l 2>/dev/null + echo "@reboot $command" + ) | crontab - fi +echo "EGO_DEFINE_NCPUS=${ego_define_ncpus}" >>$LSF_CONF_FILE + +# Main Configuration for Dynamic Nodes +sed -i 's|^LSF_LOGDIR=.*|LSF_LOGDIR="/opt/ibm/lsflogs"|' $LSF_CONF_FILE +sed -i '/^lsfservers/d' "$LSF_CONF/lsf.cluster.$cluster_prefix" +grep -rli "$default_cluster_name" $LSF_CONF/* | xargs sed -i "s/$default_cluster_name/$cluster_prefix/g" +mv $LSF_WORK/$default_cluster_name $LSF_WORK/"$cluster_prefix" +find "$LSF_TOP" -name "*$default_cluster_name*" -print0 | while IFS= read -r -d '' file; do + new_file=$(echo "$file" | sed -r "s/$default_cluster_name/$cluster_prefix/g") + mv "$file" "$new_file" +done +grep -rli 'lsfservers' $LSF_CONF/* | xargs sed -i "s/lsfservers/${ManagementHostNames}/g" + +cat <>$LSF_CONF_FILE +LSF_SERVER_HOSTS="${ManagementHostNames}" +LSF_ADDON_HOSTS="$(echo "$ManagementHostNames" | awk '{print $1}')" +LSF_GET_CONF=lim +LSF_GPU_AUTOCONFIG=Y +LSB_GPU_NEW_SYNTAX=extend +EOF -echo 'LSF_STARTUP_USERS="lsfadmin"' | sudo tee -a /etc/lsf1.sudoers -echo "LSF_STARTUP_PATH=$LSF_TOP_VERSION/linux3.10-glibc2.17-x86_64/etc/" | sudo tee -a /etc/lsf.sudoers -chmod 600 /etc/lsf.sudoers -ls -l /etc/lsf.sudoers +# source profile.lsf +echo "source ${LSF_CONF}/profile.lsf" >>~/.bashrc +echo "source ${LSF_CONF}/profile.lsf" >>"$LDAP_DIR"/.bashrc +source "$HOME/.bashrc" +source "$LDAP_DIR/.bashrc" -cd /opt/ibm/lsf_worker/10.1/linux3.10-glibc2.17-x86_64/etc/ || exit -sed -i "s|/opt/ibm/lsf/|/opt/ibm/lsf_worker/|g" lsf_daemons -cd - || exit +chown -R lsfadmin $LSF_TOP +chown -R lsfadmin $LSF_WORK -sudo /opt/ibm/lsf_worker/10.1/install/hostsetup --top="/opt/ibm/lsf_worker" 
--setuid | sudo tee -a "$logfile" -/opt/ibm/lsf_worker/10.1/install/hostsetup --top="/opt/ibm/lsf_worker" --boot="y" --start="y" --dynamic >> "$logfile" 2>&1 +# Restart the lsfd servive +service lsfd stop && sleep 2 && service lsfd start +sleep 10 # Setting up the LDAP configuration if [ "$enable_ldap" = "true" ]; then - # Detect if the operating system is RHEL or Rocky Linux - if grep -q "NAME=\"Red Hat Enterprise Linux\"" /etc/os-release || grep -q "NAME=\"Rocky Linux\"" /etc/os-release; then - - # Detect RHEL or Rocky version - version=$(grep -oE 'release [0-9]+' /etc/redhat-release | awk '{print $2}') - - # Proceed if the detected version is either 8 or 9 - if [ "$version" == "8" ] || [ "$version" == "9" ]; then - echo "Detected as RHEL or Rocky $version. Proceeding with LDAP client configuration..." >> $logfile - - # Enable password authentication for SSH by modifying the configuration file - sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config - systemctl restart sshd - - # Check if the SSL certificate file exists, then copy it to the correct location - # Retry finding SSL certificate with a maximum of 5 attempts and 5 seconds sleep between retries - for attempt in {1..5}; do - if [ -f "{{ ha_shared_dir }}/openldap/ldap_cacert.pem" ]; then - echo "LDAP SSL cert found under {{ ha_shared_dir }}/openldap/ldap_cacert.pem path" >> $logfile - mkdir -p /etc/openldap/certs/ - cp -pr "{{ ha_shared_dir }}/openldap/ldap_cacert.pem" "/etc/openldap/certs/ldap_cacert.pem" - break - else - echo "SSL cert not found on attempt $attempt. Retrying in 5 seconds..." >> $logfile - sleep 5 - fi - done - # Exit if the SSL certificate is still not found after 5 attempts - [ -f "{{ ha_shared_dir }}/openldap/ldap_cacert.pem" ] || { echo "SSL cert not found after 5 attempts. Exiting." >> $logfile; exit 1; } - - # Create and configure the SSSD configuration file for LDAP integration - cat < /etc/sssd/sssd.conf + # Detect if the operating system is RHEL or Rocky Linux + if grep -q "NAME=\"Red Hat Enterprise Linux\"" /etc/os-release || grep -q "NAME=\"Rocky Linux\"" /etc/os-release; then + + # Detect RHEL or Rocky version + version=$(grep -oE 'release [0-9]+' /etc/redhat-release | awk '{print $2}') + + # Proceed if the detected version is either 8 or 9 + if [ "$version" == "8" ] || [ "$version" == "9" ]; then + echo "Detected as RHEL or Rocky $version. Proceeding with LDAP client configuration..." >>$logfile + + # Enable password authentication for SSH by modifying the configuration file + sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config + systemctl restart sshd + + # Check if the SSL certificate file exists, then copy it to the correct location + # Retry finding SSL certificate with a maximum of 5 attempts and 5 seconds sleep between retries + for attempt in {1..5}; do + if [ -f "{{ ha_shared_dir }}/openldap/ldap_cacert.pem" ]; then + echo "LDAP SSL cert found under {{ ha_shared_dir }}/openldap/ldap_cacert.pem path" >>$logfile + mkdir -p /etc/openldap/certs/ + cp -pr "{{ ha_shared_dir }}/openldap/ldap_cacert.pem" "/etc/openldap/certs/ldap_cacert.pem" + break + else + echo "SSL cert not found on attempt $attempt. Retrying in 5 seconds..." >>$logfile + sleep 5 + fi + done + # Exit if the SSL certificate is still not found after 5 attempts + [ -f "{{ ha_shared_dir }}/openldap/ldap_cacert.pem" ] || { + echo "SSL cert not found after 5 attempts. Exiting." 
>>$logfile
+        exit 1
+      }
+
+      # Create and configure the SSSD configuration file for LDAP integration
+      cat <<EOF >/etc/sssd/sssd.conf
[sssd]
config_file_version = 2
services = nss, pam, autofs
@@ -279,73 +283,75 @@ cache_credentials = True
ldap_tls_reqcert = allow
EOF

-      # Secure the SSSD configuration file by setting appropriate permissions
-      chmod 600 /etc/sssd/sssd.conf
-      chown root:root /etc/sssd/sssd.conf
+      # Secure the SSSD configuration file by setting appropriate permissions
+      chmod 600 /etc/sssd/sssd.conf
+      chown root:root /etc/sssd/sssd.conf

-      # Create and configure the OpenLDAP configuration file for TLS
-      cat <<EOF > /etc/openldap/ldap.conf
+      # Create and configure the OpenLDAP configuration file for TLS
+      cat <<EOF >/etc/openldap/ldap.conf
BASE dc=${ldap_basedns%%.*},dc=${ldap_basedns#*.}
URI ldap://${ldap_server}
TLS_CACERT /etc/openldap/certs/ldap_cacert.pem
TLS_CACERTDIR /etc/openldap/certs
EOF

-      # Rehash certificates in the OpenLDAP directory to ensure proper recognition
-      openssl rehash /etc/openldap/certs
+      # Rehash certificates in the OpenLDAP directory to ensure proper recognition
+      openssl rehash /etc/openldap/certs

-      # Apply the SSSD and home directory creation configuration using authselect
-      authselect select sssd with-mkhomedir --force
+      # Apply the SSSD and home directory creation configuration using authselect
+      authselect select sssd with-mkhomedir --force

-      # Enable and start the SSSD and oddjobd services for user authentication and home directory management
-      systemctl enable --now sssd oddjobd
+      # Enable and start the SSSD and oddjobd services for user authentication and home directory management
+      systemctl enable --now sssd oddjobd

-      # Restart both services to apply the configuration
-      systemctl restart sssd oddjobd
+      # Restart both services to apply the configuration
+      systemctl restart sssd oddjobd

-      # Validate the LDAP configuration by performing a test search using ldapsearch
-      if ldapsearch -x -H ldap://"${ldap_server}"/ -b "dc=${ldap_basedns%%.*},dc=${ldap_basedns#*.}" > /dev/null; then
-        echo "LDAP configuration completed successfully!" >> $logfile
-      else
-        echo "LDAP configuration failed! Exiting." >> $logfile
-        exit 1
-      fi
+      # Validate the LDAP configuration by performing a test search using ldapsearch
+      if ldapsearch -x -H ldap://"${ldap_server}"/ -b "dc=${ldap_basedns%%.*},dc=${ldap_basedns#*.}" >/dev/null; then
+        echo "LDAP configuration completed successfully!" >>$logfile
+      else
+        echo "LDAP configuration failed! Exiting." >>$logfile
+        exit 1
+      fi

-      # Ensure LSF commands are available to all users by adding the profile to bashrc
-      echo ". ${LSF_CONF}/profile.lsf" >> /etc/bashrc
-      source /etc/bashrc
+      # Ensure LSF commands are available to all users by adding the profile to bashrc
+      echo ". ${LSF_CONF}/profile.lsf" >>/etc/bashrc
+      source /etc/bashrc

-    else
-      echo "This script is intended for RHEL and Rocky Linux 8 or 9. Detected version: $version. Exiting." >> $logfile
-      exit 1
-    fi
+    else
+      echo "This script is intended for RHEL and Rocky Linux 8 or 9. Detected version: $version. Exiting." >>$logfile
+      exit 1
    fi
+  fi
+else
+  echo "Skipping LDAP Client configuration as it is not enabled." >>$logfile
fi
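# --- Reviewer note (illustrative sketch, not part of the patch): once SSSD is up,
# directory users should resolve through NSS; <ldap_user> is a placeholder account:
#   getent passwd <ldap_user>   # returns the LDAP entry when the client is healthy
#   id <ldap_user>              # shows the uid/gid mapping pulled from the directory
#   sssctl domain-list          # confirms the SSSD domain registered above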
# Setting up the Cloud Monitoring Agent
if [ "$cloud_monitoring_access_key" != "" ] && [ "$cloud_monitoring_ingestion_url" != "" ]; then
-    SYSDIG_CONFIG_FILE="/opt/draios/etc/dragent.yaml"
+  SYSDIG_CONFIG_FILE="/opt/draios/etc/dragent.yaml"

-    #packages installation
-    echo "Writing sysdig config file" >> "$logfile"
+  #packages installation
+  echo "Writing sysdig config file" >>"$logfile"

-    #sysdig config file
-    echo "Setting customerid access key" >> "$logfile"
-    sed -i "s/==ACCESSKEY==/$cloud_monitoring_access_key/g" $SYSDIG_CONFIG_FILE
-    sed -i "s/==COLLECTOR==/$cloud_monitoring_ingestion_url/g" $SYSDIG_CONFIG_FILE
-    echo "tags: type:compute,lsf:true" >> $SYSDIG_CONFIG_FILE
+  #sysdig config file
+  echo "Setting customerid access key" >>"$logfile"
+  sed -i "s/==ACCESSKEY==/$cloud_monitoring_access_key/g" $SYSDIG_CONFIG_FILE
+  sed -i "s/==COLLECTOR==/$cloud_monitoring_ingestion_url/g" $SYSDIG_CONFIG_FILE
+  echo "tags: type:compute,lsf:true" >>$SYSDIG_CONFIG_FILE
else
-    echo "Skipping metrics agent configuration due to missing parameters" >> "$logfile"
+  echo "Skipping metrics agent configuration due to missing parameters" >>"$logfile"
fi

if [ "$observability_monitoring_on_compute_nodes_enable" = true ]; then
-    echo "Restarting sysdig agent" >> "$logfile"
-    systemctl enable dragent
-    systemctl restart dragent
-  else
-    echo "Metrics agent start skipped since monitoring provisioning is not enabled" >> "$logfile"
+  echo "Restarting sysdig agent" >>"$logfile"
+  systemctl enable dragent
+  systemctl restart dragent
+else
+  echo "Metrics agent start skipped since monitoring provisioning is not enabled" >>"$logfile"
fi

# Setting up the IBM Cloud Logs
@@ -355,7 +361,7 @@ if [ "$observability_logs_enable_for_compute" = true ]; then
  sudo cp /root/post-config.sh /opt/ibm
  cd /opt/ibm || exit

-  cat <<EOL > /etc/fluent-bit/fluent-bit.conf
+  cat <<EOL >/etc/fluent-bit/fluent-bit.conf
[SERVICE]
  Flush                   1
  Log_Level               info
@@ -383,10 +389,10 @@ if [ "$observability_logs_enable_for_compute" = true ]; then
[INPUT]
  Name                tail
  Tag                 *
-  Path                /opt/ibm/lsf_worker/log/*.log.*
+  Path                /opt/ibm/lsflogs/*.log.*
  Path_Key            file
  Exclude_Path        /var/log/at/**
-  DB                  /opt/ibm/lsf_worker/log/fluent-bit.DB
+  DB                  /opt/ibm/lsflogs/fluent-bit.DB
  Buffer_Chunk_Size   32KB
  Buffer_Max_Size     256KB
  Skip_Long_Lines     On
@@ -405,10 +411,10 @@ EOL
  sudo chmod +x post-config.sh
  sudo ./post-config.sh -h "$cloud_logs_ingress_private_endpoint" -p "3443" -t "/logs/v1/singles" -a IAMAPIKey -k "$VPC_APIKEY_VALUE" --send-directly-to-icl -s true -i Production

-  echo "INFO Testing IBM Cloud LSF Logs from compute: $hostname" | sudo tee -a /opt/ibm/lsf_worker/log/test.log.com > /dev/null
+  echo "INFO Testing IBM Cloud LSF Logs from compute: $hostname" | sudo tee -a /opt/ibm/lsflogs/test.log.com >/dev/null
  sudo logger -u /tmp/in_syslog my_ident my_syslog_test_message_from_compute:"$hostname"
else
  echo "Cloud Logs configuration skipped since observability logs for compute is not enabled"
fi

-echo "COMPLETED $(date '+%Y-%m-%d %H:%M:%S')" >> $logfile
+echo "COMPLETED $(date '+%Y-%m-%d %H:%M:%S')" >>$logfile
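A hedged way to validate the generated collector config before the service relies on it (assumes the installed Fluent Bit build supports the standard dry-run flag; the probe filename is invented but matches the *.log.* tail pattern above):

    fluent-bit -c /etc/fluent-bit/fluent-bit.conf --dry-run        # parse-only validation
    echo "INFO probe $(date)" >> /opt/ibm/lsflogs/probe.log.test   # should be picked up by the tail input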
diff --git a/modules/ansible-roles/roles/lsf_mgmt_config/vars/main.yml b/modules/ansible-roles/roles/lsf_mgmt_config/vars/main.yml
index ea723e83..d0d17636 100644
--- a/modules/ansible-roles/roles/lsf_mgmt_config/vars/main.yml
+++ b/modules/ansible-roles/roles/lsf_mgmt_config/vars/main.yml
@@ -5,4 +5,5 @@ LSF_LSBATCH_CONF: "{{ LSF_CONF_FILE_PATH }}/lsbatch/{{ prefix }}/configdir"
LSF_HOSTS_FILE: "{{ LSF_CONF_FILE_PATH }}/hosts"
LSF_EGO_CONF_FILE: "{{ LSF_CONF_FILE }}/ego/{{ prefix }}/kernel/ego.conf"
LSF_CLUSTER_FILE: "{{ LSF_CONF_FILE_PATH }}/lsf.cluster.{{ prefix }}"
-hyperthreading_file: "/root/lsf_hyperthreading"
+LSF_EXT_CONF: "/opt/ibm/lsfsuite/ext"
+JS_PAC_SERVER_URL: "{{ LSF_EXT_CONF }}/ppm/conf/js.conf"
diff --git a/modules/ansible-roles/roles/lsf_post_config/tasks/cluster_validation.yml b/modules/ansible-roles/roles/lsf_post_config/tasks/cluster_validation.yml
new file mode 100644
index 00000000..f16b9361
--- /dev/null
+++ b/modules/ansible-roles/roles/lsf_post_config/tasks/cluster_validation.yml
@@ -0,0 +1,43 @@
+---
+- name: MTU Check | Restart NetworkManager if MTU 9000 is not configured
+  ansible.builtin.shell: |
+    ip route show | grep -q 'mtu 9000' || {
+      systemctl restart NetworkManager
+      echo "restarted"
+    }
+  register: mtu_check_result
+  changed_when: "'restarted' in mtu_check_result.stdout"
+  when: inventory_hostname in groups['mgmt_compute_nodes']
+
+- name: LSF Version | Retrieve lsid output
+  ansible.builtin.shell: lsid
+  register: lsid_output
+  changed_when: false
+  when: inventory_hostname == groups['management_nodes'][0]
+
+- name: LSF Version | Display lsid output
+  ansible.builtin.debug:
+    msg: "{{ lsid_output.stdout }}"
+  when: inventory_hostname == groups['management_nodes'][0]
+
+- name: Cluster Status | Fetch node status using bhosts
+  ansible.builtin.shell: bhosts -w
+  register: cluster_status_output
+  changed_when: false
+  when: inventory_hostname == groups['management_nodes'][0]
+
+- name: Cluster Status | Show node status
+  ansible.builtin.debug:
+    msg: "{{ cluster_status_output.stdout }}"
+  when: inventory_hostname == groups['management_nodes'][0]
+
+- name: Cluster Health | Restart lsfd if any node is unreach or unavail
+  ansible.builtin.shell: |
+    if bhosts -w | grep -Eq 'unreach|unavail'; then
+      systemctl restart lsfd
+      sleep 5
+      echo "lsfd restarted"
+    fi
+  register: lsfd_restart_result
+  changed_when: "'lsfd restarted' in lsfd_restart_result.stdout"
+  when: inventory_hostname == groups['management_nodes'][0]
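For reference, the restart task above keys on the STATUS column of bhosts -w; illustrative output with invented host names:

    HOST_NAME        STATUS    JL/U  MAX  NJOBS  RUN  SSUSP  USUSP  RSV
    mgmt-1           ok        -     16   0      0    0      0      0
    comp-3           unreach   -     16   0      0    0      0      0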
-name "ibmcloudgen2*" -delete +# become: true +# when: inventory_hostname in groups['management_nodes'] + +- name: Log directories | Setup shared base directories + file: + path: "{{ item.path }}" + state: directory + mode: "{{ item.mode | default('0755') }}" + recurse: yes + owner: lsfadmin + group: root + loop: + - { path: "{{ SHARED_PATH }}/data", mode: '0755' } + - { path: "{{ SHARED_PATH }}/logs", mode: '0755' } + - { path: "{{ SHARED_PATH }}/repository-path", mode: '0755' } + - { path: "{{ SHARED_PATH }}/das_staging_area", mode: '0755' } + when: inventory_hostname == groups['management_nodes'][0] + +- name: Log directories | Create per-host log directory under shared path + file: + path: "{{ SHARED_PATH }}/logs/{{ inventory_hostname }}" + state: directory + mode: '0777' + owner: lsfadmin + group: root + when: inventory_hostname in groups['management_nodes'] + +- name: Log directories | Move existing logs to shared per-host directory + shell: | + mv {{ LSF_LOGS }}/* {{ SHARED_PATH }}/logs/{{ inventory_hostname }}/ 2>/dev/null || true + args: + warn: false + when: inventory_hostname in groups['management_nodes'] + +- name: Log directories | Remove original LSF logs directory if it exists + file: + path: "{{ LSF_LOGS }}" + state: absent + ignore_errors: true + when: inventory_hostname in groups['management_nodes'] + +- name: Log directories | Create symlink from shared per-host logs to LSF log path + file: + src: "{{ SHARED_PATH }}/logs/{{ inventory_hostname }}" + dest: "{{ LSF_LOGS }}" + state: link + force: true + owner: lsfadmin + group: root + when: inventory_hostname in groups['management_nodes'] + +- name: Log directories | Ensure correct permissions on per-host logs directory + file: + path: "{{ SHARED_PATH }}/logs/{{ inventory_hostname }}" + state: directory + mode: '0777' + recurse: yes + owner: lsfadmin + group: root + when: inventory_hostname in groups['management_nodes'] + +- name: Log directories | Create symlink to shared data directory + file: + src: "{{ SHARED_PATH }}/data" + dest: "{{ LSF_TOP }}/work/data" + state: link + force: yes + when: inventory_hostname == groups['management_nodes'][0] + +- name: Log directories | Ensure ownership of shared data directory + file: + path: "{{ SHARED_PATH }}/data" + state: directory + recurse: yes + owner: lsfadmin + group: root + when: inventory_hostname == groups['management_nodes'][0] + +- name: Log directories | Ensure symlink for das_staging_area under LSF_TOP + shell: | + [ -L "{{ LSF_TOP }}/{{ item }}" ] && echo "Symlink exists, skipping." 
|| { \
+      [ -d "{{ LSF_TOP }}/{{ item }}" ] && rm -rf "{{ LSF_TOP }}/{{ item }}"; \
+      ln -s "{{ SHARED_PATH }}/{{ item }}" "{{ LSF_TOP }}/{{ item }}"; }
+  loop:
+    - das_staging_area
+  when: inventory_hostname == groups['management_nodes'][0]
+
+- name: Read LSF hosts file from shared path
+  slurp:
+    src: "{{ SHARED_PATH }}/lsf/conf/hosts"
+  register: lsf_hosts_file
+  when: inventory_hostname == groups['login_node'][0]
+
+- name: Override system /etc/hosts with LSF Login hosts
+  blockinfile:
+    path: /etc/hosts
+    block: "{{ lsf_hosts_file.content | b64decode }}"
+    marker: "# {mark} LSF HOSTS BLOCK"
+    create: yes
+  become: yes
+  when: inventory_hostname == groups['login_node'][0]
+
+- name: Copy Hosts | Copy LSF hosts file to /etc/hosts
+  ansible.builtin.copy:
+    src: "{{ SHARED_PATH }}/lsf/conf/hosts"
+    dest: /etc/hosts
+    owner: lsfadmin
+    group: root
+    mode: preserve
+    remote_src: yes
+  when: inventory_hostname == groups['login_node'][0]
diff --git a/modules/ansible-roles/roles/lsf_post_config/tasks/main.yml b/modules/ansible-roles/roles/lsf_post_config/tasks/main.yml
new file mode 100644
index 00000000..9ce4a08f
--- /dev/null
+++ b/modules/ansible-roles/roles/lsf_post_config/tasks/main.yml
@@ -0,0 +1,13 @@
+---
+
+# Set correct ownership and permissions on shared directories
+- import_tasks: permissions_setup.yml
+
+# Configure shared folders and create necessary symlinks on management nodes
+- import_tasks: configure_shared_folders.yml
+
+# Reload or restart services to apply the new configurations
+- import_tasks: reload_services.yml
+
+# Cluster validation
+- import_tasks: cluster_validation.yml
diff --git a/modules/ansible-roles/roles/lsf_post_config/tasks/permissions_setup.yml b/modules/ansible-roles/roles/lsf_post_config/tasks/permissions_setup.yml
new file mode 100644
index 00000000..b7e06909
--- /dev/null
+++ b/modules/ansible-roles/roles/lsf_post_config/tasks/permissions_setup.yml
@@ -0,0 +1,19 @@
+---
+
+# Set recursive permissions for LSF Suite directory to 0755
+- name: Change permissions of lsfsuite to 0755
+  ansible.builtin.command: "chmod -R 0755 {{ LSF_SUITE }}"
+
+# Set ownership of LSF Suite directory to lsfadmin
+- name: Change ownership of lsfsuite to lsfadmin
+  ansible.builtin.command: "chown -R lsfadmin {{ LSF_SUITE }}"
+
+# Set recursive permissions for shared path
+- name: Change permissions of {{ SHARED_PATH }} to 0755
+  ansible.builtin.command: "chmod -R 0755 {{ SHARED_PATH }}"
+  when: inventory_hostname == groups['management_nodes'][0]
+
+# Set ownership of shared path to lsfadmin
+- name: Change ownership of {{ SHARED_PATH }} to lsfadmin
+  ansible.builtin.command: "chown -R lsfadmin {{ SHARED_PATH }}"
+  when: inventory_hostname == groups['management_nodes'][0]
diff --git a/modules/ansible-roles/roles/lsf_post_config/tasks/reload_services.yml b/modules/ansible-roles/roles/lsf_post_config/tasks/reload_services.yml
new file mode 100644
index 00000000..53d5712e
--- /dev/null
+++ b/modules/ansible-roles/roles/lsf_post_config/tasks/reload_services.yml
@@ -0,0 +1,14 @@
+---
+
+# Restart LSF daemon (lsfd) service on the first management node
+- name: Restart lsfd service
+  service:
+    name: lsfd
+    state: restarted
+  when: inventory_hostname == groups['management_nodes'][0]
+
+# Restart the NetworkManager service on all nodes
+- name: Restart NetworkManager
+  service:
+    name: NetworkManager
+    state: restarted
diff --git a/modules/ansible-roles/roles/lsf_post_config/vars/main.yml b/modules/ansible-roles/roles/lsf_post_config/vars/main.yml
new file mode 100644
index
00000000..6e28bb54 --- /dev/null +++ b/modules/ansible-roles/roles/lsf_post_config/vars/main.yml @@ -0,0 +1,6 @@ +--- + +SHARED_PATH: "/mnt/lsf" +LSF_SUITE: "/opt/ibm/lsfsuite" +LSF_TOP: "{{ LSF_SUITE }}/lsf" +LSF_LOGS: "/opt/ibm/lsflogs" diff --git a/modules/ansible-roles/roles/lsf_prereq_config/tasks/hyperthreading.yml b/modules/ansible-roles/roles/lsf_prereq_config/tasks/hyperthreading.yml new file mode 100644 index 00000000..ad289c83 --- /dev/null +++ b/modules/ansible-roles/roles/lsf_prereq_config/tasks/hyperthreading.yml @@ -0,0 +1,22 @@ +--- +- name: Hyperthreading | Create LSF hyperthreading script for disabling threads if hyperthreading is false + copy: + dest: "{{ hyperthreading_file }}" + content: | + #!/bin/sh + for vcpu in $(cat /sys/devices/system/cpu/cpu*/topology/thread_siblings_list | cut -s -d- -f2 | cut -d- -f2 | uniq); do + echo "0" > "/sys/devices/system/cpu/cpu"$vcpu"/online" + done + mode: '0755' + when: not enable_hyperthreading + +- name: Hyperthreading | Run the hyperthreading script and add to cron if hyperthreading is false + shell: "{{ hyperthreading_file }}" + when: not enable_hyperthreading + +- name: Hyperthreading | Add script to cron for reboot if hyperthreading is false + cron: + name: "Disable Hyperthreading" + special_time: reboot + job: "{{ hyperthreading_file }}" + when: not enable_hyperthreading diff --git a/modules/ansible-roles/roles/lsf_server_config/tasks/lsf_tunables.yml b/modules/ansible-roles/roles/lsf_prereq_config/tasks/lsf_tunables.yml similarity index 100% rename from modules/ansible-roles/roles/lsf_server_config/tasks/lsf_tunables.yml rename to modules/ansible-roles/roles/lsf_prereq_config/tasks/lsf_tunables.yml diff --git a/modules/ansible-roles/roles/lsf/tasks/lsfadmin_creation.yml b/modules/ansible-roles/roles/lsf_prereq_config/tasks/lsfadmin_creation.yml similarity index 98% rename from modules/ansible-roles/roles/lsf/tasks/lsfadmin_creation.yml rename to modules/ansible-roles/roles/lsf_prereq_config/tasks/lsfadmin_creation.yml index 318e9d5f..1db4d6c8 100644 --- a/modules/ansible-roles/roles/lsf/tasks/lsfadmin_creation.yml +++ b/modules/ansible-roles/roles/lsf_prereq_config/tasks/lsfadmin_creation.yml @@ -14,6 +14,7 @@ create_home: yes home: /home/lsfadmin shell: /bin/bash + uid: 1005 when: user_info.failed register: user_created diff --git a/modules/ansible-roles/roles/lsf_server_config/tasks/lsfadmin_password_less_auth.yml b/modules/ansible-roles/roles/lsf_prereq_config/tasks/lsfadmin_password_less_auth.yml similarity index 100% rename from modules/ansible-roles/roles/lsf_server_config/tasks/lsfadmin_password_less_auth.yml rename to modules/ansible-roles/roles/lsf_prereq_config/tasks/lsfadmin_password_less_auth.yml diff --git a/modules/ansible-roles/roles/lsf_server_config/tasks/main.yml b/modules/ansible-roles/roles/lsf_prereq_config/tasks/main.yml similarity index 65% rename from modules/ansible-roles/roles/lsf_server_config/tasks/main.yml rename to modules/ansible-roles/roles/lsf_prereq_config/tasks/main.yml index e1ff9a7b..4902d8f6 100644 --- a/modules/ansible-roles/roles/lsf_server_config/tasks/main.yml +++ b/modules/ansible-roles/roles/lsf_prereq_config/tasks/main.yml @@ -6,5 +6,11 @@ # lsf_tunables configuration - import_tasks: lsf_tunables.yml +# Create lsfadmin user +- import_tasks: lsfadmin_creation.yml + # Setup Password less authentication for lsfadmin user - import_tasks: lsfadmin_password_less_auth.yml + +# Disable Hyperthreading +- import_tasks: hyperthreading.yml diff --git 
a/modules/ansible-roles/roles/lsf_server_config/tasks/mtu_configuration.yml b/modules/ansible-roles/roles/lsf_prereq_config/tasks/mtu_configuration.yml similarity index 100% rename from modules/ansible-roles/roles/lsf_server_config/tasks/mtu_configuration.yml rename to modules/ansible-roles/roles/lsf_prereq_config/tasks/mtu_configuration.yml diff --git a/modules/ansible-roles/roles/lsf_server_config/vars/main.yml b/modules/ansible-roles/roles/lsf_prereq_config/vars/main.yml similarity index 76% rename from modules/ansible-roles/roles/lsf_server_config/vars/main.yml rename to modules/ansible-roles/roles/lsf_prereq_config/vars/main.yml index e934d3af..39a523b8 100644 --- a/modules/ansible-roles/roles/lsf_server_config/vars/main.yml +++ b/modules/ansible-roles/roles/lsf_prereq_config/vars/main.yml @@ -2,3 +2,4 @@ network_interface: "eth0" network_script_path: "/etc/sysconfig/network-scripts" sysctl_conf: "/etc/sysctl.conf" lsfadmin_ssh_path: "/home/lsfadmin/.ssh" +hyperthreading_file: "/root/lsf_hyperthreading" diff --git a/modules/ansible-roles/roles/lsf/tasks/lsf_inventory.yml b/modules/ansible-roles/roles/lsf_template_config/tasks/lsf_inventory.yml similarity index 70% rename from modules/ansible-roles/roles/lsf/tasks/lsf_inventory.yml rename to modules/ansible-roles/roles/lsf_template_config/tasks/lsf_inventory.yml index c5276b41..8b48552f 100644 --- a/modules/ansible-roles/roles/lsf/tasks/lsf_inventory.yml +++ b/modules/ansible-roles/roles/lsf_template_config/tasks/lsf_inventory.yml @@ -5,8 +5,8 @@ template: src: fp14-inventory.j2 dest: "{{ inventory_path }}/lsf-inventory" - # delegate_to: localhost - delegate_to: "{{ lsf_deployer_hostname }}.{{ dns_domain_names }}" + delegate_to: localhost + run_once: true when: lsf_version == "fixpack_14" @@ -14,8 +14,7 @@ template: src: fp14-config.j2 dest: "{{ inventory_path }}/lsf-config.yml" - # delegate_to: localhost - delegate_to: "{{ lsf_deployer_hostname }}.{{ dns_domain_names }}" + delegate_to: localhost run_once: true when: lsf_version == "fixpack_14" @@ -23,8 +22,7 @@ template: src: fp15-inventory.j2 dest: "{{ inventory_path }}/lsf-inventory" - # delegate_to: localhost - delegate_to: "{{ lsf_deployer_hostname }}.{{ dns_domain_names }}" + delegate_to: localhost run_once: true when: lsf_version == "fixpack_15" @@ -32,8 +30,7 @@ template: src: fp15-config.j2 dest: "{{ inventory_path }}/lsf-config.yml" - # delegate_to: localhost - delegate_to: "{{ lsf_deployer_hostname }}.{{ dns_domain_names }}" + delegate_to: localhost run_once: true when: lsf_version == "fixpack_15" @@ -43,6 +40,5 @@ path: "{{ inventory_path }}/group_vars/all" regexp: "^deployer_hostname: .*" line: "deployer_hostname: {{ lsf_deployer_hostname }}" - # backup: yes - delegate_to: "{{ lsf_deployer_hostname }}.{{ dns_domain_names }}" + delegate_to: localhost run_once: true diff --git a/modules/ansible-roles/roles/lsf/tasks/lsf_prepare.yml b/modules/ansible-roles/roles/lsf_template_config/tasks/lsf_prepare.yml similarity index 100% rename from modules/ansible-roles/roles/lsf/tasks/lsf_prepare.yml rename to modules/ansible-roles/roles/lsf_template_config/tasks/lsf_prepare.yml diff --git a/modules/ansible-roles/roles/lsf/tasks/main.yml b/modules/ansible-roles/roles/lsf_template_config/tasks/main.yml similarity index 79% rename from modules/ansible-roles/roles/lsf/tasks/main.yml rename to modules/ansible-roles/roles/lsf_template_config/tasks/main.yml index 186976d7..942cc3f4 100644 --- a/modules/ansible-roles/roles/lsf/tasks/main.yml +++ 
b/modules/ansible-roles/roles/lsf_template_config/tasks/main.yml @@ -6,8 +6,5 @@ # tasks file for template tasks - import_tasks: lsf_inventory.yml -# Create lsfadmin user -- import_tasks: lsfadmin_creation.yml - # Install Python dependencies - import_tasks: python_installation.yml diff --git a/modules/ansible-roles/roles/lsf/tasks/python_installation.yml b/modules/ansible-roles/roles/lsf_template_config/tasks/python_installation.yml similarity index 100% rename from modules/ansible-roles/roles/lsf/tasks/python_installation.yml rename to modules/ansible-roles/roles/lsf_template_config/tasks/python_installation.yml diff --git a/modules/ansible-roles/roles/lsf/templates/fp14-config.j2 b/modules/ansible-roles/roles/lsf_template_config/templates/fp14-config.j2 similarity index 100% rename from modules/ansible-roles/roles/lsf/templates/fp14-config.j2 rename to modules/ansible-roles/roles/lsf_template_config/templates/fp14-config.j2 diff --git a/modules/ansible-roles/roles/lsf/templates/fp14-inventory.j2 b/modules/ansible-roles/roles/lsf_template_config/templates/fp14-inventory.j2 similarity index 99% rename from modules/ansible-roles/roles/lsf/templates/fp14-inventory.j2 rename to modules/ansible-roles/roles/lsf_template_config/templates/fp14-inventory.j2 index 557b8e7b..b15eabfb 100644 --- a/modules/ansible-roles/roles/lsf/templates/fp14-inventory.j2 +++ b/modules/ansible-roles/roles/lsf_template_config/templates/fp14-inventory.j2 @@ -54,3 +54,5 @@ localhost {% for host in db_hosts %} {{ host }} {% endfor %} + +[LSF_WebService] diff --git a/modules/ansible-roles/roles/lsf/templates/fp15-config.j2 b/modules/ansible-roles/roles/lsf_template_config/templates/fp15-config.j2 similarity index 100% rename from modules/ansible-roles/roles/lsf/templates/fp15-config.j2 rename to modules/ansible-roles/roles/lsf_template_config/templates/fp15-config.j2 diff --git a/modules/ansible-roles/roles/lsf/templates/fp15-inventory.j2 b/modules/ansible-roles/roles/lsf_template_config/templates/fp15-inventory.j2 similarity index 100% rename from modules/ansible-roles/roles/lsf/templates/fp15-inventory.j2 rename to modules/ansible-roles/roles/lsf_template_config/templates/fp15-inventory.j2 diff --git a/modules/ansible-roles/roles/lsf/vars/main.yml b/modules/ansible-roles/roles/lsf_template_config/vars/main.yml similarity index 100% rename from modules/ansible-roles/roles/lsf/vars/main.yml rename to modules/ansible-roles/roles/lsf_template_config/vars/main.yml diff --git a/modules/ansible-roles/roles/vpc_fileshare_configure/handlers/main.yml b/modules/ansible-roles/roles/vpc_fileshare_config/handlers/main.yml similarity index 100% rename from modules/ansible-roles/roles/vpc_fileshare_configure/handlers/main.yml rename to modules/ansible-roles/roles/vpc_fileshare_config/handlers/main.yml diff --git a/modules/ansible-roles/roles/vpc_fileshare_configure/tasks/main.yml b/modules/ansible-roles/roles/vpc_fileshare_config/tasks/main.yml similarity index 100% rename from modules/ansible-roles/roles/vpc_fileshare_configure/tasks/main.yml rename to modules/ansible-roles/roles/vpc_fileshare_config/tasks/main.yml diff --git a/modules/ansible-roles/roles/vpc_fileshare_configure/tasks/vpc_fileshare_configure.yml b/modules/ansible-roles/roles/vpc_fileshare_config/tasks/vpc_fileshare_configure.yml similarity index 87% rename from modules/ansible-roles/roles/vpc_fileshare_configure/tasks/vpc_fileshare_configure.yml rename to modules/ansible-roles/roles/vpc_fileshare_config/tasks/vpc_fileshare_configure.yml index 8ff4e039..9e47e6e8 
100644 --- a/modules/ansible-roles/roles/vpc_fileshare_configure/tasks/vpc_fileshare_configure.yml +++ b/modules/ansible-roles/roles/vpc_fileshare_config/tasks/vpc_fileshare_configure.yml @@ -62,3 +62,11 @@ register: ls_output changed_when: false failed_when: ls_output.rc != 0 + +- name: Set appropriate permissions on base dirs + ansible.builtin.shell: | + chmod -R {{ (item.key == '/mnt/lsf') | ternary('0755', '0777') }} "{{ item.key }}" + args: + warn: false + with_dict: "{{ name_mount_path_map }}" + run_once: true diff --git a/modules/baremetal/datasource.tf b/modules/baremetal/datasource.tf index 71e65999..abcaa844 100644 --- a/modules/baremetal/datasource.tf +++ b/modules/baremetal/datasource.tf @@ -1,8 +1,13 @@ -data "ibm_resource_group" "existing_resource_group" { - name = var.existing_resource_group -} +# data "ibm_resource_group" "existing_resource_group" { +# name = var.existing_resource_group +# } data "ibm_is_image" "storage" { count = length(var.storage_servers) name = var.storage_servers[count.index]["image"] } + +data "ibm_is_bare_metal_server_profile" "itself" { + count = length(var.storage_servers) + name = var.storage_servers[count.index]["profile"] +} \ No newline at end of file diff --git a/modules/baremetal/locals.tf b/modules/baremetal/locals.tf index 6fba7bc8..734ccdfe 100644 --- a/modules/baremetal/locals.tf +++ b/modules/baremetal/locals.tf @@ -3,12 +3,60 @@ locals { prefix = var.prefix storage_image_id = data.ibm_is_image.storage[*].id storage_node_name = format("%s-%s", local.prefix, "strg") - resource_group_id = data.ibm_resource_group.existing_resource_group.id - bms_interfaces = ["ens1", "ens2"] - #storage_ssh_keys = [for name in var.storage_ssh_keys : data.ibm_is_ssh_key.storage[name].id] + # resource_group_id = data.ibm_resource_group.existing_resource_group.id + # bms_interfaces = ["ens1", "ens2"] + bms_interfaces = ["eth0", "eth1"] + # storage_ssh_keys = [for name in var.storage_ssh_keys : data.ibm_is_ssh_key.storage[name].id] # TODO: explore (DA always keep it true) #skip_iam_authorization_policy = true storage_server_count = sum(var.storage_servers[*]["count"]) enable_storage = local.storage_server_count > 0 + + user_data_vars = { + dns_domain = var.dns_domain_names["storage"], + enable_protocol = "", + protocol_domain = "", + vpc_region = "", + protocol_subnet_id = "", + resource_group_id = var.existing_resource_group, + bastion_public_key_content = base64encode(var.bastion_public_key_content != null ? 
var.bastion_public_key_content : ""),
+    vsi_meta_private_key       = base64encode(var.storage_private_key_content),
+    vsi_meta_public_key        = base64encode(var.storage_public_key_content)
+  }
+  # sapphire_rapids_profile_check = strcontains(var.vsi_profile, "3-metal") || strcontains(var.vsi_profile, "3d-metal")
+
+  raw_bm_details = flatten([
+    for module_instance in module.storage_baremetal : [
+      for server_key, server_details in module_instance.baremetal_servers :
+      {
+        id           = server_details.bms_server_id
+        name         = server_details.bms_server_name
+        ipv4_address = try(server_details.bms_server_ip, "")
+        vni_id       = server_details.bms_vni_id
+      }
+    ]
+  ])
+
+  bm_server_name = flatten(local.raw_bm_details[*].name)
+  bm_server_ips  = flatten(local.raw_bm_details[*].ipv4_address)
+
+  disk0_interface_type = (data.ibm_is_bare_metal_server_profile.itself[*].disks[0].supported_interface_types[0].default)[0]
+  disk_count           = (data.ibm_is_bare_metal_server_profile.itself[*].disks[1].quantity[0].value)[0]
+
+  # Determine starting disk based on disk0 interface type
+  nvme_start_disk = local.disk0_interface_type == "sata" ? "0" : "1"
+
+  # Generate NVMe device list up to 36 disks
+  all_disks = [
+    "/dev/nvme0n1", "/dev/nvme1n1", "/dev/nvme2n1", "/dev/nvme3n1", "/dev/nvme4n1", "/dev/nvme5n1",
+    "/dev/nvme6n1", "/dev/nvme7n1", "/dev/nvme8n1", "/dev/nvme9n1", "/dev/nvme10n1", "/dev/nvme11n1",
+    "/dev/nvme12n1", "/dev/nvme13n1", "/dev/nvme14n1", "/dev/nvme15n1", "/dev/nvme16n1", "/dev/nvme17n1",
+    "/dev/nvme18n1", "/dev/nvme19n1", "/dev/nvme20n1", "/dev/nvme21n1", "/dev/nvme22n1", "/dev/nvme23n1",
+    "/dev/nvme24n1", "/dev/nvme25n1", "/dev/nvme26n1", "/dev/nvme27n1", "/dev/nvme28n1", "/dev/nvme29n1",
+    "/dev/nvme30n1", "/dev/nvme31n1", "/dev/nvme32n1", "/dev/nvme33n1", "/dev/nvme34n1", "/dev/nvme35n1"
+  ]
+
+  # Select only the required number of disks
+  selected_disks = slice(local.all_disks, local.nvme_start_disk, local.disk_count + local.nvme_start_disk)
}
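Worked example for the disk selection above (profile values assumed for illustration): when the profile's first disk reports an NVMe interface, nvme_start_disk is "1", so with disk_count = 8 the slice covers indexes 1 through 8 and selected_disks is /dev/nvme1n1 through /dev/nvme8n1, skipping the boot drive; a SATA boot disk yields nvme_start_disk "0" and /dev/nvme0n1 through /dev/nvme7n1.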
diff --git a/modules/baremetal/main.tf b/modules/baremetal/main.tf
index 9f77bbb3..d4bc9d01 100644
--- a/modules/baremetal/main.tf
+++ b/modules/baremetal/main.tf
@@ -1,7 +1,7 @@
-module "storage_key" {
-  count  = local.enable_storage ? 1 : 0
-  source = "./../key"
-}
+# module "storage_key" {
+#   count  = local.enable_storage ? 1 : 0
+#   source = "./../key"
+# }

module "storage_baremetal" {
  source = "terraform-ibm-modules/bare-metal-vpc/ibm"
@@ -17,7 +17,46 @@ module "storage_baremetal" {
  bandwidth          = var.bandwidth
  allowed_vlan_ids   = var.allowed_vlan_ids
  access_tags        = null
-  resource_group_id  = local.resource_group_id
+  resource_group_id  = var.existing_resource_group
  security_group_ids = var.security_group_ids
-  user_data          = data.template_file.storage_user_data.rendered
+  user_data          = var.bms_boot_drive_encryption == false ? data.template_file.storage_user_data.rendered : templatefile("${path.module}/templates/cloud_init.yml", local.user_data_vars)
+}
+
+
+resource "time_sleep" "wait_for_reboot_tolerate" {
+  count           = var.bms_boot_drive_encryption == true ? 1 : 0
+  create_duration = "400s"
+  depends_on      = [module.storage_baremetal]
+}
+
+resource "null_resource" "scale_boot_drive_reboot_tolerate_provisioner" {
+  for_each = var.bms_boot_drive_encryption == false ? {} : {
+    for idx, count_number in range(1, length(var.storage_servers) + 1) : idx => {
+      network_ip = element(local.bm_server_ips, idx)
+    }
+  }
+  connection {
+    type        = "ssh"
+    host        = each.value.network_ip
+    user        = "root"
+    private_key = var.storage_private_key_content
+    timeout     = "60m"
+  }
+
+  provisioner "remote-exec" {
+    inline = [
+      "while true; do",
+      "  lsblk | grep crypt",
+      "  if [[ \"$?\" -eq 0 ]]; then",
+      "    break",
+      "  fi",
+      "  echo \"Waiting for BMS to be rebooted and drive to get encrypted...\"",
+      "  sleep 10",
+      "done",
+      "lsblk",
+      "systemctl restart NetworkManager",
+      "echo \"Restarted NetworkManager\""
+    ]
+  }
+  depends_on = [time_sleep.wait_for_reboot_tolerate]
}
diff --git a/modules/baremetal/outputs.tf b/modules/baremetal/outputs.tf
index 1f429c38..86c23c19 100644
--- a/modules/baremetal/outputs.tf
+++ b/modules/baremetal/outputs.tf
@@ -12,3 +12,8 @@ output "list" {
    ]
  ])
}
+
+output "instance_ips_with_vol_mapping" {
+  value       = { for instance_details in local.bm_server_name : instance_details => local.selected_disks }
+  description = "Instance IPs with volume mapping"
+}
\ No newline at end of file
diff --git a/modules/baremetal/template_files.tf b/modules/baremetal/template_files.tf
index a7117b26..4b8d8d93 100644
--- a/modules/baremetal/template_files.tf
+++ b/modules/baremetal/template_files.tf
@@ -2,8 +2,8 @@ data "template_file" "storage_user_data" {
  template = file("${path.module}/templates/storage_user_data.tpl")
  vars = {
    bastion_public_key_content = var.bastion_public_key_content != null ? var.bastion_public_key_content : ""
-    storage_public_key_content  = local.enable_storage ? module.storage_key[0].public_key_content : ""
-    storage_private_key_content = local.enable_storage ? module.storage_key[0].private_key_content : ""
+    storage_public_key_content  = var.storage_public_key_content
+    storage_private_key_content = var.storage_private_key_content
    storage_interfaces = local.bms_interfaces[0]
    storage_dns_domain = var.dns_domain_names["storage"]
  }
diff --git a/modules/baremetal/templates/cloud_init.yml b/modules/baremetal/templates/cloud_init.yml
new file mode 100644
index 00000000..8bfc25aa
--- /dev/null
+++ b/modules/baremetal/templates/cloud_init.yml
@@ -0,0 +1,188 @@
+#cloud-config
+growpart:
+  mode: off
+  devices: ['/']
+resize_rootfs: false
+write_files:
+  - content: |
+      #!/usr/bin/env bash
+      if grep -q "Red Hat" /etc/os-release
+      then
+        USER=vpcuser
+        PACKAGE_MGR=dnf
+        if grep -q "platform:el9" /etc/os-release
+        then
+          subscription-manager repos --enable=rhel-9-for-x86_64-supplementary-eus-rpms
+          package_list="python3 kernel-devel-$(uname -r) kernel-headers-$(uname -r) firewalld numactl make gcc-c++ elfutils-libelf-devel bind-utils iptables-nft nfs-utils elfutils elfutils-devel python3-dnf-plugin-versionlock cryptsetup clevis clevis-luks clevis-dracut tpm2-tools"
+        elif grep -q "platform:el8" /etc/os-release
+        then
+          package_list="python38 kernel-devel-$(uname -r) kernel-headers-$(uname -r) firewalld numactl jq make gcc-c++ elfutils-libelf-devel bind-utils iptables nfs-utils elfutils elfutils-devel python3-dnf-plugin-versionlock cryptsetup clevis clevis-luks clevis-dracut tpm2-tools"
+        fi
+
+        RETRY_LIMIT=5
+        retry_count=0
+        all_pkg_installed=1
+
+        while [[ $all_pkg_installed -ne 0 && $retry_count -lt $RETRY_LIMIT ]]
+        do
+          # Install all required packages
+          echo "INFO: Attempting to install packages"
+          $PACKAGE_MGR install -y $package_list
+
+          # Check to ensure packages are installed
+          pkg_installed=0
+          for pkg in $package_list
+          do
+            pkg_query=$($PACKAGE_MGR list
installed $pkg) + pkg_installed=$(($? + $pkg_installed)) + done + if [[ $pkg_installed -ne 0 ]] + then + # The minimum required packages have not been installed. + echo "WARN: Required packages not installed. Sleeping for 60 seconds and retrying..." + touch /var/log/scale-rerun-package-install + echo "INFO: Cleaning and repopulating repository data" + $PACKAGE_MGR clean all + $PACKAGE_MGR makecache + sleep 60 + else + all_pkg_installed=0 + fi + retry_count=$(( $retry_count+1 )) + done + + yum update --security -y + yum versionlock add $package_list + yum versionlock list + echo 'export PATH=$PATH:/usr/lpp/mmfs/bin' >> /root/.bashrc + elif grep -q "Ubuntu" /etc/os-release + then + USER=ubuntu + fi + + sed -i -e "s/^/no-port-forwarding,no-agent-forwarding,no-X11-forwarding,command=\"echo \'Please login as the user \\\\\"$USER\\\\\" rather than the user \\\\\"root\\\\\".\';echo;sleep 10; exit 142\" /" ~/.ssh/authorized_keys + echo "${vsi_meta_private_key}" | base64 --decode > /root/.ssh/id_rsa + chmod 600 ~/.ssh/id_rsa + echo "${vsi_meta_public_key}" | base64 --decode >> /root/.ssh/authorized_keys + echo "${bastion_public_key_content}" | base64 --decode >> /root/.ssh/authorized_keys + echo "StrictHostKeyChecking no" >> ~/.ssh/config + echo "DOMAIN=\"${dns_domain}\"" >> "/etc/sysconfig/network-scripts/ifcfg-eth0" + echo "MTU=9000" >> "/etc/sysconfig/network-scripts/ifcfg-eth0" + sed -i -e "s#QUEUE_COUNT=3#QUEUE_COUNT=\`ethtool -l \$iface | echo \$(awk '\$1 ~ /Combined:/ {print \$2;exit}')\`#g" /var/lib/cloud/scripts/per-boot/iface-config + ethtool -L eth0 combined 16 + chage -I -1 -m 0 -M 99999 -E -1 -W 14 vpcuser + systemctl restart NetworkManager + systemctl stop firewalld + firewall-offline-cmd --zone=public --add-port=1191/tcp + firewall-offline-cmd --zone=public --add-port=4444/tcp + firewall-offline-cmd --zone=public --add-port=4444/udp + firewall-offline-cmd --zone=public --add-port=4739/udp + firewall-offline-cmd --zone=public --add-port=4739/tcp + firewall-offline-cmd --zone=public --add-port=9084/tcp + firewall-offline-cmd --zone=public --add-port=9085/tcp + firewall-offline-cmd --zone=public --add-service=http + firewall-offline-cmd --zone=public --add-service=https + firewall-offline-cmd --zone=public --add-port=2049/tcp + firewall-offline-cmd --zone=public --add-port=2049/udp + firewall-offline-cmd --zone=public --add-port=111/tcp + firewall-offline-cmd --zone=public --add-port=111/udp + firewall-offline-cmd --zone=public --add-port=30000-61000/tcp + firewall-offline-cmd --zone=public --add-port=30000-61000/udp + systemctl start firewalld + systemctl enable firewalld + + if [ "${enable_protocol}" == true ]; then + sec_interface=$(nmcli -t con show --active | grep eth1 | cut -d ':' -f 1) + nmcli conn del "$sec_interface" + nmcli con add type ethernet con-name eth1 ifname eth1 + echo "DOMAIN=\"${protocol_domain}\"" >> "/etc/sysconfig/network-scripts/ifcfg-eth1" + echo "MTU=9000" >> "/etc/sysconfig/network-scripts/ifcfg-eth1" + systemctl restart NetworkManager + ###### TODO: Fix Me ###### + echo 'export IC_REGION=${vpc_region}' >> /root/.bashrc + echo 'export IC_SUBNET=${protocol_subnet_id}' >> /root/.bashrc + echo 'export IC_RG=${resource_group_id}' >> /root/.bashrc + fi + path: /usr/local/bin/scale_user_data.sh + permissions: '0755' + - content: | + #!/bin/bash + # This script encrypts the root partition of a Redhat 8/9 stock IBM Cloud + # image using the TPM to encrypt the LUKS keys. 
It assumes there is plenty + # of unpartition space on the drive, and leaves the current root partition + # for rescue boot (but this could be deleted on a subsequent boot). + # + # * Create a new partition on the drive using all free space + # * Encrypt the new partition using LUKS with a known passphrase + # * Use 'clevis' to create an additional LUKS passphrase that is bound to the TPM + # * Re-generate initramfs via dracut to ensure the root drive is auto-unlocked on boot + # * Copy the current root filesystem to the new drive + # * Update fstab and crypttab for auto-mounting + # * Update grub to boot using the newly encrypted root drive + # + echo "Encrypt my boot drive" + # Determine the boot device (minus partition name) + # Assumes 'sdaX' or 'nvmeXnYpZ' + device=$(mount | grep "on / type" | awk '{print $1}') + if [[ "$device" =~ "nvme" ]]; then + device=$${device%??} + else + device=$${device%?} + fi + echo $device + # Create a root partition filling up the rest of the drive + echo -e 'n\np\n\n\n\nw' | fdisk $${device} + partition=$(fdisk -l $device | grep $device | tail -1 | awk '{print $1}') + echo $partition + # Setup encryption on the drive with a well known passphrase, and format the filesystem + echo -n n0tsecret | cryptsetup luksFormat --type luks2 -q --force-password $partition + echo -n n0tsecret | cryptsetup open $partition root + mkfs.xfs /dev/mapper/root + # Add the TPM key to the LUKS encrypted drive. + # For additional security, you can bind it to specific TPM PCR banks, but this will cause the TPM unlock + # to fail when the bank changes (EG firmware is updated). If you want to bind it to a PCR: + # ,"pcr_bank":"sha256","pcr_ids":"7" + echo -n n0tsecret | clevis luks bind -y -k - -d $partition tpm2 '{"hash":"sha256","key":"rsa"}' + # Regenerate dracut initramfs to allow unlock on boot + dracut -fv --regenerate-all + # Copy the OS into the encrypted partition + mkdir /mnt/encryptedroot + mount /dev/mapper/root /mnt/encryptedroot + rsync -a --exclude='/proc/*' --exclude='/sys/*' --exclude='/boot' --exclude='/mnt/encryptedroot' / /mnt/encryptedroot + # Grab the UUID for the encrypted partition and setup the crypttab + uuid=$(lsblk -lfi -o NAME,FSTYPE,UUID | grep crypto_LUKS | awk '{print $3}') + echo "root UUID=$${uuid} none luks" > /mnt/encryptedroot/etc/crypttab + # Replace root with '/dev/mapper/root / xfs defaults 0 1' in fstab + sed -i "/\t\/\t/c/dev/mapper/root\t/\txfs\tdefaults\t0\t1" /mnt/encryptedroot/etc/fstab + # Setup grub + # Grab default cmdline args + args=$(grep CMDLINE_LINUX /etc/default/grub | sed 's/.*GRUB_CMDLINE_LINUX=//' | sed 's/\"//g') + # Update grub and set the new entry to be the default. + grubby --add-kernel="/boot/vmlinuz-$(uname -r)" \ + --title="Boot from encrypted root" \ + --initrd="/boot/initramfs-$(uname -r).img" \ + --args "$${args} root=/dev/mapper/root rd.luks.name=$${uuid}=root" \ + --make-default + # Since we use EFI, copy the grubenv over (note the \cp is not a typo, + # it ensures that the 'cp' alias isn't used.) + efidir=$(ls /boot/efi/EFI/ | grep -v BOOT) + \cp -f /boot/grub2/grubenv /boot/efi/EFI/$${efidir}/ + # We MUST have a separate /boot partiiton to host the kernel and initramfs unencrypted + # as these are needed to unlock the root drive. The IBM CLoud RHEL 9.x images have + # a separate boot partiiton, but 8.x do not. + # If we dont have a separate /boot partition, we'll use the current root partition + # as /boot. So copy the current /boot content into the root of the filessytem. + if ! 
lsblk -l | grep /boot$; then
+        rsync -a --exclude='/efi*' /boot/ /
+        # Current root device UUID - it will become boot device uuid
+        curr_root_uuid=$(lsblk -fl | grep /$ | awk '{print $4}')
+        # Add the new /boot partition to fstab for auto-mounting.
+        echo -e "UUID=$${curr_root_uuid}\t/boot\txfs\tdefaults\t0\t0" >> /mnt/encryptedroot/etc/fstab
+      fi
+      # Reboot the system
+      shutdown -r now
+    path: /usr/local/bin/boot_drive_encryption.sh
+    permissions: '0755'
+runcmd:
+  - /usr/local/bin/scale_user_data.sh
+  - /usr/local/bin/boot_drive_encryption.sh
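Illustrative post-reboot checks (hedged; the remote-exec provisioner above only greps lsblk for 'crypt', and the partition path here is an assumption for the example):

    lsblk -o NAME,TYPE,MOUNTPOINT | grep crypt   # root should appear as /dev/mapper/root with TYPE=crypt
    clevis luks list -d /dev/nvme0n1p5           # hypothetical partition; lists the tpm2 binding created by the script
    cryptsetup isLuks /dev/nvme0n1p5 && echo "LUKS OK"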
diff --git a/modules/baremetal/templates/storage_user_data.tpl b/modules/baremetal/templates/storage_user_data.tpl
index 31f15e6b..ad36fb68 100644
--- a/modules/baremetal/templates/storage_user_data.tpl
+++ b/modules/baremetal/templates/storage_user_data.tpl
@@ -32,6 +32,7 @@ then
  if grep -q "platform:el9" /etc/os-release
  then
    PACKAGE_MGR=dnf
+    subscription-manager repos --enable=rhel-9-for-x86_64-supplementary-eus-rpms
    package_list="python3 kernel-devel-$(uname -r) kernel-headers-$(uname -r) firewalld numactl make gcc-c++ elfutils-libelf-devel bind-utils iptables-nft nfs-utils elfutils elfutils-devel python3-dnf-plugin-versionlock"
  elif grep -q "platform:el8" /etc/os-release
  then
@@ -96,6 +97,8 @@ echo "##########################################################################
echo "DOMAIN=${storage_dns_domain}" >> "/etc/sysconfig/network-scripts/ifcfg-${storage_interfaces}"
echo "MTU=9000" >> "/etc/sysconfig/network-scripts/ifcfg-${storage_interfaces}"
+sed -i -e "s#QUEUE_COUNT=3#QUEUE_COUNT=\`ethtool -l \$iface | echo \$(awk '\$1 ~ /Combined:/ {print \$2;exit}')\`#g" /var/lib/cloud/scripts/per-boot/iface-config
+ethtool -L eth0 combined 16
chage -I -1 -m 0 -M 99999 -E -1 -W 14 vpcuser
sleep 120
systemctl restart NetworkManager
diff --git a/modules/baremetal/variables.tf b/modules/baremetal/variables.tf
index f24d57c6..14d1e978 100644
--- a/modules/baremetal/variables.tf
+++ b/modules/baremetal/variables.tf
@@ -107,3 +107,23 @@ variable "dns_domain_names" {
  }
  description = "IBM Cloud HPC DNS domain names."
}
+
+variable "storage_public_key_content" {
+  type        = string
+  sensitive   = true
+  default     = null
+  description = "Storage nodes public key content."
+}
+
+variable "storage_private_key_content" {
+  type        = string
+  sensitive   = true
+  default     = null
+  description = "Storage nodes private key content."
+}
+
+variable "bms_boot_drive_encryption" {
+  type        = bool
+  default     = false
+  description = "Set to true to enable encryption of the bare metal server boot drive."
+}
\ No newline at end of file
diff --git a/modules/common/encryption_configuration/encryption_configuration.tf b/modules/common/encryption_configuration/encryption_configuration.tf
new file mode 100644
index 00000000..7132ecbf
--- /dev/null
+++ b/modules/common/encryption_configuration/encryption_configuration.tf
@@ -0,0 +1,54 @@
+
+locals {
+  gklm_private_key             = format("%s/gklm_key/id_rsa", var.clone_path)
+  scale_encryption_servers     = jsonencode(var.scale_encryption_servers)
+  scale_encryption_servers_dns = jsonencode(var.scale_encryption_servers_dns)
+  compute_inventory_path       = format("%s/%s/compute_inventory.ini", var.clone_path, "ibm-spectrum-scale-install-infra")
+  storage_inventory_path       = format("%s/%s/storage_inventory.ini", var.clone_path, "ibm-spectrum-scale-install-infra")
+  combined_inventory_path      = format("%s/%s/combined_inventory.ini", var.clone_path, "ibm-spectrum-scale-install-infra")
+  encryption_gklm_playbook     = format("%s/%s/encryption_gklm_playbook.yaml", var.clone_path, "ibm-spectrum-scale-install-infra")
+  encryption_cluster_playbook  = format("%s/%s/encryption_cluster_playbook.yaml", var.clone_path, "ibm-spectrum-scale-install-infra")
+}
+
+resource "local_sensitive_file" "write_meta_private_key" {
+  count           = (tobool(var.turn_on) == true && var.scale_encryption_type == "gklm") ? 1 : 0
+  content         = var.meta_private_key
+  filename        = local.gklm_private_key
+  file_permission = "0600"
+}
+
+resource "null_resource" "perform_encryption_prepare" {
+  count = (tobool(var.turn_on) == true && tobool(var.create_scale_cluster) == true && var.scale_encryption_type == "gklm") ? 1 : 0
+  provisioner "local-exec" {
+    interpreter = ["/bin/bash", "-c"]
+    command     = "/usr/local/bin/ansible-playbook -f 32 ${local.encryption_gklm_playbook} -e scale_cluster_clustername=${var.scale_cluster_clustername} -e ansible_ssh_private_key_file=${local.gklm_private_key} -e scale_encryption_admin_default_password=${var.scale_encryption_admin_default_password} -e scale_encryption_admin_password=${var.scale_encryption_admin_password} -e scale_encryption_admin_user=${var.scale_encryption_admin_username} -e '{\"scale_encryption_servers_list\": ${local.scale_encryption_servers}}'"
+  }
+  depends_on = [local_sensitive_file.write_meta_private_key]
+  triggers = {
+    build = timestamp()
+  }
+}
+
+resource "null_resource" "perform_encryption_storage" {
+  count = (tobool(var.turn_on) == true && tobool(var.storage_cluster_encryption) == true && tobool(var.storage_cluster_create_complete) == true && tobool(var.remote_mount_create_complete) == true && tobool(var.create_scale_cluster) == true && var.scale_encryption_type == "gklm") ? 1 : 0
+  provisioner "local-exec" {
+    interpreter = ["/bin/bash", "-c"]
+    command     = "/usr/local/bin/ansible-playbook -f 32 -i ${local.storage_inventory_path} ${local.encryption_cluster_playbook} -e '{\"scale_encryption_servers_dns\": ${local.scale_encryption_servers_dns}}'"
+  }
+  depends_on = [null_resource.perform_encryption_prepare]
+  triggers = {
+    build = timestamp()
+  }
+}
+
+resource "null_resource" "perform_encryption_compute" {
+  count = (tobool(var.turn_on) == true && tobool(var.compute_cluster_encryption) == true && tobool(var.compute_cluster_create_complete) == true && tobool(var.remote_mount_create_complete) == true && tobool(var.create_scale_cluster) == true && var.scale_encryption_type == "gklm") ?
1 : 0 + provisioner "local-exec" { + interpreter = ["/bin/bash", "-c"] + command = "/usr/local/bin/ansible-playbook -f 32 -i ${local.compute_inventory_path} ${local.encryption_cluster_playbook} -e '{\"scale_encryption_servers_dns\": ${local.scale_encryption_servers_dns}}'" + } + depends_on = [null_resource.perform_encryption_prepare, null_resource.perform_encryption_storage] + triggers = { + build = timestamp() + } +} diff --git a/modules/common/encryption_configuration/variables.tf b/modules/common/encryption_configuration/variables.tf new file mode 100644 index 00000000..531eb78a --- /dev/null +++ b/modules/common/encryption_configuration/variables.tf @@ -0,0 +1,16 @@ +variable "turn_on" {} +variable "clone_path" {} +variable "create_scale_cluster" {} +variable "meta_private_key" {} +variable "scale_cluster_clustername" {} +variable "scale_encryption_servers" {} +variable "scale_encryption_servers_dns" {} +variable "scale_encryption_admin_default_password" {} +variable "scale_encryption_admin_password" {} +variable "scale_encryption_admin_username" {} +variable "scale_encryption_type" {} +variable "compute_cluster_create_complete" {} +variable "storage_cluster_create_complete" {} +variable "remote_mount_create_complete" {} +variable "compute_cluster_encryption" {} +variable "storage_cluster_encryption" {} diff --git a/modules/deployer/datasource.tf b/modules/deployer/datasource.tf index af05b26e..fc391e8a 100644 --- a/modules/deployer/datasource.tf +++ b/modules/deployer/datasource.tf @@ -3,11 +3,12 @@ # } data "ibm_is_image" "bastion" { - name = var.bastion_image + name = var.bastion_instance["image"] } data "ibm_is_image" "deployer" { - name = var.deployer_image + count = local.deployer_image_found_in_map ? 0 : 1 + name = var.deployer_instance["image"] } data "ibm_is_ssh_key" "bastion" { @@ -20,3 +21,8 @@ data "ibm_is_instance" "bastion_instance_name" { count = var.bastion_instance_name != null ? 1 : 0 name = var.bastion_instance_name } + +#Existing Public Gateway attachment +data "ibm_is_public_gateways" "public_gateways" { + count = var.ext_vpc_name != null ? 
1 : 0 +} diff --git a/modules/deployer/image_map.tf b/modules/deployer/image_map.tf new file mode 100644 index 00000000..292e2b49 --- /dev/null +++ b/modules/deployer/image_map.tf @@ -0,0 +1,30 @@ +locals { + image_region_map = { + "hpc-lsf-fp15-deployer-rhel810-v1" = { + "eu-es" = "r050-e7b874c1-f370-41c4-8ee6-50efb07aa340" + "eu-gb" = "r018-eb14c522-cb0f-4b72-948f-2c029957665a" + "eu-de" = "r010-00629ef3-324c-4651-a7a7-76830d2ad660" + "us-east" = "r014-ac586488-de00-490e-8962-5e2a7fcab076" + "us-south" = "r006-f2b7871c-54c9-4b02-837c-1d28294f0842" + "jp-tok" = "r022-dd715ea3-d2dc-4936-bff0-51c9cd63b3a9" + "jp-osa" = "r034-82d648ed-fd3e-4248-955c-6009c973aa5f" + "au-syd" = "r026-b47e4863-f5e7-440c-8734-c058f6b8ce33" + "br-sao" = "r042-8b5ac031-3e65-4afb-9679-b7e2b907a2ad" + "ca-tor" = "r038-c55b1ab4-500f-4842-9e78-dc64a16a746a" + "ca-mon" = "r058-fc93c3f9-f97c-4d9b-b8d6-dd40db891913" + }, + "hpc-lsf-fp14-deployer-rhel810-v1" = { + "eu-es" = "r050-a530edc3-d053-41cd-899b-2c61d53d5efd" + "eu-gb" = "r018-b368f002-64ea-48bb-a5f1-77e7891c2691" + "eu-de" = "r010-c5b5f7d9-bc3e-4e18-9724-f682ccfef617" + "us-east" = "r014-9d0c683d-da23-4836-9057-d8732c26010a" + "us-south" = "r006-33e861c5-590f-492d-a97b-eb62e313dc8d" + "jp-tok" = "r022-b02c8618-ea8f-42bf-854a-da5822ee3cb5" + "jp-osa" = "r034-490ee8d9-f5af-410d-9aeb-c6190beefdf6" + "au-syd" = "r026-93a5c85d-8861-46a9-8100-1d3d788f750d" + "br-sao" = "r042-93c1a769-c138-4765-91d2-5796965b6a98" + "ca-tor" = "r038-9448213f-22ce-4a6a-b6b0-22dd6ed9fbb3" + "ca-mon" = "r058-b3211406-9eec-4148-aafb-d6ab7c26a6eb" + } + } +} diff --git a/modules/deployer/locals.tf b/modules/deployer/locals.tf index 3ba9e1a6..140240bc 100644 --- a/modules/deployer/locals.tf +++ b/modules/deployer/locals.tf @@ -3,6 +3,7 @@ locals { name = var.scheduler == "LSF" ? "LSF" : (var.scheduler == "Scale" ? "Scale" : (var.scheduler == "HPCaaS" ? "HPCaaS" : (var.scheduler == "Symphony" ? "Symphony" : (var.scheduler == "Slurm" ? "Slurm" : "")))) prefix = var.prefix tags = [local.prefix, local.name] + region = join("-", slice(split("-", var.zones[0]), 0, 2)) schematics_reserved_cidrs = [ "169.44.0.0/14", @@ -16,20 +17,24 @@ locals { "150.238.230.128/27", "169.55.82.128/27" ] - bastion_sg_variable_cidr = var.enable_deployer == false ? distinct(flatten([ + bastion_sg_variable_cidr = distinct(flatten([ local.schematics_reserved_cidrs, var.allowed_cidr, - var.network_cidr - ])) : distinct(flatten([var.allowed_cidr, var.network_cidr])) + var.cluster_cidr + ])) - enable_bastion = var.enable_bastion || var.enable_deployer enable_deployer = var.enable_deployer bastion_node_name = format("%s-%s", local.prefix, "bastion") deployer_node_name = format("%s-%s", local.prefix, "deployer") - bastion_image_id = data.ibm_is_image.bastion.id - deployer_image_id = data.ibm_is_image.deployer.id + bastion_image_id = data.ibm_is_image.bastion.id + + # deployer_image_id = data.ibm_is_image.deployer[0].id + # Check whether an entry is found in the mapping file for the given deployer node image + deployer_image_found_in_map = contains(keys(local.image_region_map), var.deployer_instance["image"]) + # If not found, assume the name is the id already (customer provided image) + new_deployer_image_id = local.deployer_image_found_in_map ? 
local.image_region_map[var.deployer_instance["image"]][local.region] : "Image not found with the given name" bastion_ssh_keys = [for name in var.ssh_keys : data.ibm_is_ssh_key.bastion[name].id] @@ -49,12 +54,15 @@ locals { name = format("allow-variable-inbound-%s", index(local.bastion_sg_variable_cidr, cidr) + 1) direction = "inbound" remote = cidr - # ssh port - tcp = { - port_min = 22 - port_max = 22 - } }], + + # Conditional SG ID inbound rule (added only if condition is met) + var.existing_bastion_security_group_id != null ? [{ + name = "allow-sg-id-inbound" + direction = "inbound" + remote = var.existing_bastion_security_group_id # The source security group ID + }] : [], + [for cidr in concat(local.bastion_sg_variable_cidr, ["0.0.0.0/0"]) : { name = format("allow-variable-outbound-%s", index(concat(local.bastion_sg_variable_cidr, ["0.0.0.0/0"]), cidr) + 1) direction = "outbound" @@ -68,16 +76,6 @@ locals { # Subnets bastion_subnets = var.bastion_subnets - - - # Bastion Security group rule update to connect with login node - bastion_security_group_rule_update = [ - { - name = "inbound-rule-for-login-node-connection" - direction = "inbound" - remote = var.bastion_security_group_id - } - ] } locals { @@ -85,3 +83,8 @@ locals { compute_interfaces = local.vsi_interfaces[0] compute_dns_domain = var.dns_domain_names["compute"] } + +locals { + public_gateways_list = var.ext_vpc_name != null ? data.ibm_is_public_gateways.public_gateways[0].public_gateways : [] + zone_1_pgw_ids = var.ext_vpc_name != null ? [for gateway in local.public_gateways_list : gateway.id if gateway.vpc == var.vpc_id && gateway.zone == var.zones[0]] : [] +} diff --git a/modules/deployer/main.tf b/modules/deployer/main.tf index 23d1f9a4..adf15701 100644 --- a/modules/deployer/main.tf +++ b/modules/deployer/main.tf @@ -1,11 +1,23 @@ +resource "ibm_is_subnet_public_gateway_attachment" "zone_1_attachment" { + count = (var.ext_vpc_name != null && var.ext_cluster_subnet_id == null) ? 1 : 0 + subnet = var.cluster_subnets[0].id + public_gateway = length(local.zone_1_pgw_ids) > 0 ? local.zone_1_pgw_ids[0] : "" +} + +resource "ibm_is_subnet_public_gateway_attachment" "bastion_attachment" { + count = (var.ext_vpc_name != null && var.ext_login_subnet_id == null) ? 1 : 0 + subnet = local.bastion_subnets[0].id + public_gateway = length(local.zone_1_pgw_ids) > 0 ? local.zone_1_pgw_ids[0] : "" +} + module "ssh_key" { - count = local.enable_bastion ? 1 : 0 + count = var.enable_deployer ? 1 : 0 source = "./../key" private_key_path = "bastion_id_rsa" #checkov:skip=CKV_SECRET_6 } module "bastion_sg" { - count = local.enable_bastion ? 1 : 0 + count = var.enable_deployer ? 1 : 0 source = "terraform-ibm-modules/security-group/ibm" version = "2.6.2" add_ibm_cloud_internal_rules = true @@ -15,28 +27,15 @@ module "bastion_sg" { vpc_id = var.vpc_id } -module "existing_bastion_sg_update" { - count = (local.enable_bastion && var.bastion_security_group_id != null) ? 1 : 0 - source = "terraform-ibm-modules/security-group/ibm" - version = "2.6.2" - resource_group = var.resource_group - add_ibm_cloud_internal_rules = true - use_existing_security_group_id = true - existing_security_group_id = var.bastion_security_group_id - security_group_rules = local.bastion_security_group_rule_update - vpc_id = var.vpc_id - depends_on = [module.bastion_sg] -} - module "bastion_vsi" { - count = (var.enable_bastion && var.bastion_instance_name == null) ? 1 : 0 + count = (var.enable_deployer && var.bastion_instance_name == null) ? 
1 : 0 source = "terraform-ibm-modules/landing-zone-vsi/ibm" version = "5.0.0" vsi_per_subnet = 1 create_security_group = false security_group = null image_id = local.bastion_image_id - machine_type = var.bastion_instance_profile + machine_type = var.bastion_instance["profile"] prefix = local.bastion_node_name resource_group_id = var.resource_group enable_floating_ip = true @@ -47,7 +46,7 @@ module "bastion_vsi" { user_data = data.template_file.bastion_user_data.rendered vpc_id = var.vpc_id kms_encryption_enabled = var.kms_encryption_enabled - skip_iam_authorization_policy = var.skip_iam_authorization_policy + skip_iam_authorization_policy = true boot_volume_encryption_key = var.boot_volume_encryption_key existing_kms_instance_guid = var.existing_kms_instance_guid } @@ -59,8 +58,8 @@ module "deployer_vsi" { vsi_per_subnet = 1 create_security_group = false security_group = null - image_id = local.deployer_image_id - machine_type = var.deployer_instance_profile + image_id = local.deployer_image_found_in_map ? local.new_deployer_image_id : data.ibm_is_image.deployer[0].id + machine_type = var.deployer_instance["profile"] prefix = local.deployer_node_name resource_group_id = var.resource_group enable_floating_ip = false diff --git a/modules/deployer/outputs.tf b/modules/deployer/outputs.tf index e0636002..f7ed216b 100644 --- a/modules/deployer/outputs.tf +++ b/modules/deployer/outputs.tf @@ -25,12 +25,7 @@ output "bastion_fip_id" { output "bastion_security_group_id" { description = "Bastion SG" - value = var.bastion_security_group_id != null ? var.bastion_security_group_id : one(module.bastion_sg[*].security_group_id) -} - -output "bastion_security_group_id_for_ref" { - description = "Bastion SG id for ref" - value = one(module.bastion_sg[*].security_group_id_for_ref) + value = one(module.bastion_sg[*].security_group_id) } output "bastion_public_key_content" { diff --git a/modules/deployer/template_files.tf b/modules/deployer/template_files.tf index 467a047d..8939cee2 100644 --- a/modules/deployer/template_files.tf +++ b/modules/deployer/template_files.tf @@ -1,15 +1,15 @@ data "template_file" "bastion_user_data" { template = file("${path.module}/templates/bastion_user_data.tpl") vars = { - ssh_public_key_content = local.enable_bastion ? module.ssh_key[0].public_key_content : "" + ssh_public_key_content = var.enable_deployer ? module.ssh_key[0].public_key_content : "" } } data "template_file" "deployer_user_data" { template = file("${path.module}/templates/deployer_user_data.tpl") vars = { - bastion_public_key_content = local.enable_bastion ? module.ssh_key[0].public_key_content : "" - compute_dns_domain = local.enable_bastion ? local.compute_dns_domain : "" - compute_interfaces = local.enable_bastion ? local.compute_interfaces : "" + bastion_public_key_content = var.enable_deployer ? module.ssh_key[0].public_key_content : "" + compute_dns_domain = var.enable_deployer ? local.compute_dns_domain : "" + compute_interfaces = var.enable_deployer ? local.compute_interfaces : "" } } diff --git a/modules/deployer/variables.tf b/modules/deployer/variables.tf index d8a81b33..e1c9fb2e 100644 --- a/modules/deployer/variables.tf +++ b/modules/deployer/variables.tf @@ -31,10 +31,39 @@ variable "vpc_id" { description = "ID of an existing VPC in which the cluster resources will be deployed." } -variable "network_cidr" { - description = "Network CIDR for the VPC. This is used to manage network ACL rules for cluster provisioning." 
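The deployer image is now resolved in two steps: locals.tf derives the region from the first zone, then looks the image name up in the new image_region_map; only when the name is absent from the map does module "deployer_vsi" fall back to the ibm_is_image data source (a stock or customer-supplied image name). A minimal sketch folding those two steps into one expression, assuming the patch's var.zones, var.deployer_instance, local.image_region_map, and data.ibm_is_image.deployer:

locals {
  # "us-south-1" -> "us-south"
  region = join("-", slice(split("-", var.zones[0]), 0, 2))

  # True when the image name has a pinned per-region ID in image_map.tf
  deployer_image_found_in_map = contains(keys(local.image_region_map), var.deployer_instance["image"])

  # Hypothetical local (not in the patch) combining the map lookup with the data-source fallback
  resolved_deployer_image_id = (
    local.deployer_image_found_in_map
    ? local.image_region_map[var.deployer_instance["image"]][local.region]
    : data.ibm_is_image.deployer[0].id
  )
}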
+variable "ext_vpc_name" {
   type = string
-  default = "10.0.0.0/8"
+  default = null
+  description = "Name of an existing VPC in which the cluster resources will be deployed. If no value is given, then a new VPC will be provisioned for the cluster. [Learn more](https://cloud.ibm.com/docs/vpc)"
+}
+
+variable "cluster_cidr" {
+  description = "Network CIDR of the VPC. This is used to manage network security rules for cluster provisioning."
+  type = string
+  default = "10.241.0.0/18"
+}
+
+variable "cluster_subnets" {
+  type = list(object({
+    name = string
+    id = string
+    zone = string
+    cidr = string
+  }))
+  default = []
+  description = "List of existing subnets in which the cluster resources will be deployed. If no value is given, then new subnets will be provisioned for the cluster. [Learn more](https://cloud.ibm.com/docs/vpc)"
+}
+
+variable "ext_login_subnet_id" {
+  type = string
+  default = null
+  description = "ID of an existing subnet in which the bastion/login resources will be deployed. If no value is given, then a new subnet will be provisioned. [Learn more](https://cloud.ibm.com/docs/vpc)"
+}
+
+variable "ext_cluster_subnet_id" {
+  type = string
+  default = null
+  description = "ID of an existing subnet in which the cluster resources will be deployed. If no value is given, then a new subnet will be provisioned for the cluster. [Learn more](https://cloud.ibm.com/docs/vpc)"
 }
 ##############################################################################
@@ -49,22 +78,17 @@ variable "scheduler" {
 ##############################################################################
 # Access Variables
 ##############################################################################
-variable "enable_bastion" {
-  type = bool
-  default = true
-  description = "The solution supports multiple ways to connect to your HPC cluster, for example, using a bastion node, via VPN, or direct connection. If connecting to the HPC cluster via VPN or direct connection, set this value to false."
-}
-
-variable "bastion_image" {
-  type = string
-  default = "ibm-ubuntu-22-04-3-minimal-amd64-1"
-  description = "The image to use to deploy the bastion host."
-}
-variable "bastion_instance_profile" {
-  type = string
-  default = "cx2-4x8"
-  description = "Deployer should be only used for better deployment performance"
+variable "bastion_instance" {
+  type = object({
+    image = string
+    profile = string
+  })
+  default = {
+    image = "ibm-ubuntu-22-04-5-minimal-amd64-3"
+    profile = "cx2-4x8"
+  }
+  description = "Configuration for the Bastion node, including the image and instance profile. Only Ubuntu stock images are supported."
 }
 variable "bastion_subnets" {
@@ -84,19 +108,19 @@ variable "bastion_subnets" {
 variable "enable_deployer" {
   type = bool
   default = false
-  description = "deployer should be only used for better deployment performance"
+  description = "Enables the deployer node, which is used only to improve deployment performance."
 }
-variable "deployer_image" {
-  type = string
-  default = "ibm-redhat-8-10-minimal-amd64-2"
-  description = "The image to use to deploy the deployer host."
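With the separate image and profile variables collapsed into the object-typed bastion_instance and deployer_instance variables above, callers now set both fields together. An illustrative terraform.tfvars override using the defaults from this patch (any other deployer image must either appear in image_map.tf or be resolvable by name through ibm_is_image):

bastion_instance = {
  image   = "ibm-ubuntu-22-04-5-minimal-amd64-3"
  profile = "cx2-4x8"
}

deployer_instance = {
  image   = "hpc-lsf-fp15-deployer-rhel810-v1"
  profile = "bx2-8x32"
}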
-} - -variable "deployer_instance_profile" { - type = string - default = "mx2-4x32" - description = "deployer should be only used for better deployment performance" +variable "deployer_instance" { + type = object({ + image = string + profile = string + }) + default = { + image = "hpc-lsf-fp15-deployer-rhel810-v1" + profile = "bx2-8x32" + } + description = "Configuration for the deployer node, including the custom image and instance profile. By default, uses fixpack_15 image and a bx2-8x32 profile." } variable "ssh_keys" { @@ -107,7 +131,7 @@ variable "ssh_keys" { variable "allowed_cidr" { description = "Network CIDR to access the VPC. This is used to manage network ACL rules for accessing the cluster." type = list(string) - default = ["10.0.0.0/8"] + default = [] } # TODO: landing-zone-vsi limitation to opt out encryption @@ -160,8 +184,13 @@ variable "bastion_instance_public_ip" { description = "Bastion instance public ip address." } -variable "bastion_security_group_id" { +variable "existing_bastion_security_group_id" { type = string default = null - description = "Bastion security group id." + description = "Existing bastion security group id." +} + +variable "zones" { + description = "Region where VPC will be created. To find your VPC region, use `ibmcloud is regions` command to find available regions." + type = list(string) } diff --git a/modules/inventory/main.tf b/modules/inventory/main.tf index 86c1808d..862149b4 100644 --- a/modules/inventory/main.tf +++ b/modules/inventory/main.tf @@ -4,14 +4,19 @@ locals { resource "local_sensitive_file" "mount_path_file" { content = < "${var.key_protect_path}/Key_Protect_Server.cert" + # Extract the end date of the certificate + [ -f "${var.key_protect_path}/Key_Protect_Server.cert" ] && END_DATE=$(openssl x509 -enddate -noout -in "${var.key_protect_path}/Key_Protect_Server.cert" | awk -F'=' '{print $2}') + # Get the current date in GMT + CURRENT_DATE=$(date -u +"%b %d %T %Y %Z") + # Calculate the difference in days + DIFF_DAYS=$(echo $(( ( $(date -ud "$END_DATE" +%s) - $(date -ud "$CURRENT_DATE" +%s) ) / 86400 ))) + # Create a Key Protect Server Root and CA certs + [ -f "${var.key_protect_path}/Key_Protect_Server.cert" ] && awk '/-----BEGIN CERTIFICATE-----/,/-----END CERTIFICATE-----/' "${var.key_protect_path}/Key_Protect_Server.cert" > "${var.key_protect_path}/Key_Protect_Server_CA.cert" + [ -f "${var.key_protect_path}/Key_Protect_Server_CA.cert" ] && awk '/-----BEGIN CERTIFICATE-----/{x="${var.key_protect_path}/Key_Protect_Server.chain"i".cert"; i++} {print > x}' "${var.key_protect_path}/Key_Protect_Server_CA.cert" + [ -f "${var.key_protect_path}/Key_Protect_Server.chain.cert" ] && mv "${var.key_protect_path}/Key_Protect_Server.chain.cert" "${var.key_protect_path}/Key_Protect_Server.chain0.cert" + # Create a Self Signed Certificates + [ ! -f "${var.key_protect_path}/${var.resource_prefix}.key" ] && openssl genpkey -algorithm RSA -out "${var.key_protect_path}/${var.resource_prefix}.key" + [ ! -f "${var.key_protect_path}/${var.resource_prefix}.csr" ] && openssl req -new -key "${var.key_protect_path}/${var.resource_prefix}.key" -out "${var.key_protect_path}/${var.resource_prefix}.csr" -subj "/CN=${var.vpc_storage_cluster_dns_domain}" + [ ! 
-f "${var.key_protect_path}/${var.resource_prefix}.cert" ] && openssl x509 -req -days $DIFF_DAYS -in "${var.key_protect_path}/${var.resource_prefix}.csr" -signkey "${var.key_protect_path}/${var.resource_prefix}.key" -out "${var.key_protect_path}/${var.resource_prefix}.cert" + EOT + } +} + +resource "ibm_kms_key" "scale_key" { + instance_id = var.key_protect_instance_id + key_name = "key" + standard_key = false +} + +resource "ibm_kms_kmip_adapter" "sclae_kmip_adapter" { + instance_id = var.key_protect_instance_id + profile = "native_1.0" + profile_data = { + "crk_id" = ibm_kms_key.scale_key.key_id + } + description = "Key Protect adapter" + name = format("%s-kp-adapter", var.resource_prefix) +} + +resource "ibm_kms_kmip_client_cert" "mycert" { + instance_id = var.key_protect_instance_id + adapter_id = ibm_kms_kmip_adapter.sclae_kmip_adapter.adapter_id + certificate = data.local_file.kpclient_cert.content + name = format("%s-kp-cert", var.resource_prefix) + depends_on = [data.local_file.kpclient_cert] +} \ No newline at end of file diff --git a/modules/key_protect/outputs.tf b/modules/key_protect/outputs.tf new file mode 100644 index 00000000..e69de29b diff --git a/modules/key_protect/variables.tf b/modules/key_protect/variables.tf new file mode 100644 index 00000000..9cddfa07 --- /dev/null +++ b/modules/key_protect/variables.tf @@ -0,0 +1,6 @@ +variable "key_protect_instance_id" {} +variable "resource_prefix" {} +variable "vpc_region" {} +variable "resource_group_id" {} +variable "key_protect_path" {} +variable "vpc_storage_cluster_dns_domain" {} \ No newline at end of file diff --git a/modules/key_protect/version.tf b/modules/key_protect/version.tf new file mode 100644 index 00000000..886be456 --- /dev/null +++ b/modules/key_protect/version.tf @@ -0,0 +1,14 @@ +############################################################################## +# Terraform Providers +############################################################################## + +terraform { + required_version = ">= 1.9.0" + # Use "greater than or equal to" range for root level modules + required_providers { + ibm = { + source = "IBM-Cloud/ibm" + version = ">= 1.68.1, < 2.0.0" + } + } +} diff --git a/modules/landing_zone/locals.tf b/modules/landing_zone/locals.tf index 70d0eaea..d3832008 100644 --- a/modules/landing_zone/locals.tf +++ b/modules/landing_zone/locals.tf @@ -1,20 +1,8 @@ locals { # Defined values - name = "hpc" + name = "lsf" prefix = var.prefix tags = [local.prefix, local.name] - schematics_reserved_cidrs = [ - "169.44.0.0/14", - "169.60.0.0/14", - "158.175.0.0/16", - "158.176.0.0/15", - "141.125.0.0/16", - "161.156.0.0/16", - "149.81.0.0/16", - "159.122.111.224/27", - "150.238.230.128/27", - "169.55.82.128/27" - ] # Derived values @@ -57,7 +45,7 @@ locals { # Future use #zone_count = length(local.active_zones) - bastion_sg_variable_cidr_list = split(",", var.network_cidr) + bastion_sg_variable_cidr_list = split(",", var.cluster_cidr) # Address Prefixes calculation address_prefixes = { @@ -72,30 +60,35 @@ locals { acl_name = "hpc-acl" cidr = var.client_subnets_cidr[index(local.active_zones, zone)] public_gateway = true + no_addr_prefix = true } : null, { name = "compute-subnet-${zone}" acl_name = "hpc-acl" cidr = var.vpc_cluster_private_subnets_cidr_blocks[index(local.active_zones, zone)] public_gateway = true + no_addr_prefix = true }, local.storage_instance_count != 0 ? 
{ name = "storage-subnet-${zone}" acl_name = "hpc-acl" cidr = var.storage_subnets_cidr[index(local.active_zones, zone)] public_gateway = true + no_addr_prefix = true } : null, local.storage_instance_count != 0 && local.protocol_instance_count != 0 ? { name = "protocol-subnet-${zone}" acl_name = "hpc-acl" cidr = var.protocol_subnets_cidr[index(local.active_zones, zone)] public_gateway = true + no_addr_prefix = true } : null, zone == local.active_zones[0] ? { name = "bastion-subnet" acl_name = "hpc-acl" cidr = var.vpc_cluster_login_private_subnets_cidr_blocks public_gateway = true + no_addr_prefix = true } : null ] : [] } @@ -110,12 +103,12 @@ locals { # If user defined then use existing else create new # Calculate network acl rules (can be done inplace in vpcs) # TODO: VPN expectation - cidrs_network_acl_rules = compact(flatten([local.schematics_reserved_cidrs, var.allowed_cidr, var.network_cidr, "161.26.0.0/16", "166.8.0.0/14", "0.0.0.0/0"])) + cidrs_network_acl_rules = compact(flatten(["0.0.0.0/0"])) network_acl_inbound_rules = [ for cidr_index in range(length(local.cidrs_network_acl_rules)) : { name = format("allow-inbound-%s", cidr_index + 1) action = "allow" - destination = var.network_cidr + destination = var.cluster_cidr direction = "inbound" source = element(local.cidrs_network_acl_rules, cidr_index) } @@ -126,7 +119,7 @@ locals { action = "allow" destination = element(local.cidrs_network_acl_rules, cidr_index) direction = "outbound" - source = var.network_cidr + source = var.cluster_cidr } ] network_acl_rules = flatten([local.network_acl_inbound_rules, local.network_acl_outbound_rules]) @@ -337,7 +330,7 @@ locals { } ]) : null - key_management = var.key_management == "key_protect" ? { + key_management = var.key_management == "key_protect" || (var.scale_encryption_enabled && var.scale_encryption_type == "key_protect" && var.key_protect_instance_id == null) ? { name = var.kms_instance_name != null ? var.kms_instance_name : format("%s-kms", var.prefix) # var.key_management == "hs_crypto" ? var.hpcs_instance_name : format("%s-kms", var.prefix) resource_group = local.service_resource_group use_hs_crypto = false @@ -399,7 +392,7 @@ locals { locals { env = { resource_groups = local.resource_groups - network_cidr = var.network_cidr + cluster_cidr = var.cluster_cidr vpcs = local.vpcs vpn_gateways = local.vpn_gateways enable_transit_gateway = local.enable_transit_gateway @@ -423,5 +416,6 @@ locals { f5_vsi = local.f5_vsi f5_template_data = local.f5_template_data skip_kms_block_storage_s2s_auth_policy = local.skip_kms_block_storage_s2s_auth_policy + } } diff --git a/modules/landing_zone/main.tf b/modules/landing_zone/main.tf index cea853e4..80688431 100644 --- a/modules/landing_zone/main.tf +++ b/modules/landing_zone/main.tf @@ -1,12 +1,12 @@ module "landing_zone" { count = var.enable_landing_zone ? 
1 : 0 source = "terraform-ibm-modules/landing-zone/ibm" - version = "7.5.0" + version = "8.2.0" prefix = local.prefix region = local.region tags = local.tags resource_groups = local.env.resource_groups - network_cidr = local.env.network_cidr + network_cidr = local.env.cluster_cidr vpcs = local.env.vpcs vpn_gateways = local.env.vpn_gateways enable_transit_gateway = local.env.enable_transit_gateway diff --git a/modules/landing_zone/outputs.tf b/modules/landing_zone/outputs.tf index c1291e1e..600bc457 100644 --- a/modules/landing_zone/outputs.tf +++ b/modules/landing_zone/outputs.tf @@ -30,7 +30,7 @@ output "bastion_subnets" { id = subnet["id"] zone = subnet["zone"] cidr = subnet["cidr"] - } if strcontains(subnet["name"], "-hpc-bastion-subnet") + } if strcontains(subnet["name"], "-lsf-bastion-subnet") ] } @@ -41,7 +41,7 @@ output "client_subnets" { id = subnet["id"] zone = subnet["zone"] cidr = subnet["cidr"] - } if strcontains(subnet["name"], "-hpc-client-subnet") + } if strcontains(subnet["name"], "-lsf-client-subnet") ] } @@ -52,7 +52,7 @@ output "compute_subnets" { id = subnet["id"] zone = subnet["zone"] cidr = subnet["cidr"] - } if strcontains(subnet["name"], "-hpc-compute-subnet-zone-") + } if strcontains(subnet["name"], "-lsf-compute-subnet-zone-") ] } @@ -63,7 +63,7 @@ output "storage_subnets" { id = subnet["id"] zone = subnet["zone"] cidr = subnet["cidr"] - } if strcontains(subnet["name"], "-hpc-storage-subnet-zone-") + } if strcontains(subnet["name"], "-lsf-storage-subnet-zone-") ] } @@ -74,7 +74,7 @@ output "protocol_subnets" { id = subnet["id"] zone = subnet["zone"] cidr = subnet["cidr"] - } if strcontains(subnet["name"], "-hpc-protocol-subnet-zone-") + } if strcontains(subnet["name"], "-lsf-protocol-subnet-zone-") ] } @@ -94,6 +94,11 @@ output "key_management_guid" { value = var.enable_landing_zone ? var.key_management != null ? module.landing_zone[0].key_management_guid : null : null } +output "key_management_instance_id" { + description = "ID for KMS instance" + value = var.enable_landing_zone ? var.key_management != null || (var.scale_encryption_enabled && var.scale_encryption_type == "key_protect" && var.key_protect_instance_id == null) ? module.landing_zone[0].key_management_id : null : null +} + output "cos_buckets_data" { description = "COS buckets data" value = flatten(module.landing_zone[*].cos_bucket_data) diff --git a/modules/landing_zone/variables.tf b/modules/landing_zone/variables.tf index 9a4af9cb..accb1eb9 100644 --- a/modules/landing_zone/variables.tf +++ b/modules/landing_zone/variables.tf @@ -47,10 +47,10 @@ variable "vpc_name" { default = null } -variable "network_cidr" { - description = "Network CIDR for the VPC. This is used to manage network ACL rules for cluster provisioning." +variable "cluster_cidr" { + description = "Network CIDR of the VPC. This is used to manage network security rules for cluster provisioning." type = string - default = "10.0.0.0/8" + default = "10.241.0.0/18" } variable "placement_strategy" { @@ -74,12 +74,6 @@ variable "vpc_cluster_login_private_subnets_cidr_blocks" { description = "Provide the CIDR block required for the creation of the login cluster's private subnet. Only one CIDR block is needed. If using a hybrid environment, modify the CIDR block to avoid conflicts with any on-premises CIDR blocks. Since the login subnet is used only for the creation of login virtual server instances, provide a CIDR range of /28." } -variable "allowed_cidr" { - description = "Network CIDR to access the VPC. 
This is used to manage network ACL rules for accessing the cluster." - type = list(string) - default = ["10.0.0.0/8"] -} - ############################################################################## # Compute Variables ############################################################################## @@ -263,6 +257,27 @@ variable "kms_key_name" { description = "Provide the existing KMS encryption key name that you want to use for the IBM Cloud HPC cluster. (for example kms_key_name: my-encryption-key)." } + +##Scale Encryption Variables + +variable "scale_encryption_enabled" { + type = bool + default = false + description = "To enable the encryption for the filesystem. Select true or false" +} + +variable "scale_encryption_type" { + type = string + default = null + description = "To enable filesystem encryption, specify either 'key_protect' or 'gklm'. If neither is specified, the default value will be 'null' and encryption is disabled" +} + +variable "key_protect_instance_id" { + type = string + default = null + description = "An existing Key Protect instance used for filesystem encryption" +} + # variable "hpcs_instance_name" { # type = string # default = null diff --git a/modules/landing_zone_vsi/datasource.tf b/modules/landing_zone_vsi/datasource.tf index 31168445..55b62f4b 100644 --- a/modules/landing_zone_vsi/datasource.tf +++ b/modules/landing_zone_vsi/datasource.tf @@ -3,7 +3,7 @@ # } data "ibm_is_image" "management_stock_image" { - count = length(var.management_instances) + count = local.image_mapping_entry_found ? 0 : length(var.management_instances) name = var.management_instances[count.index]["image"] } @@ -38,7 +38,7 @@ data "ibm_is_image" "client" { } data "ibm_is_image" "compute_stock_image" { - count = length(var.static_compute_instances) + count = local.compute_image_found_in_map ? 0 : length(var.static_compute_instances) name = var.static_compute_instances[count.index]["image"] } @@ -93,6 +93,11 @@ data "ibm_is_image" "gklm" { name = var.gklm_instances[count.index]["image"] } +data "ibm_is_image" "login_vsi_image" { + count = local.login_image_found_in_map ? 0 : 1 + name = var.login_instance[count.index]["image"] +} + data "ibm_is_dedicated_host_profiles" "profiles" { count = var.enable_dedicated_host ? 
1 : 0 } diff --git a/modules/landing_zone_vsi/image_map.tf b/modules/landing_zone_vsi/image_map.tf index 9c1277e8..f58ee9d7 100644 --- a/modules/landing_zone_vsi/image_map.tf +++ b/modules/landing_zone_vsi/image_map.tf @@ -1,37 +1,52 @@ locals { image_region_map = { - "hpcaas-lsf10-rhel810-compute-v8" = { - "eu-gb" = "r018-fd4a0927-72df-440c-93f9-f6a325ec90b6" - "eu-de" = "r010-3b541f40-64ab-41f2-ba96-720fd3862a85" - "us-east" = "r014-188b366f-25bb-4545-9bf9-11004bb4a016" - "us-south" = "r006-a99df2a9-5a28-4ba2-b964-0f7e5fd40ac1" - "jp-tok" = "r022-7d1e34af-b876-458a-b4b6-f7b5744ca8db" - "jp-osa" = "r034-a085a1b5-7f70-40a1-9d84-172d844dfbbc" - "au-syd" = "r026-5b600da8-6c93-42e8-9015-48d220180f3b" - "br-sao" = "r042-e8ed8280-b1c1-45ba-9fe2-aa5ece321799" - "ca-tor" = "r038-bbb8e69c-ddd0-42ab-bd74-b39904c4adfe" + "hpc-lsf-fp15-rhel810-v1" = { + "eu-es" = "r050-deeeb734-2523-4aff-96e3-2be8d2b0d634" + "eu-gb" = "r018-8edcd9a1-dbca-462f-bf74-017c15ca4b71" + "eu-de" = "r010-394c5295-1704-4066-b57e-ae9bca1968de" + "us-east" = "r014-1777cdcb-8a68-4ef0-becf-84ec0d2e9a26" + "us-south" = "r006-40caf671-28a8-42c5-b83e-b2ba3ceb86af" + "jp-tok" = "r022-01531301-d100-44ba-b1a3-12e7c8d65469" + "jp-osa" = "r034-ac455775-c667-4d3e-b281-9ef845080599" + "au-syd" = "r026-eff4d59c-5006-46cc-8b03-60514f763a87" + "br-sao" = "r042-1e1bbeeb-3ef7-4f7a-a44c-9f50609bb538" + "ca-tor" = "r038-bb9fcdb7-d200-4cdd-af04-6848007c9cb2" }, - "hpcaas-lsf10-ubuntu2204-compute-v8" = { - "us-east" = "r014-b8deeb5c-90d7-4c07-80a6-d9b130510661" - "eu-de" = "r010-1b56109c-b22c-4fca-91a9-e39e98c8d928" - "us-south" = "r006-eb1e8993-5455-4b98-8a9d-d6e1fe364c08" + "hpc-lsf-fp15-compute-rhel810-v1" = { + "eu-es" = "r050-f0608e39-9dcf-4aca-9e92-7719474b3e86" + "eu-gb" = "r018-db8b97a8-6f87-4cf7-a044-847da6ab5c59" + "eu-de" = "r010-957efd6b-e7b3-4249-8644-6184f1531915" + "us-east" = "r014-5fdd6a25-5943-4084-9c57-b900a80579a3" + "us-south" = "r006-5c0e462a-679c-4a18-81a5-0fe036f483a3" + "jp-tok" = "r022-8087a984-8912-42ff-9576-c5cab8edda3a" + "jp-osa" = "r034-728d1f12-7842-412c-97a0-9deb66c23962" + "au-syd" = "r026-f957ed22-9565-441c-bce6-f716360e02ea" + "br-sao" = "r042-7bf7d508-a7b1-4434-ae6a-6986f7042d4e" + "ca-tor" = "r038-a658da44-f1b4-4e02-826a-38b16e6ae98a" }, - "hpcaas-lsf10-rhel810-v12" = { - "us-east" = "r014-5ae97886-6bcb-4fde-9da3-740a513261a8" - "eu-de" = "r010-1c8df3b1-8def-45eb-82ac-ab2db1612bd9" - "us-south" = "r006-045e03ee-4cfa-4415-a4ec-d8bceadc1bdb" + "hpc-lsf-fp14-rhel810-v1" = { + "eu-es" = "r050-12a3533c-5fa1-4bcc-8765-7150a06e122e" + "eu-gb" = "r018-3ef87e4e-0f46-424a-b623-fa25215094c0" + "eu-de" = "r010-48e5560b-4d34-43ca-b824-2d85513f3188" + "us-east" = "r014-3719a4e2-6746-4eaf-844a-c3721b7c6d32" + "us-south" = "r006-e720ec63-5e8c-46ce-b7a2-51c454e64099" + "jp-tok" = "r022-917ce78b-dacf-4008-b6c0-4058bf59a5b4" + "jp-osa" = "r034-507fb655-4164-45b8-b1d7-f6cb2fbeafc9" + "au-syd" = "r026-01900450-7314-42ea-aee3-acf5179300c0" + "br-sao" = "r042-bb407137-93cf-4ec7-aa77-4702896fff97" + "ca-tor" = "r038-6683403d-1cf5-4f39-a96f-c8cbb2314ad5" }, - "hpc-lsf10-rhel810-v2" = { - "eu-es" = "r050-86c03f46-e10a-4edf-8fcf-103845362db9" - "eu-gb" = "r018-90675b8a-db1b-4a41-b5a0-f21c04cb7d57" - "eu-de" = "r010-dd925c68-d186-406b-a8f7-8d965c60512b" - "us-east" = "r014-4bc87a52-d377-43da-a042-aa1fa1629d28" - "us-south" = "r006-6540f00a-525d-4f62-8a35-f218520b37d2" - "jp-tok" = "r022-02a31841-c5ca-4527-a660-d8e5b1cfb29e" - "jp-osa" = "r034-c7e76920-e735-4702-b04c-1f2cffe170cb" - "au-syd" = "r026-ad5cdb8f-1c44-4267-8969-fe62ac0e93a4" - 
"br-sao" = "r042-b89b9b8c-a934-4f9d-88bc-b9a15866f223" - "ca-tor" = "r038-d5992a56-ddd1-4156-a98c-54ecef51ae3d" + "hpc-lsf-fp14-compute-rhel810-v1" = { + "eu-es" = "r050-d2ad9625-1668-4b2c-a8bb-6ef14678d3ed" + "eu-gb" = "r018-f1059503-27ec-44d4-a981-21be6225520a" + "eu-de" = "r010-8115b1f6-912e-4b55-89f1-e448c397115e" + "us-east" = "r014-5108884c-011b-4473-b585-0d43309c37e3" + "us-south" = "r006-68c6af72-1abf-4d13-bca1-4f42be5d2c70" + "jp-tok" = "r022-1932c5ec-b5a6-4262-aa56-6c6257c8297f" + "jp-osa" = "r034-50be9bd9-9623-4ffc-8ce7-aab66f674137" + "au-syd" = "r026-11aee148-c938-4524-91e6-8e6da5933a42" + "br-sao" = "r042-5cb62448-e771-4caf-a556-28fdf88acab9" + "ca-tor" = "r038-fa815ec1-d52e-42b2-8221-5b8c2145a248" } } } diff --git a/modules/landing_zone_vsi/locals.tf b/modules/landing_zone_vsi/locals.tf index 7bf35a5b..7be15efa 100644 --- a/modules/landing_zone_vsi/locals.tf +++ b/modules/landing_zone_vsi/locals.tf @@ -2,7 +2,7 @@ locals { # Future use # products = "scale" - name = "hpc" + name = "lsf" prefix = var.prefix tags = [local.prefix, local.name] vsi_interfaces = ["eth0", "eth1"] @@ -12,17 +12,23 @@ locals { # Region and Zone calculations region = join("-", slice(split("-", var.zones[0]), 0, 2)) - management_image_id = data.ibm_is_image.management_stock_image[*].id + # management_image_id = data.ibm_is_image.management_stock_image[*].id # Check whether an entry is found in the mapping file for the given management node image image_mapping_entry_found = contains(keys(local.image_region_map), var.management_instances[0]["image"]) new_image_id = local.image_mapping_entry_found ? local.image_region_map[var.management_instances[0]["image"]][local.region] : "Image not found with the given name" - compute_image_id = data.ibm_is_image.compute_stock_image[*].id + # compute_image_id = data.ibm_is_image.compute_stock_image[*].id # Check whether an entry is found in the mapping file for the given compute node image compute_image_found_in_map = contains(keys(local.image_region_map), var.static_compute_instances[0]["image"]) # If not found, assume the name is the id already (customer provided image) new_compute_image_id = local.compute_image_found_in_map ? local.image_region_map[var.static_compute_instances[0]["image"]][local.region] : "Image not found with the given name" + # login_image_id = data.ibm_is_image.login_vsi_image[*].id + # Check whether an entry is found in the mapping file for the given login node image + login_image_found_in_map = contains(keys(local.image_region_map), var.login_instance[0]["image"]) + # If not found, assume the name is the id already (customer provided image) + new_login_image_id = local.login_image_found_in_map ? local.image_region_map[var.login_instance[0]["image"]][local.region] : "Image not found with the given name" + products = var.scheduler == "Scale" ? 
"scale" : "lsf" block_storage_volumes = [for volume in coalesce(var.nsd_details, []) : { name = format("nsd-%s", index(var.nsd_details, volume) + 1) @@ -79,7 +85,8 @@ locals { ldap_node_name = format("%s-%s", local.prefix, "ldap") afm_node_name = format("%s-%s", local.prefix, "afm") gklm_node_name = format("%s-%s", local.prefix, "gklm") - cpmoute_management_node_name = format("%s-%s", local.prefix, "comp-mgmt") + compute_management_node_name = format("%s-%s", local.prefix, "comp-mgmt") + login_node_name = format("%s-%s", local.prefix, "login") # Future use /* @@ -144,10 +151,10 @@ locals { # Subnets # TODO: Multi-zone multi-vNIC VSIs deployment support (bug #https://github.ibm.com/GoldenEye/issues/issues/5830) # Findings: Singe zone multi-vNICs VSIs deployment & multi-zone single vNIC VSIs deployment are supported. - client_subnets = var.client_subnets - cluster_subnet_ids = var.cluster_subnet_ids - storage_subnets = var.storage_subnets - protocol_subnets = var.protocol_subnets + client_subnets = var.client_subnets + cluster_subnet_id = var.cluster_subnet_id + storage_subnets = var.storage_subnets + protocol_subnets = var.protocol_subnets compute_public_key_content = one(module.compute_key[*].public_key_content) compute_private_key_content = one(module.compute_key[*].private_key_content) @@ -207,7 +214,7 @@ locals { errors = concat( [ for vr in local.validation_results : - "ERROR: Instance profile '${vr.profile}' is not available in this region" + "ERROR: Dedicated Host for the instance profile '${vr.profile}' is not available in this region" if !vr.instance_valid ], [ @@ -251,7 +258,7 @@ check "profile_validation" { error_message = join("\n", concat( ["Deployment configuration invalid:"], local.errors, - ["", "Available CURRENT dedicated host profiles:"], + ["", "Available CURRENT dedicated host profiles in this region:"], [for p in local.current_dh_profiles : " - ${p.name} (${p.family})"] )) } @@ -259,7 +266,7 @@ check "profile_validation" { locals { - bastion_security_group = var.bastion_security_group_id_for_ref + bastion_security_group = var.bastion_security_group_id # Security group id client_security_group = local.enable_client ? module.client_sg[0].security_group_id_for_ref : null compute_security_group = local.enable_compute ? 
module.compute_sg[0].security_group_id_for_ref : null @@ -269,20 +276,33 @@ locals { [ { name = "client-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, { name = "client-allow-clientsg-inbound", direction = "inbound", remote = local.client_security_group }, - { name = "client-allow-computesg-inbound", direction = "inbound", remote = local.compute_security_group } + { name = "client-allow-computesg-inbound", direction = "inbound", remote = local.compute_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, + { name = "storage-allow-storagesg-inbound", direction = "inbound", remote = local.storage_security_group }, + { name = "client-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" }, + { name = "compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] : [ { name = "client-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, - { name = "client-allow-clientsg-inbound", direction = "inbound", remote = local.client_security_group } + { name = "client-allow-clientsg-inbound", direction = "inbound", remote = local.client_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, + { name = "storage-allow-storagesg-inbound", direction = "inbound", remote = local.storage_security_group }, + { name = "client-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] ) : (local.enable_compute ? [ { name = "client-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, - { name = "client-allow-computesg-inbound", direction = "inbound", remote = local.compute_security_group } + { name = "client-allow-computesg-inbound", direction = "inbound", remote = local.compute_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, + { name = "client-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" }, + { name = "compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] : [ - { name = "client-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group } + { name = "client-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, + { name = "storage-allow-storagesg-inbound", direction = "inbound", remote = local.storage_security_group }, + { name = "client-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] ) @@ -292,24 +312,28 @@ locals { { name = "compute-allow-clientsg-inbound", direction = "inbound", remote = local.client_security_group }, { name = "compute-allow-computesg-inbound", direction = "inbound", remote = local.compute_security_group }, { name = "compute-allow-storagesg-inbound", direction = "inbound", remote = local.storage_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, { name = "compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] : [ { name = "compute-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, { name = "compute-allow-clientsg-inbound", direction = "inbound", remote = local.client_security_group }, { name = "compute-allow-computesg-inbound", direction = "inbound", remote = local.compute_security_group }, - { name = 
"compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, + { name = "compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" }, ] ) : (local.enable_storage ? [ { name = "compute-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, { name = "compute-allow-clientsg-inbound", direction = "inbound", remote = local.client_security_group }, { name = "compute-allow-storagesg-inbound", direction = "inbound", remote = local.storage_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, { name = "compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] : [ { name = "compute-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, { name = "compute-allow-clientsg-inbound", direction = "inbound", remote = local.client_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, { name = "compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] ) @@ -318,21 +342,25 @@ locals { { name = "compute-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, { name = "compute-allow-computesg-inbound", direction = "inbound", remote = local.compute_security_group }, { name = "compute-allow-storagesg-inbound", direction = "inbound", remote = local.storage_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, { name = "compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] : [ { name = "compute-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, { name = "compute-allow-computesg-inbound", direction = "inbound", remote = local.compute_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, { name = "compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] ) : (local.enable_storage ? 
[ { name = "compute-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, { name = "compute-allow-storagesg-inbound", direction = "inbound", remote = local.storage_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, { name = "compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] : [ { name = "compute-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, { name = "compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] ) @@ -342,19 +370,49 @@ locals { [ { name = "storage-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, { name = "storage-allow-computesg-inbound", direction = "inbound", remote = local.compute_security_group }, - { name = "storage-allow-storagesg-inbound", direction = "inbound", remote = local.storage_security_group } + { name = "storage-allow-storagesg-inbound", direction = "inbound", remote = local.storage_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, + { name = "client-allow-clientsg-inbound", direction = "inbound", remote = local.client_security_group }, + { name = "client-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] : [ { name = "storage-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, { name = "storage-allow-computesg-inbound", direction = "inbound", remote = local.compute_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, + { name = "client-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" }, + { name = "compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } + ] ) : (local.enable_storage ? [ { name = "storage-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, - { name = "storage-allow-storagesg-inbound", direction = "inbound", remote = local.storage_security_group } + { name = "storage-allow-storagesg-inbound", direction = "inbound", remote = local.storage_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, + { name = "client-allow-clientsg-inbound", direction = "inbound", remote = local.client_security_group }, + { name = "client-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } ] : [ - { name = "storage-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group } + { name = "storage-allow-bastionsg-inbound", direction = "inbound", remote = local.bastion_security_group }, + { name = "client-allow-network-inbound", direction = "inbound", remote = var.cluster_cidr }, + { name = "client-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" }, + { name = "compute-allow-all-outbound", direction = "outbound", remote = "0.0.0.0/0" } + ] ) + + storage_nfs_security_group_rules = [ + { + name = "allow-all-compute-sg" + direction = "inbound" + remote = local.compute_security_group + } + ] + + bastion_security_group_update_rule = local.enable_compute ? [ + { name = "bastion-allow-compute-sg", direction = "inbound", remote = local.compute_security_group } + ] : (local.enable_storage ? 
[ + { name = "bastion-allow-storage-sg", direction = "inbound", remote = local.storage_security_group } + ] : (local.enable_client ? [ + { name = "bastion-allow-client-sg", direction = "inbound", remote = local.client_security_group }] : [] + )) } diff --git a/modules/landing_zone_vsi/main.tf b/modules/landing_zone_vsi/main.tf index b2ac1be2..73c618f1 100644 --- a/modules/landing_zone_vsi/main.tf +++ b/modules/landing_zone_vsi/main.tf @@ -15,10 +15,29 @@ resource "null_resource" "entitlement_check" { } } +#Checks the Dedicated host profile and stops the build +resource "null_resource" "dedicated_host_validation" { + count = var.enable_dedicated_host && length(var.static_compute_instances) > 0 && local.should_validate_profile ? 1 : 0 + + provisioner "local-exec" { + command = < 0 ? local.dedicated_host_map[var.static_compute_instances[count.index]["profile"]] : null + depends_on = [module.dedicated_host, null_resource.dedicated_host_validation] } module "compute_cluster_management_vsi" { @@ -165,16 +212,16 @@ module "compute_cluster_management_vsi" { vsi_per_subnet = 1 create_security_group = false security_group = null - image_id = local.compute_image_id[count.index] + image_id = local.compute_image_found_in_map ? local.new_compute_image_id : data.ibm_is_image.compute_stock_image[0].id machine_type = var.static_compute_instances[count.index]["profile"] - prefix = count.index == 0 ? local.cpmoute_management_node_name : format("%s-%s", local.cpmoute_management_node_name, count.index) + prefix = count.index == 0 ? local.compute_management_node_name : format("%s-%s", local.compute_management_node_name, count.index) resource_group_id = var.resource_group enable_floating_ip = false security_group_ids = module.compute_sg[*].security_group_id ssh_key_ids = local.ssh_keys - subnets = local.cluster_subnet_ids + subnets = local.cluster_subnet_id tags = local.tags - user_data = data.template_file.compute_user_data.rendered + user_data = data.template_file.scale_compute_user_data.rendered vpc_id = var.vpc_id kms_encryption_enabled = var.kms_encryption_enabled skip_iam_authorization_policy = local.skip_iam_authorization_policy @@ -304,7 +351,7 @@ module "client_vsi" { module "protocol_vsi" { count = var.colocate_protocol_instances == true ? 0 : length(var.protocol_instances) source = "terraform-ibm-modules/landing-zone-vsi/ibm" - version = "5.0.0" + version = "5.1.20" vsi_per_subnet = var.protocol_instances[count.index]["count"] create_security_group = false security_group = null @@ -322,16 +369,16 @@ module "protocol_vsi" { kms_encryption_enabled = var.kms_encryption_enabled skip_iam_authorization_policy = local.skip_iam_authorization_policy boot_volume_encryption_key = var.boot_volume_encryption_key - existing_kms_instance_guid = var.existing_kms_instance_guid + # existing_kms_instance_guid = var.existing_kms_instance_guid # Bug: 5847 - LB profile & subnets are not configurable # load_balancers = local.enable_load_balancer ? 
local.load_balancers : [] - secondary_allow_ip_spoofing = true - secondary_security_groups = local.protocol_secondary_security_group - secondary_subnets = local.protocol_subnets - placement_group_id = var.placement_group_ids - manage_reserved_ips = true - primary_vni_additional_ip_count = var.protocol_instances[count.index]["count"] - depends_on = [resource.null_resource.entitlement_check] + secondary_allow_ip_spoofing = true + secondary_security_groups = local.protocol_secondary_security_group + secondary_subnets = local.protocol_subnets + # placement_group_id = var.placement_group_ids + manage_reserved_ips = true + # primary_vni_additional_ip_count = var.protocol_instances[count.index]["count"] + depends_on = [resource.null_resource.entitlement_check] # placement_group_id = var.placement_group_ids[(var.protocol_instances[count.index]["count"])%(length(var.placement_group_ids))] } @@ -399,7 +446,7 @@ module "ldap_vsi" { enable_floating_ip = false security_group_ids = local.products == "lsf" ? module.compute_sg[*].security_group_id : module.storage_sg[*].security_group_id ssh_key_ids = local.products == "lsf" ? local.ssh_keys : local.ldap_ssh_keys - subnets = local.products == "lsf" ? local.cluster_subnet_ids : [local.storage_subnets[0]] + subnets = local.products == "lsf" ? local.cluster_subnet_id : [local.storage_subnets[0]] tags = local.tags user_data = data.template_file.ldap_user_data.rendered vpc_id = var.vpc_id @@ -425,6 +472,7 @@ module "dedicated_host" { profile = each.value.profile family = each.value.family resource_group_id = var.resource_group + depends_on = [null_resource.dedicated_host_validation] } ######################################################################## @@ -432,14 +480,32 @@ module "dedicated_host" { ######################################################################## module "storage_baremetal" { + count = length(var.storage_servers) > 0 && var.storage_type == "persistent" ? 1 : 0 + source = "../baremetal" + existing_resource_group = var.resource_group + prefix = var.prefix + storage_subnets = [for subnet in local.storage_subnets : subnet.id] + storage_ssh_keys = local.ssh_keys + storage_servers = var.storage_servers + security_group_ids = module.storage_sg[*].security_group_id + bastion_public_key_content = var.bastion_public_key_content + storage_public_key_content = local.enable_storage ? module.storage_key[0].public_key_content : "" + storage_private_key_content = local.enable_storage ? module.storage_key[0].private_key_content : "" + bms_boot_drive_encryption = var.bms_boot_drive_encryption - count = length(var.storage_servers) > 0 && var.storage_type == "persistent" ? 1 : 0 - source = "../baremetal" - existing_resource_group = var.resource_group - prefix = var.prefix - storage_subnets = [for subnet in local.storage_subnets : subnet.id] - storage_ssh_keys = local.ssh_keys - storage_servers = var.storage_servers - security_group_ids = module.storage_sg[*].security_group_id - bastion_public_key_content = var.bastion_public_key_content } + +module "storage_baremetal_tie_breaker" { + count = length(var.storage_servers) > 0 && var.storage_type == "persistent" ? 
1 : 0 + source = "../baremetal" + existing_resource_group = var.resource_group + prefix = format("%s-strg-tie", var.prefix) + storage_subnets = [for subnet in local.storage_subnets : subnet.id] + storage_ssh_keys = local.ssh_keys + storage_servers = var.tie_breaker_bm_server + security_group_ids = module.storage_sg[*].security_group_id + bastion_public_key_content = var.bastion_public_key_content + storage_public_key_content = local.enable_storage ? module.storage_key[0].public_key_content : "" + storage_private_key_content = local.enable_storage ? module.storage_key[0].private_key_content : "" + bms_boot_drive_encryption = var.bms_boot_drive_encryption +} \ No newline at end of file diff --git a/modules/landing_zone_vsi/outputs.tf b/modules/landing_zone_vsi/outputs.tf index 04956af6..572f61b1 100644 --- a/modules/landing_zone_vsi/outputs.tf +++ b/modules/landing_zone_vsi/outputs.tf @@ -18,6 +18,11 @@ output "compute_management_vsi_data" { value = module.compute_cluster_management_vsi[*]["list"] } +output "login_vsi_data" { + description = "Login VSI data" + value = module.login_vsi[*]["list"] +} + output "storage_vsi_data" { description = "Storage VSI data" value = module.storage_vsi[*]["list"] @@ -28,6 +33,21 @@ output "storage_bms_data" { value = flatten(module.storage_baremetal[*].list) } +output "storage_bm_name_with_vol_mapping" { + description = "Storage BareMetal Server data" + value = flatten(module.storage_baremetal[*].instance_ips_with_vol_mapping) +} + +output "storage_tie_breaker_bms_data" { + description = "Storage BareMetal Server data" + value = flatten(module.storage_baremetal_tie_breaker[*].list) +} + +output "storage_tie_breaker_bms_name_with_vol_mapping" { + description = "Storage BareMetal Server data" + value = flatten(module.storage_baremetal_tie_breaker[*].instance_ips_with_vol_mapping) +} + output "storage_cluster_management_vsi" { description = "Storage Management VSI data" value = module.storage_cluster_management_vsi[*]["list"] diff --git a/modules/landing_zone_vsi/template_files.tf b/modules/landing_zone_vsi/template_files.tf index a1cb5b4b..09230e9a 100644 --- a/modules/landing_zone_vsi/template_files.tf +++ b/modules/landing_zone_vsi/template_files.tf @@ -15,8 +15,8 @@ data "template_file" "client_user_data" { bastion_public_key_content = var.bastion_public_key_content != null ? var.bastion_public_key_content : "" client_public_key_content = local.enable_client ? local.compute_public_key_content != null ? local.compute_public_key_content : "" : "" client_private_key_content = local.enable_client ? local.compute_private_key_content != null ? local.compute_private_key_content : "" : "" - client_interfaces = var.storage_type == "scratch" ? local.vsi_interfaces[0] : local.bms_interfaces[0] - client_dns_domain = var.dns_domain_names["compute"] + client_interfaces = local.vsi_interfaces[0] #var.storage_type == "scratch" ? local.vsi_interfaces[0] : local.bms_interfaces[0] + client_dns_domain = var.dns_domain_names["client"] } } @@ -31,17 +31,39 @@ data "template_file" "management_user_data" { } } -data "template_file" "compute_user_data" { - template = file("${path.module}/templates/compute_user_data.tpl") +data "template_file" "lsf_compute_user_data" { + template = file("${path.module}/templates/lsf_compute_user_data.tpl") + vars = { + bastion_public_key_content = var.bastion_public_key_content != null ? var.bastion_public_key_content : "" + management_public_key_content = local.enable_compute ? local.compute_public_key_content != null ? 
local.compute_public_key_content : "" : "" + management_private_key_content = local.enable_compute ? local.compute_private_key_content != null ? local.compute_private_key_content : "" : "" + management_interfaces = var.storage_type == "scratch" ? local.vsi_interfaces[0] : local.bms_interfaces[0] + management_dns_domain = var.dns_domain_names["compute"] + # TODO: Fix me + dynamic_compute_instances = var.dynamic_compute_instances == null ? "" : "" + } +} + +data "template_file" "login_user_data" { + template = file("${path.module}/templates/login_user_data.tpl") + vars = { + bastion_public_key_content = var.bastion_public_key_content != null ? var.bastion_public_key_content : "" + login_public_key_content = local.enable_compute ? local.compute_public_key_content != null ? local.compute_public_key_content : "" : "" + login_private_key_content = local.enable_compute ? local.compute_private_key_content != null ? local.compute_private_key_content : "" : "" + login_interfaces = var.storage_type == "scratch" ? local.vsi_interfaces[0] : local.bms_interfaces[0] + login_dns_domain = var.dns_domain_names["compute"] + scheduler = var.scheduler + } +} + +data "template_file" "scale_compute_user_data" { + template = file("${path.module}/templates/scale_compute_user_data.tpl") vars = { bastion_public_key_content = var.bastion_public_key_content != null ? var.bastion_public_key_content : "" compute_public_key_content = local.enable_compute ? local.compute_public_key_content != null ? local.compute_public_key_content : "" : "" compute_private_key_content = local.enable_compute ? local.compute_private_key_content != null ? local.compute_private_key_content : "" : "" - compute_interfaces = var.storage_type == "scratch" ? local.vsi_interfaces[0] : local.bms_interfaces[0] + compute_interfaces = local.vsi_interfaces[0] #var.storage_type == "scratch" ? local.vsi_interfaces[0] : local.bms_interfaces[0] compute_dns_domain = var.dns_domain_names["compute"] - # TODO: Fix me - dynamic_compute_instances = var.dynamic_compute_instances == null ? "" : "" - scheduler = var.scheduler } } @@ -51,10 +73,11 @@ data "template_file" "storage_user_data" { bastion_public_key_content = var.bastion_public_key_content != null ? var.bastion_public_key_content : "" storage_public_key_content = local.enable_storage ? module.storage_key[0].public_key_content : "" storage_private_key_content = local.enable_storage ? module.storage_key[0].private_key_content : "" - storage_interfaces = var.storage_type == "scratch" ? local.vsi_interfaces[0] : local.bms_interfaces[0] - storage_dns_domain = var.dns_domain_names["storage"] + storage_interfaces = local.vsi_interfaces[0] #var.storage_type == "scratch" ? local.vsi_interfaces[0] : local.bms_interfaces[0] + protocol_interfaces = local.vsi_interfaces[1] + storage_dns_domain = local.enable_storage ? var.dns_domain_names["storage"] : "" storage_disk_type = var.storage_type == "scratch" ? data.ibm_is_instance_profile.storage[0].disks[0].quantity[0].type : "" - protocol_dns_domain = var.dns_domain_names["protocol"] + protocol_dns_domain = local.enable_protocol ? var.dns_domain_names["protocol"] : "" enable_protocol = local.enable_protocol vpc_region = var.vpc_region resource_group_id = var.resource_group @@ -68,10 +91,10 @@ data "template_file" "protocol_user_data" { bastion_public_key_content = var.bastion_public_key_content != null ? var.bastion_public_key_content : "" storage_public_key_content = local.enable_protocol ? 
module.storage_key[0].public_key_content : "" storage_private_key_content = local.enable_protocol ? module.storage_key[0].private_key_content : "" - storage_interfaces = var.storage_type == "scratch" ? local.vsi_interfaces[0] : local.bms_interfaces[0] - protocol_interfaces = var.storage_type == "scratch" ? local.vsi_interfaces[1] : local.bms_interfaces[1] - storage_dns_domain = var.dns_domain_names["storage"] - protocol_dns_domain = var.dns_domain_names["protocol"] + storage_interfaces = local.vsi_interfaces[0] #var.storage_type == "scratch" ? local.vsi_interfaces[0] : local.bms_interfaces[0] + protocol_interfaces = local.vsi_interfaces[1] #var.storage_type == "scratch" ? local.vsi_interfaces[1] : local.bms_interfaces[1] + storage_dns_domain = local.enable_storage ? var.dns_domain_names["storage"] : "" + protocol_dns_domain = local.enable_protocol ? var.dns_domain_names["protocol"] : "" vpc_region = var.vpc_region resource_group_id = var.resource_group protocol_subnets = local.enable_protocol ? local.protocol_subnets[0].id : "" @@ -84,8 +107,8 @@ data "template_file" "afm_user_data" { bastion_public_key_content = var.bastion_public_key_content != null ? var.bastion_public_key_content : "" storage_public_key_content = local.enable_storage ? module.storage_key[0].public_key_content : "" storage_private_key_content = local.enable_storage ? module.storage_key[0].private_key_content : "" - storage_interfaces = var.storage_type == "scratch" ? local.vsi_interfaces[0] : local.bms_interfaces[0] - storage_dns_domain = var.dns_domain_names["storage"] + storage_interfaces = local.vsi_interfaces[0] #var.storage_type == "scratch" ? local.vsi_interfaces[0] : local.bms_interfaces[0] + storage_dns_domain = local.enable_storage ? var.dns_domain_names["storage"] : "" } } diff --git a/modules/landing_zone_vsi/templates/compute_user_data.tpl b/modules/landing_zone_vsi/templates/compute_user_data.tpl deleted file mode 100644 index ed017855..00000000 --- a/modules/landing_zone_vsi/templates/compute_user_data.tpl +++ /dev/null @@ -1,203 +0,0 @@ -#!/usr/bin/bash - -################################################### -# Copyright (C) IBM Corp. 2023 All Rights Reserved. 
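The combined compute template deleted here branched on ${scheduler} inside one rendered script; the patch replaces it with per-role templates (lsf_compute_user_data.tpl, scale_compute_user_data.tpl, login_user_data.tpl), each rendered by its own template_file data source, so every node type receives only the script it needs. A sketch of selecting the rendered payload by scheduler, using a hypothetical local on top of the data sources defined above:

locals {
  # Hypothetical helper, not in the patch: pick the compute user data per scheduler.
  compute_user_data_rendered = (
    var.scheduler == "Scale"
    ? data.template_file.scale_compute_user_data.rendered
    : data.template_file.lsf_compute_user_data.rendered
  )
}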
-# Licensed under the Apache License v2.0 -################################################### - -################################################################################################################## -# Scale Compute Cluter User Data -################################################################################################################## - -if [ "${scheduler}" == "Scale" ]; then - #!/usr/bin/env bash - - exec > >(tee /var/log/ibm_spectrumscale_user-data.log) - - if grep -E -q "CentOS|Red Hat" /etc/os-release - then - USER=vpcuser - elif grep -q "Ubuntu" /etc/os-release - then - USER=ubuntu - fi - - sed -i -e "s/^/no-port-forwarding,no-agent-forwarding,no-X11-forwarding,command=\"echo \'Please login as the user \\\\\"$USER\\\\\" rather than the user \\\\\"root\\\\\".\';echo;sleep 10; exit 142\" /" ~/.ssh/authorized_keys - echo "${bastion_public_key_content}" >> ~/.ssh/authorized_keys - echo "${compute_public_key_content}" >> ~/.ssh/authorized_keys - echo "StrictHostKeyChecking no" >> ~/.ssh/config - echo "${compute_private_key_content}" > ~/.ssh/id_rsa - chmod 600 ~/.ssh/id_rsa - - if grep -q "Red Hat" /etc/os-release - then - USER=vpcuser - REQ_PKG_INSTALLED=0 - if grep -q "platform:el9" /etc/os-release - then - PACKAGE_MGR=dnf - package_list="python3 kernel-devel-$(uname -r) kernel-headers-$(uname -r) firewalld numactl make gcc-c++ elfutils-libelf-devel bind-utils iptables-nft nfs-utils elfutils elfutils-devel python3-dnf-plugin-versionlock" - elif grep -q "platform:el8" /etc/os-release - then - PACKAGE_MGR=dnf - package_list="python38 kernel-devel-$(uname -r) kernel-headers-$(uname -r) firewalld numactl jq make gcc-c++ elfutils-libelf-devel bind-utils iptables nfs-utils elfutils elfutils-devel python3-dnf-plugin-versionlock" - else - PACKAGE_MGR=yum - package_list="python3 kernel-devel-$(uname -r) kernel-headers-$(uname -r) rsync firewalld numactl make gcc-c++ elfutils-libelf-devel bind-utils iptables nfs-utils elfutils elfutils-devel yum-plugin-versionlock" - fi - - RETRY_LIMIT=5 - retry_count=0 - all_pkg_installed=1 - - while [[ $all_pkg_installed -ne 0 && $retry_count -lt $RETRY_LIMIT ]] - do - # Install all required packages - echo "INFO: Attempting to install packages" - $PACKAGE_MGR install -y $package_list - - # Check to ensure packages are installed - pkg_installed=0 - for pkg in $package_list - do - pkg_query=$($PACKAGE_MGR list installed $pkg) - pkg_installed=$(($? + $pkg_installed)) - done - if [[ $pkg_installed -ne 0 ]] - then - # The minimum required packages have not been installed. - echo "WARN: Required packages not installed. Sleeping for 60 seconds and retrying..." 
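For reference, the verify-and-retry idiom in this removed script installs the package list, re-queries every package with `list installed`, and retries (after cleaning and repopulating the repo cache) until all packages report as present or the retry limit is hit. A condensed, standalone sketch of the same idiom; the package manager and package list here are placeholders, not the script's real values:

#!/usr/bin/env bash
PACKAGE_MGR=dnf
package_list="python3 jq make"   # placeholder list

for attempt in $(seq 1 5); do
  "$PACKAGE_MGR" install -y $package_list
  missing=0
  for pkg in $package_list; do
    # Re-query each package; any miss forces another attempt
    "$PACKAGE_MGR" list installed "$pkg" >/dev/null 2>&1 || missing=1
  done
  [ "$missing" -eq 0 ] && break
  "$PACKAGE_MGR" clean all && "$PACKAGE_MGR" makecache
  sleep 60
done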
- touch /var/log/scale-rerun-package-install - echo "INFO: Cleaning and repopulating repository data" - $PACKAGE_MGR clean all - $PACKAGE_MGR makecache - sleep 60 - else - all_pkg_installed=0 - fi - retry_count=$(( $retry_count+1 )) - done - - elif grep -q "Ubuntu" /etc/os-release - then - USER=ubuntu - fi - - yum update --security -y - yum versionlock add $package_list - yum versionlock list - echo 'export PATH=$PATH:/usr/lpp/mmfs/bin' >> /root/.bashrc - - echo "DOMAIN=${compute_dns_domain}" >> "/etc/sysconfig/network-scripts/ifcfg-${compute_interfaces}" - echo "MTU=9000" >> "/etc/sysconfig/network-scripts/ifcfg-${compute_interfaces}" - chage -I -1 -m 0 -M 99999 -E -1 -W 14 vpcuser - sleep 120 - systemctl restart NetworkManager - - systemctl stop firewalld - firewall-offline-cmd --zone=public --add-port=1191/tcp - firewall-offline-cmd --zone=public --add-port=60000-61000/tcp - firewall-offline-cmd --zone=public --add-port=47080/tcp - firewall-offline-cmd --zone=public --add-port=47080/udp - firewall-offline-cmd --zone=public --add-port=47443/tcp - firewall-offline-cmd --zone=public --add-port=47443/udp - firewall-offline-cmd --zone=public --add-port=4444/tcp - firewall-offline-cmd --zone=public --add-port=4444/udp - firewall-offline-cmd --zone=public --add-port=4739/udp - firewall-offline-cmd --zone=public --add-port=4739/tcp - firewall-offline-cmd --zone=public --add-port=9084/tcp - firewall-offline-cmd --zone=public --add-port=9085/tcp - firewall-offline-cmd --zone=public --add-service=http - firewall-offline-cmd --zone=public --add-service=https - - systemctl start firewalld - systemctl enable firewalld - -else - -################################################################################################################## -# LSF Compute Cluter User Data -################################################################################################################## - - #!/usr/bin/env bash - if grep -E -q "CentOS|Red Hat" /etc/os-release - then - USER=vpcuser - elif grep -q "Ubuntu" /etc/os-release - then - USER=ubuntu - fi - sed -i -e "s/^/no-port-forwarding,no-agent-forwarding,no-X11-forwarding,command=\"echo \'Please client as the user \\\\\"$USER\\\\\" rather than the user \\\\\"root\\\\\".\';echo;sleep 5; exit 142\" /" /root/.ssh/authorized_keys - - # input parameters - echo "${bastion_public_key_content}" >> ~/.ssh/authorized_keys - echo "${compute_public_key_content}" >> ~/.ssh/authorized_keys - echo "StrictHostKeyChecking no" >> ~/.ssh/config - echo "${compute_private_key_content}" > ~/.ssh/id_rsa - chmod 600 ~/.ssh/id_rsa - - # Network Configuration - RESOLV_CONF="/etc/resolv.conf" - BACKUP_FILE="/etc/resolv.conf.bkp" - echo "DOMAIN=${compute_dns_domain}" >> "/etc/sysconfig/network-scripts/ifcfg-${compute_interfaces}" - echo "MTU=9000" >> "/etc/sysconfig/network-scripts/ifcfg-${compute_interfaces}" - chage -I -1 -m 0 -M 99999 -E -1 -W 14 vpcuser - sleep 20 - systemctl restart NetworkManager - - make_editable() { - if lsattr "$RESOLV_CONF" 2>/dev/null | grep -q 'i'; then - chattr -i "$RESOLV_CONF" - fi - } - - make_immutable() { - chattr +i "$RESOLV_CONF" - } - - restart_networkmanager() { - systemctl restart NetworkManager - } - - echo "Checking if 'Generated by NetworkManager' exists in $RESOLV_CONF..." - if ! grep -Fq "Generated by NetworkManager" "$RESOLV_CONF"; then - echo "NetworkManager not found, applying fix..." - sleep 20 - - if [ ! 
-f "$BACKUP_FILE" ]; then - cp "$RESOLV_CONF" "$BACKUP_FILE" - echo "Backup created at $BACKUP_FILE" - fi - - make_editable - - attempt=1 - max_attempts=5 - while ! grep -Fq "Generated by NetworkManager" "$RESOLV_CONF" && [ $attempt -le $max_attempts ]; do - echo "Attempt $attempt: 'Generated by NetworkManager' not found, restarting NetworkManager..." - restart_networkmanager - sleep 5 - attempt=$((attempt + 1)) - done - - if grep -q '^search ' "$RESOLV_CONF"; then - sed -i "s|^search .*|search ${compute_dns_domain}|" "$RESOLV_CONF" - else - sed -i "1i search ${compute_dns_domain}" "$RESOLV_CONF" - fi - - make_immutable - echo "Updated $RESOLV_CONF with search domain." - - if systemctl is-active --quiet NetworkManager; then - restart_networkmanager - echo "NetworkManager restarted." - else - echo "NetworkManager is not running." - fi - else - echo "Search domain already present, Updating $RESOLV_CONF has immutable." - make_immutable - fi - -fi diff --git a/modules/landing_zone_vsi/templates/gklm_user_data.tpl b/modules/landing_zone_vsi/templates/gklm_user_data.tpl index cb14c0eb..8ffa7d56 100644 --- a/modules/landing_zone_vsi/templates/gklm_user_data.tpl +++ b/modules/landing_zone_vsi/templates/gklm_user_data.tpl @@ -1,3 +1,5 @@ +#!/bin/bash + ################################################### # Copyright (C) IBM Corp. 2023 All Rights Reserved. # Licensed under the Apache License v2.0 diff --git a/modules/landing_zone_vsi/templates/login_user_data.tpl b/modules/landing_zone_vsi/templates/login_user_data.tpl new file mode 100644 index 00000000..b744f037 --- /dev/null +++ b/modules/landing_zone_vsi/templates/login_user_data.tpl @@ -0,0 +1,62 @@ +#!/usr/bin/bash + +################################################### +# Copyright (C) IBM Corp. 2023 All Rights Reserved. +# Licensed under the Apache License v2.0 +################################################### + +#!/usr/bin/env bash +if grep -E -q "CentOS|Red Hat" /etc/os-release +then + USER=vpcuser +elif grep -q "Ubuntu" /etc/os-release +then + USER=ubuntu +fi + +sed -i -e "s/^/no-port-forwarding,no-agent-forwarding,no-X11-forwarding,command=\"echo \'Please client as the user \\\\\"$USER\\\\\" rather than the user \\\\\"root\\\\\".\';echo;sleep 5; exit 142\" /" /root/.ssh/authorized_keys + +# input parameters +echo "${bastion_public_key_content}" >> ~/.ssh/authorized_keys +echo "${login_public_key_content}" >> ~/.ssh/authorized_keys +echo "StrictHostKeyChecking no" >> ~/.ssh/config +echo "${login_private_key_content}" > ~/.ssh/id_rsa +chmod 600 ~/.ssh/id_rsa + +# Network Configuration +RESOLV_CONF="/etc/resolv.conf" +BACKUP_FILE="/etc/resolv.conf.bkp" + +# Optional: backup the interface config +echo "DOMAIN=${login_dns_domain}" >> "/etc/sysconfig/network-scripts/ifcfg-${login_interfaces}" +echo "MTU=9000" >> "/etc/sysconfig/network-scripts/ifcfg-${login_interfaces}" +chage -I -1 -m 0 -M 99999 -E -1 -W 14 vpcuser +systemctl restart NetworkManager + +make_editable() { + if lsattr "$RESOLV_CONF" 2>/dev/null | grep -q 'i'; then + chattr -i "$RESOLV_CONF" + fi +} + +make_immutable() { + chattr +i "$RESOLV_CONF" +} + +# Backup if not already +if [ ! 
-f "$BACKUP_FILE" ]; then + cp "$RESOLV_CONF" "$BACKUP_FILE" + echo "Backup created at $BACKUP_FILE" +fi + +make_editable + +# Modify or insert 'search' domain +if grep -q '^search ' "$RESOLV_CONF"; then + sed -i "s/^search .*/search ${login_dns_domain}/" "$RESOLV_CONF" +else + echo "search ${login_dns_domain}" >> "$RESOLV_CONF" +fi + +make_immutable +echo "Updated $RESOLV_CONF with search domain '${login_dns_domain}' and locked file." diff --git a/modules/landing_zone_vsi/templates/lsf_compute_user_data.tpl b/modules/landing_zone_vsi/templates/lsf_compute_user_data.tpl new file mode 100644 index 00000000..b8f280a2 --- /dev/null +++ b/modules/landing_zone_vsi/templates/lsf_compute_user_data.tpl @@ -0,0 +1,62 @@ +#!/usr/bin/bash + +################################################### +# Copyright (C) IBM Corp. 2023 All Rights Reserved. +# Licensed under the Apache License v2.0 +################################################### + +#!/usr/bin/env bash +if grep -E -q "CentOS|Red Hat" /etc/os-release +then + USER=vpcuser +elif grep -q "Ubuntu" /etc/os-release +then + USER=ubuntu +fi + +sed -i -e "s/^/no-port-forwarding,no-agent-forwarding,no-X11-forwarding,command=\"echo \'Please client as the user \\\\\"$USER\\\\\" rather than the user \\\\\"root\\\\\".\';echo;sleep 5; exit 142\" /" /root/.ssh/authorized_keys + +# input parameters +echo "${bastion_public_key_content}" >> ~/.ssh/authorized_keys +echo "${management_public_key_content}" >> ~/.ssh/authorized_keys +echo "StrictHostKeyChecking no" >> ~/.ssh/config +echo "${management_private_key_content}" > ~/.ssh/id_rsa +chmod 600 ~/.ssh/id_rsa + +# Network Configuration +RESOLV_CONF="/etc/resolv.conf" +BACKUP_FILE="/etc/resolv.conf.bkp" + +# Optional: backup the interface config +echo "DOMAIN=${management_dns_domain}" >> "/etc/sysconfig/network-scripts/ifcfg-${management_interfaces}" +echo "MTU=9000" >> "/etc/sysconfig/network-scripts/ifcfg-${management_interfaces}" +chage -I -1 -m 0 -M 99999 -E -1 -W 14 vpcuser +systemctl restart NetworkManager + +make_editable() { + if lsattr "$RESOLV_CONF" 2>/dev/null | grep -q 'i'; then + chattr -i "$RESOLV_CONF" + fi +} + +make_immutable() { + chattr +i "$RESOLV_CONF" +} + +# Backup if not already +if [ ! -f "$BACKUP_FILE" ]; then + cp "$RESOLV_CONF" "$BACKUP_FILE" + echo "Backup created at $BACKUP_FILE" +fi + +make_editable + +# Modify or insert 'search' domain +if grep -q '^search ' "$RESOLV_CONF"; then + sed -i "s/^search .*/search ${management_dns_domain}/" "$RESOLV_CONF" +else + echo "search ${management_dns_domain}" >> "$RESOLV_CONF" +fi + +make_immutable +echo "Updated $RESOLV_CONF with search domain '${management_dns_domain}' and locked file." 
diff --git a/modules/landing_zone_vsi/templates/management_user_data.tpl b/modules/landing_zone_vsi/templates/management_user_data.tpl index 254594f5..b8f280a2 100644 --- a/modules/landing_zone_vsi/templates/management_user_data.tpl +++ b/modules/landing_zone_vsi/templates/management_user_data.tpl @@ -13,6 +13,7 @@ elif grep -q "Ubuntu" /etc/os-release then USER=ubuntu fi + sed -i -e "s/^/no-port-forwarding,no-agent-forwarding,no-X11-forwarding,command=\"echo \'Please client as the user \\\\\"$USER\\\\\" rather than the user \\\\\"root\\\\\".\';echo;sleep 5; exit 142\" /" /root/.ssh/authorized_keys # input parameters @@ -26,10 +27,10 @@ chmod 600 ~/.ssh/id_rsa RESOLV_CONF="/etc/resolv.conf" BACKUP_FILE="/etc/resolv.conf.bkp" +# Optional: backup the interface config echo "DOMAIN=${management_dns_domain}" >> "/etc/sysconfig/network-scripts/ifcfg-${management_interfaces}" echo "MTU=9000" >> "/etc/sysconfig/network-scripts/ifcfg-${management_interfaces}" chage -I -1 -m 0 -M 99999 -E -1 -W 14 vpcuser -sleep 20 systemctl restart NetworkManager make_editable() { @@ -42,48 +43,20 @@ make_immutable() { chattr +i "$RESOLV_CONF" } -restart_networkmanager() { - systemctl restart NetworkManager -} - -echo "Checking if 'Generated by NetworkManager' exists in $RESOLV_CONF..." -if ! grep -Fq "Generated by NetworkManager" "$RESOLV_CONF"; then - echo "NetworkManager not found, applying fix..." - - sleep 20 - - if [ ! -f "$BACKUP_FILE" ]; then - cp "$RESOLV_CONF" "$BACKUP_FILE" - echo "Backup created at $BACKUP_FILE" - fi - - make_editable - - attempt=1 - max_attempts=5 - while ! grep -Fq "Generated by NetworkManager" "$RESOLV_CONF" && [ $attempt -le $max_attempts ]; do - echo "Attempt $attempt: 'Generated by NetworkManager' not found, restarting NetworkManager..." - restart_networkmanager - sleep 5 - attempt=$((attempt + 1)) - done - - if grep -q '^search ' "$RESOLV_CONF"; then - sed -i "s|^search .*|search ${management_dns_domain}|" "$RESOLV_CONF" - else - sed -i "1i search ${management_dns_domain}" "$RESOLV_CONF" - fi +# Backup if not already +if [ ! -f "$BACKUP_FILE" ]; then + cp "$RESOLV_CONF" "$BACKUP_FILE" + echo "Backup created at $BACKUP_FILE" +fi - make_immutable - echo "Updated $RESOLV_CONF with search domain." +make_editable - if systemctl is-active --quiet NetworkManager; then - restart_networkmanager - echo "NetworkManager restarted." - else - echo "NetworkManager is not running." - fi +# Modify or insert 'search' domain +if grep -q '^search ' "$RESOLV_CONF"; then + sed -i "s/^search .*/search ${management_dns_domain}/" "$RESOLV_CONF" else - echo "Search domain already present, Updating $RESOLV_CONF has immutable." - make_immutable + echo "search ${management_dns_domain}" >> "$RESOLV_CONF" fi + +make_immutable +echo "Updated $RESOLV_CONF with search domain '${management_dns_domain}' and locked file." diff --git a/modules/landing_zone_vsi/templates/scale_compute_user_data.tpl b/modules/landing_zone_vsi/templates/scale_compute_user_data.tpl new file mode 100644 index 00000000..17a3a235 --- /dev/null +++ b/modules/landing_zone_vsi/templates/scale_compute_user_data.tpl @@ -0,0 +1,115 @@ +#!/usr/bin/bash + +################################################### +# Copyright (C) IBM Corp. 2023 All Rights Reserved. 
+# Licensed under the Apache License v2.0
+###################################################
+
+##################################################################################################################
+# Scale Compute Cluster User Data
+##################################################################################################################
+
+#!/usr/bin/env bash
+
+exec > >(tee /var/log/ibm_spectrumscale_user-data.log)
+
+if grep -E -q "CentOS|Red Hat" /etc/os-release
+then
+  USER=vpcuser
+elif grep -q "Ubuntu" /etc/os-release
+then
+  USER=ubuntu
+fi
+
+sed -i -e "s/^/no-port-forwarding,no-agent-forwarding,no-X11-forwarding,command=\"echo \'Please login as the user \\\\\"$USER\\\\\" rather than the user \\\\\"root\\\\\".\';echo;sleep 5; exit 142\" /" /root/.ssh/authorized_keys
+
+# input parameters
+echo "${bastion_public_key_content}" >> ~/.ssh/authorized_keys
+echo "${compute_public_key_content}" >> ~/.ssh/authorized_keys
+echo "StrictHostKeyChecking no" >> ~/.ssh/config
+echo "${compute_private_key_content}" > ~/.ssh/id_rsa
+chmod 600 ~/.ssh/id_rsa
+
+if grep -q "Red Hat" /etc/os-release
+then
+    USER=vpcuser
+    REQ_PKG_INSTALLED=0
+    if grep -q "platform:el9" /etc/os-release
+    then
+        PACKAGE_MGR=dnf
+        package_list="python3 kernel-devel-$(uname -r) kernel-headers-$(uname -r) firewalld numactl make gcc-c++ elfutils-libelf-devel bind-utils iptables-nft nfs-utils elfutils elfutils-devel python3-dnf-plugin-versionlock"
+    elif grep -q "platform:el8" /etc/os-release
+    then
+        PACKAGE_MGR=dnf
+        package_list="python38 kernel-devel-$(uname -r) kernel-headers-$(uname -r) firewalld numactl jq make gcc-c++ elfutils-libelf-devel bind-utils iptables nfs-utils elfutils elfutils-devel python3-dnf-plugin-versionlock"
+    else
+        PACKAGE_MGR=yum
+        package_list="python3 kernel-devel-$(uname -r) kernel-headers-$(uname -r) rsync firewalld numactl make gcc-c++ elfutils-libelf-devel bind-utils iptables nfs-utils elfutils elfutils-devel yum-plugin-versionlock"
+    fi
+
+    RETRY_LIMIT=5
+    retry_count=0
+    all_pkg_installed=1
+
+    while [[ $all_pkg_installed -ne 0 && $retry_count -lt $RETRY_LIMIT ]]
+    do
+        # Install all required packages
+        echo "INFO: Attempting to install packages"
+        $PACKAGE_MGR install -y $package_list
+
+        # Check to ensure packages are installed
+        pkg_installed=0
+        for pkg in $package_list
+        do
+            pkg_query=$($PACKAGE_MGR list installed $pkg)
+            pkg_installed=$(($? + $pkg_installed))
+        done
+        if [[ $pkg_installed -ne 0 ]]
+        then
+            # The minimum required packages have not been installed.
+            echo "WARN: Required packages not installed. Sleeping for 60 seconds and retrying..."
+ touch /var/log/scale-rerun-package-install + echo "INFO: Cleaning and repopulating repository data" + $PACKAGE_MGR clean all + $PACKAGE_MGR makecache + sleep 60 + else + all_pkg_installed=0 + fi + retry_count=$(( $retry_count+1 )) + done + +elif grep -q "Ubuntu" /etc/os-release +then + USER=ubuntu +fi + +yum update --security -y +yum versionlock add $package_list +yum versionlock list +echo 'export PATH=$PATH:/usr/lpp/mmfs/bin' >> /root/.bashrc + +echo "DOMAIN=${compute_dns_domain}" >> "/etc/sysconfig/network-scripts/ifcfg-${compute_interfaces}" +echo "MTU=9000" >> "/etc/sysconfig/network-scripts/ifcfg-${compute_interfaces}" +chage -I -1 -m 0 -M 99999 -E -1 -W 14 vpcuser +sleep 120 +systemctl restart NetworkManager + +systemctl stop firewalld +firewall-offline-cmd --zone=public --add-port=1191/tcp +firewall-offline-cmd --zone=public --add-port=60000-61000/tcp +firewall-offline-cmd --zone=public --add-port=47080/tcp +firewall-offline-cmd --zone=public --add-port=47080/udp +firewall-offline-cmd --zone=public --add-port=47443/tcp +firewall-offline-cmd --zone=public --add-port=47443/udp +firewall-offline-cmd --zone=public --add-port=4444/tcp +firewall-offline-cmd --zone=public --add-port=4444/udp +firewall-offline-cmd --zone=public --add-port=4739/udp +firewall-offline-cmd --zone=public --add-port=4739/tcp +firewall-offline-cmd --zone=public --add-port=9084/tcp +firewall-offline-cmd --zone=public --add-port=9085/tcp +firewall-offline-cmd --zone=public --add-service=http +firewall-offline-cmd --zone=public --add-service=https + +systemctl start firewalld +systemctl enable firewalld diff --git a/modules/landing_zone_vsi/variables.tf b/modules/landing_zone_vsi/variables.tf index c8a7bd8b..959f2157 100644 --- a/modules/landing_zone_vsi/variables.tf +++ b/modules/landing_zone_vsi/variables.tf @@ -36,6 +36,13 @@ variable "zones" { type = list(string) } + +variable "cluster_cidr" { + description = "Network CIDR of the VPC. This is used to manage network security rules for cluster provisioning." + type = string + default = "10.241.0.0/18" +} + ############################################################################## # VPC Variables ############################################################################## @@ -67,12 +74,6 @@ variable "bastion_public_key_content" { description = "Bastion security group id." } -variable "bastion_security_group_id_for_ref" { - type = string - description = "Bastion security group id for ref." -} - - variable "storage_security_group_id" { type = string default = null @@ -110,7 +111,7 @@ variable "client_instances" { description = "Number of instances to be launched for client." } -variable "cluster_subnet_ids" { +variable "cluster_subnet_id" { type = list(object({ name = string id = string @@ -240,6 +241,24 @@ variable "storage_servers" { description = "Number of BareMetal Servers to be launched for storage cluster." } +variable "tie_breaker_bm_server" { + type = list( + object({ + profile = string + count = number + image = string + filesystem = string + }) + ) + default = [{ + profile = "cx2d-metal-96x192" + count = 1 + image = "ibm-redhat-8-10-minimal-amd64-4" + filesystem = "fs1" + }] + description = "BareMetal Server to be launched for Tie Breaker." 
+} + variable "protocol_subnets" { type = list(object({ name = string @@ -292,13 +311,17 @@ variable "nsd_details" { variable "dns_domain_names" { type = object({ compute = string - storage = string - protocol = string + storage = optional(string) + protocol = optional(string) + client = optional(string) + gklm = optional(string) }) default = { compute = "comp.com" storage = "strg.com" protocol = "ces.com" + client = "clnt.com" + gklm = "gklm.com" } description = "IBM Cloud HPC DNS domain names." } @@ -344,10 +367,10 @@ variable "existing_kms_instance_guid" { # description = "Compute security key content." # } -variable "enable_bastion" { +variable "enable_deployer" { type = bool default = true - description = "The solution supports multiple ways to connect to your HPC cluster for example, using bastion node, via VPN or direct connection. If connecting to the HPC cluster via VPN or direct connection, set this value to false." + description = "Deployer should be only used for better deployment performance" } ############################################################################# @@ -466,3 +489,37 @@ variable "enable_dedicated_host" { default = false description = "Enables dedicated host to the compute instances" } + +############################################################################## +# Login Variables +############################################################################## +variable "login_instance" { + type = list( + object({ + profile = string + image = string + }) + ) + default = [{ + profile = "bx2-2x8" + image = "hpcaas-lsf10-rhel810-compute-v8" + }] + description = "Number of instances to be launched for login node." +} + +variable "bastion_subnets" { + type = list(object({ + name = string + id = string + zone = string + cidr = string + })) + default = [] + description = "Subnets to launch the bastion host." +} + +variable "bms_boot_drive_encryption" { + type = bool + default = false + description = "To enable the encryption for the boot drive of bare metal server. Select true or false" +} \ No newline at end of file diff --git a/modules/playbook/main.tf b/modules/playbook/main.tf index e7c283b1..6e4a2a93 100644 --- a/modules/playbook/main.tf +++ b/modules/playbook/main.tf @@ -1,18 +1,35 @@ locals { - proxyjump = var.enable_bastion ? "-o ProxyJump=ubuntu@${var.bastion_fip}" : "" - ldap_server_inventory = format("%s/ldap_server_inventory.ini", var.playbooks_path) - configure_ldap_client = format("%s/configure_ldap_client.yml", var.playbooks_path) - prepare_ldap_server = format("%s/prepare_ldap_server.yml", var.playbooks_path) - dns_resolver_playbook = format("%s/dns_resolver.yml", var.playbooks_path) + proxyjump = var.enable_deployer ? 
"-o ProxyJump=ubuntu@${var.bastion_fip}" : "" + common_config_playbook = format("%s/common_config_playbook.yml", var.playbooks_path) + pre_lsf_config_playbook = format("%s/pre_lsf_config_playbook.yml", var.playbooks_path) + login_node_playbook = format("%s/login_node_configuration.yml", var.playbooks_path) + lsf_post_config_playbook = format("%s/lsf_post_config_playbook.yml", var.playbooks_path) + ldap_server_inventory = format("%s/ldap_server_inventory.ini", var.playbooks_path) + configure_ldap_client = format("%s/configure_ldap_client.yml", var.playbooks_path) + prepare_ldap_server = format("%s/prepare_ldap_server.yml", var.playbooks_path) + deployer_hostentry_playbook_path = format("%s/deployer_host_entry_play.yml", var.playbooks_path) + lsf_hostentry_playbook_path = format("%s/lsf_host_entry_play.yml", var.playbooks_path) + remove_hostentry_playbooks_path = format("%s/remove_host_entry_play.yml", var.playbooks_path) + lsf_prerequesite_playbook_path = format("%s/lsf_prerequesite_play.yml", var.playbooks_path) + deployer_host = jsonencode(var.deployer_host) + mgmnt_hosts = jsonencode(var.mgmnt_hosts) + comp_hosts = jsonencode(var.comp_hosts) + login_host = jsonencode(var.login_host) + # domain_name = var.domain_name } -resource "local_file" "create_playbook_for_dns_resolver" { - count = var.scheduler == "LSF" ? 1 : 0 +resource "local_file" "deployer_host_entry_play" { + count = var.inventory_path != null && var.scheduler == "LSF" ? 1 : 0 content = <- + {{ {} | combine(mgmnt_hosts | from_json, comp_hosts | from_json, login_host | from_json) }} + + - name: Invert mapping to ensure 1 hostname = 1 IP (latest IP kept) + ansible.builtin.set_fact: + hostname_map: >- + {{ + all_hosts + | dict2items + | reverse + | items2dict(key_name='value', value_name='key') + }} + + - name: Generate managed block content + ansible.builtin.set_fact: + managed_block: | + {% for hostname, ip in hostname_map.items() -%} + {{ ip }} {{ hostname }} {{ hostname }}.{{ domain_name }} + {% endfor %} + + - name: Update /etc/hosts with managed entries + ansible.builtin.blockinfile: + path: "{{ hosts_file }}" + marker: "# === ANSIBLE MANAGED HOSTS {mark} ===" + block: "{{ managed_block }}" - name: Insert Create folder and Ensure js.conf lines ansible.builtin.blockinfile: @@ -85,27 +84,29 @@ resource "local_file" "create_playbook_for_dns_resolver" { mode: '0644' {% endraw %} EOT - filename = local.dns_resolver_playbook + filename = local.deployer_hostentry_playbook_path } -resource "null_resource" "configure_dns_resolver" { - count = var.scheduler == "LSF" ? 1 : 0 +resource "null_resource" "deploy_host_playbook" { + count = var.inventory_path != null && var.scheduler == "LSF" ? 1 : 0 provisioner "local-exec" { interpreter = ["/bin/bash", "-c"] - command = "ansible-playbook ${local.dns_resolver_playbook}" + command = "sudo ansible-playbook -f 200 -e 'mgmnt_hosts=${local.mgmnt_hosts}' -e 'comp_hosts=${local.comp_hosts}' -e 'login_host=${local.login_host}' -e 'domain_name=${var.domain_name}' '${local.deployer_hostentry_playbook_path}'" } + triggers = { build = timestamp() } + depends_on = [local_file.deployer_host_entry_play] } -resource "local_file" "create_playbook" { +resource "local_file" "lsf_host_entry_playbook" { count = var.inventory_path != null && var.scheduler == "LSF" ? 
1 : 0 content = <- + {{ {} | combine(mgmnt_hosts | from_json, comp_hosts | from_json, login_host | from_json, deployer_host | from_json) }} + + - name: Invert mapping to ensure 1 hostname = 1 IP (latest IP kept) + ansible.builtin.set_fact: + hostname_map: >- + {{ + all_hosts + | dict2items + | reverse + | items2dict(key_name='value', value_name='key') + }} + + - name: Generate managed block content + ansible.builtin.set_fact: + managed_block: | + {% for hostname, ip in hostname_map.items() -%} + {{ ip }} {{ hostname }} {{ hostname }}.{{ domain_name }} + {% endfor %} + + - name: Update /etc/hosts with managed entries + ansible.builtin.blockinfile: + path: "{{ hosts_file }}" + marker: "# === ANSIBLE MANAGED HOSTS {mark} ===" + block: "{{ managed_block }}" +EOT + filename = local.lsf_hostentry_playbook_path +} + +resource "null_resource" "lsf_host_play" { + count = var.inventory_path != null && var.scheduler == "LSF" ? 1 : 0 + provisioner "local-exec" { + interpreter = ["/bin/bash", "-c"] + command = "sudo ansible-playbook -f 50 -e 'deployer_host=${local.deployer_host}' -e 'mgmnt_hosts=${local.mgmnt_hosts}' -e 'comp_hosts=${local.comp_hosts}' -e 'login_host=${local.login_host}' -e 'domain_name=${var.domain_name}' -i ${var.inventory_path} '${local.lsf_hostentry_playbook_path}'" + } + + triggers = { + build = timestamp() + } + depends_on = [null_resource.deploy_host_playbook, local_file.lsf_host_entry_playbook] +} + +resource "local_file" "create_common_config_playbook" { + count = var.inventory_path != null && var.scheduler == "LSF" ? 1 : 0 + content = < + ${local.proxyjump} + -o ControlMaster=auto + -o ControlPersist=30m + -o UserKnownHostsFile=/dev/null + -o StrictHostKeyChecking=no + ansible_user: root + ansible_ssh_private_key_file: ${var.private_key_path} + pre_tasks: + - name: Load cluster-specific variables + include_vars: all.json + roles: + - { role: lsf_template_config } +EOT + filename = local.pre_lsf_config_playbook +} + +resource "null_resource" "run_pre_lsf_config_playbook" { + count = var.inventory_path != null && var.scheduler == "LSF" ? 1 : 0 + provisioner "local-exec" { + interpreter = ["/bin/bash", "-c"] + command = "sudo ansible-playbook -f 200 -i ${var.inventory_path} ${local.pre_lsf_config_playbook}" + } + triggers = { + build = timestamp() + } + depends_on = [local_file.create_pre_lsf_config_playbook, null_resource.run_common_config_playbook] +} + +resource "local_file" "lsf_prerequesite_playbook" { + count = var.inventory_path != null && var.scheduler == "LSF" && var.enable_dedicated_host ? 1 : 0 + content = < + ${local.proxyjump} + -o ControlMaster=auto + -o ControlPersist=30m + -o UserKnownHostsFile=/dev/null + -o StrictHostKeyChecking=no + ansible_user: root + ansible_ssh_private_key_file: ${var.private_key_path} + pre_tasks: + - name: Load cluster-specific variables + include_vars: all.json + roles: + - { role: lsf_login_config } +EOT + filename = local.login_node_playbook +} + + +resource "null_resource" "run_playbook_for_login_node_config" { + count = var.inventory_path != null && var.scheduler == "LSF" ? 
1 : 0 + provisioner "local-exec" { + interpreter = ["/bin/bash", "-c"] + command = "sudo ansible-playbook -f 200 -i ${var.inventory_path} ${local.login_node_playbook}" } triggers = { build = timestamp() @@ -222,6 +429,44 @@ resource "null_resource" "run_playbook_for_mgmt_config" { depends_on = [local_file.create_playbook_for_mgmt_config, null_resource.run_lsf_playbooks] } +resource "local_file" "create_playbook_for_post_deploy_config" { + count = var.inventory_path != null && var.scheduler == "LSF" ? 1 : 0 + content = < + ${local.proxyjump} + -o ControlMaster=auto + -o ControlPersist=30m + -o UserKnownHostsFile=/dev/null + -o StrictHostKeyChecking=no + ansible_user: root + ansible_ssh_private_key_file: ${var.private_key_path} + pre_tasks: + - name: Load cluster-specific variables + include_vars: all.json + roles: + - { role: lsf_post_config } +EOT + filename = local.lsf_post_config_playbook +} + + +resource "null_resource" "run_playbook_post_deploy_config" { + count = var.inventory_path != null && var.scheduler == "LSF" ? 1 : 0 + provisioner "local-exec" { + interpreter = ["/bin/bash", "-c"] + command = "sudo ansible-playbook -f 200 -i ${var.inventory_path} ${local.lsf_post_config_playbook}" + } + triggers = { + build = timestamp() + } + depends_on = [local_file.create_playbook_for_post_deploy_config, null_resource.run_playbook_for_mgmt_config, null_resource.run_playbook_for_login_node_config] +} + resource "local_file" "prepare_ldap_server_playbook" { count = local.ldap_server_inventory != null && var.enable_ldap && var.ldap_server == "null" && var.scheduler == "LSF" ? 1 : 0 content = < 0 ? var.dns_custom_resolver_id : null) : var.dns_custom_resolver_id) - dns_instance_id = jsonencode(var.dns_instance_id != null ? (length(var.dns_instance_id) > 0 ? var.dns_instance_id : null) : var.dns_instance_id) - list_ldap_instances = jsonencode(var.ldap_instances) - ldap_server = jsonencode(var.ldap_server) - ldap_basedns = jsonencode(var.ldap_basedns) - list_ldap_ssh_keys = jsonencode(var.ldap_instance_key_pair) - list_afm_instances = jsonencode(var.afm_instances) - afm_cos_config_details = jsonencode(var.afm_cos_config) - list_gklm_ssh_keys = jsonencode(var.gklm_instance_key_pair) - list_gklm_instances = jsonencode(var.gklm_instances) - scale_encryption_type = jsonencode(var.scale_encryption_type) - filesystem_config = jsonencode(var.filesystem_config) - scale_encryption_admin_password = jsonencode(var.scale_encryption_admin_password) - custom_file_shares = jsonencode(var.custom_file_shares) - resource_group_ids = jsonencode(var.resource_group_ids) - existing_bastion_instance_name = jsonencode(var.existing_bastion_instance_name == null ? null : var.existing_bastion_instance_name) + schematics_inputs_path = format("/tmp/.schematics/%s/solution_terraform.auto.tfvars.json", var.cluster_prefix) + scheduler = var.scheduler == null ? "null" : var.scheduler + ibm_customer_number = var.ibm_customer_number == null ? "" : var.ibm_customer_number + storage_security_group_id = var.storage_security_group_id == null ? 
"" : var.storage_security_group_id + zones = jsonencode(var.zones) + list_ssh_keys = jsonencode(var.ssh_keys) + list_storage_instances = jsonencode(var.storage_instances) + list_storage_servers = jsonencode(var.storage_servers) + list_tie_breaker_bm_server = jsonencode(var.tie_breaker_bm_server) + list_management_instances = jsonencode(var.management_instances) + list_protocol_instances = jsonencode(var.protocol_instances) + list_compute_instances = jsonencode(var.static_compute_instances) + list_client_instances = jsonencode(var.client_instances) + remote_allowed_ips = jsonencode(var.remote_allowed_ips) + list_storage_subnets = jsonencode(length(var.storage_subnets) == 0 ? null : var.storage_subnets) + list_protocol_subnets = jsonencode(length(var.protocol_subnets) == 0 ? null : var.protocol_subnets) + list_cluster_subnet_id = jsonencode(length(var.cluster_subnet_id) == 0 ? null : var.cluster_subnet_id) + list_client_subnets = jsonencode(length(var.client_subnets) == 0 ? null : var.client_subnets) + list_login_subnet_ids = jsonencode(length(var.login_subnet_id) == 0 ? null : var.login_subnet_id) + dns_domain_names = jsonencode(var.dns_domain_names) + dynamic_compute_instances = jsonencode(var.dynamic_compute_instances) + kms_key_name = jsonencode(var.kms_key_name) + kms_instance_name = jsonencode(var.kms_instance_name) + key_management = jsonencode(var.key_management) + boot_volume_encryption_key = jsonencode(var.boot_volume_encryption_key) + existing_kms_instance_guid = jsonencode(var.existing_kms_instance_guid) + dns_custom_resolver_id = jsonencode(var.dns_custom_resolver_id != null ? (length(var.dns_custom_resolver_id) > 0 ? var.dns_custom_resolver_id : null) : var.dns_custom_resolver_id) + dns_instance_id = jsonencode(var.dns_instance_id != null ? (length(var.dns_instance_id) > 0 ? var.dns_instance_id : null) : var.dns_instance_id) + list_ldap_instances = jsonencode(var.ldap_instance) + ldap_server = jsonencode(var.ldap_server) + ldap_basedns = jsonencode(var.ldap_basedns) + list_ldap_ssh_keys = jsonencode(var.ldap_instance_key_pair) + list_afm_instances = jsonencode(var.afm_instances) + afm_cos_config_details = jsonencode(var.afm_cos_config) + list_gklm_ssh_keys = jsonencode(var.gklm_instance_key_pair) + list_gklm_instances = jsonencode(var.gklm_instances) + scale_encryption_type = jsonencode(var.scale_encryption_type) + filesystem_config = jsonencode(var.filesystem_config) + scale_encryption_admin_username = jsonencode(var.scale_encryption_admin_username) + scale_encryption_admin_default_password = jsonencode(var.scale_encryption_admin_default_password) + scale_encryption_admin_password = jsonencode(var.scale_encryption_admin_password) + key_protect_instance_id = jsonencode(var.key_protect_instance_id) + custom_file_shares = jsonencode(var.custom_file_shares) + resource_group_ids = jsonencode(var.resource_group_ids) + existing_bastion_instance_name = jsonencode(var.existing_bastion_instance_name == null ? null : var.existing_bastion_instance_name) + existing_bastion_security_group_id = jsonencode(var.existing_bastion_security_group_id == null ? 
null : var.existing_bastion_security_group_id) + login_instance = jsonencode(var.login_instance) } diff --git a/modules/prepare_tf_input/main.tf b/modules/prepare_tf_input/main.tf index 91f2187f..e75ce2c3 100644 --- a/modules/prepare_tf_input/main.tf +++ b/modules/prepare_tf_input/main.tf @@ -4,18 +4,18 @@ resource "local_sensitive_file" "prepare_tf_input" { { "scheduler": "${local.scheduler}", "ibmcloud_api_key": "${var.ibmcloud_api_key}", + "app_center_gui_password": "${var.app_center_gui_password}", "lsf_version": "${var.lsf_version}", - "github_token": "${var.github_token}", "resource_group_ids": ${local.resource_group_ids}, "cluster_prefix": "${var.cluster_prefix}", "zones": ${local.zones}, "enable_landing_zone": false, "enable_deployer": false, - "enable_bastion": false, "existing_bastion_instance_name": ${local.existing_bastion_instance_name}, "ssh_keys": ${local.list_ssh_keys}, "storage_instances": ${local.list_storage_instances}, "storage_servers": ${local.list_storage_servers}, + "tie_breaker_bm_server": ${local.list_tie_breaker_bm_server}, "storage_type": "${var.storage_type}", "management_instances": ${local.list_management_instances}, "protocol_instances": ${local.list_protocol_instances}, @@ -31,7 +31,7 @@ resource "local_sensitive_file" "prepare_tf_input" { "vpc_name": "${var.vpc_name}", "storage_subnets": ${local.list_storage_subnets}, "protocol_subnets": ${local.list_protocol_subnets}, - "cluster_subnet_ids": ${local.list_cluster_subnet_ids}, + "cluster_subnet_id": ${local.list_cluster_subnet_id}, "client_subnets": ${local.list_client_subnets}, "login_subnet_id": ${local.list_login_subnet_ids}, "dns_domain_names": ${local.dns_domain_names}, @@ -44,11 +44,11 @@ resource "local_sensitive_file" "prepare_tf_input" { "dns_custom_resolver_id": ${local.dns_custom_resolver_id}, "dns_instance_id": ${local.dns_instance_id}, "bastion_security_group_id": "${var.bastion_security_group_id}", - "bastion_security_group_id_for_ref": "${var.bastion_security_group_id_for_ref}", + "existing_bastion_security_group_id": ${local.existing_bastion_security_group_id}, "deployer_hostname": "${var.deployer_hostname}", "deployer_ip": "${var.deployer_ip}", "bastion_fip": "${var.bastion_fip}", - "ldap_instances": ${local.list_ldap_instances}, + "ldap_instance": ${local.list_ldap_instances}, "enable_ldap": ${var.enable_ldap}, "ldap_server": ${local.ldap_server}, "ldap_basedns": ${local.ldap_basedns}, @@ -63,7 +63,10 @@ resource "local_sensitive_file" "prepare_tf_input" { "scale_encryption_type": ${local.scale_encryption_type}, "gklm_instance_key_pair": ${local.list_gklm_ssh_keys}, "gklm_instances": ${local.list_gklm_instances}, - "scale_encryption_admin_password": "${local.scale_encryption_admin_password}", + "scale_encryption_admin_username": ${local.scale_encryption_admin_username}, + "scale_encryption_admin_default_password": ${local.scale_encryption_admin_default_password}, + "scale_encryption_admin_password": ${local.scale_encryption_admin_password}, + "key_protect_instance_id": "${local.key_protect_instance_id}", "filesystem_config": ${local.filesystem_config}, "enable_hyperthreading": ${var.enable_hyperthreading}, "scc_enable": ${var.scc_enable}, @@ -86,7 +89,10 @@ resource "local_sensitive_file" "prepare_tf_input" { "observability_atracker_target_type": "${var.observability_atracker_target_type}", "enable_dedicated_host": "${var.enable_dedicated_host}", "storage_security_group_id": "${local.storage_security_group_id}", - "custom_file_shares": ${local.custom_file_shares} + "custom_file_shares": 
${local.custom_file_shares}, + "login_instance": ${local.login_instance}, + "vpc_cluster_private_subnets_cidr_blocks": "${var.vpc_cluster_private_subnets_cidr_blocks}", + "bms_boot_drive_encryption": ${var.bms_boot_drive_encryption} } EOT filename = local.schematics_inputs_path diff --git a/modules/prepare_tf_input/variables.tf b/modules/prepare_tf_input/variables.tf index cf2522af..1def5b3c 100644 --- a/modules/prepare_tf_input/variables.tf +++ b/modules/prepare_tf_input/variables.tf @@ -8,13 +8,6 @@ variable "ibmcloud_api_key" { description = "IBM Cloud API Key that will be used for authentication in scripts run in this module. Only required if certain options are required." } -# Delete this variable before pushing to the public repository. -variable "github_token" { - type = string - default = null - description = "Provide your GitHub token to download the HPCaaS code into the Deployer node" -} - variable "lsf_version" { type = string default = "fixpack_15" @@ -26,7 +19,7 @@ variable "lsf_version" { ############################################################################## variable "cluster_prefix" { type = string - default = "hpc" + default = "lsf" description = "A unique identifier for resources. Must begin with a letter and end with a letter or number. This cluster_prefix will be prepended to any resources provisioned by this template. Prefixes must be 16 or fewer characters." validation { error_message = "Prefix must begin and end with a letter and contain only letters, numbers, and - characters." @@ -77,7 +70,7 @@ variable "client_instances" { description = "Number of instances to be launched for client." } -variable "cluster_subnet_ids" { +variable "cluster_subnet_id" { type = string default = null description = "Name of an existing subnets in which the cluster resources will be deployed. If no value is given, then new subnet(s) will be provisioned for the cluster. [Learn more](https://cloud.ibm.com/docs/vpc)" @@ -97,10 +90,9 @@ variable "management_instances" { variable "static_compute_instances" { type = list( object({ - profile = string - count = number - image = string - filesystem = string + profile = string + count = number + image = string }) ) description = "Min Number of instances to be launched for compute cluster." @@ -165,6 +157,24 @@ variable "storage_servers" { description = "Number of BareMetal Servers to be launched for storage cluster." } +variable "tie_breaker_bm_server" { + type = list( + object({ + profile = string + count = number + image = string + filesystem = string + }) + ) + default = [{ + profile = "cx2d-metal-96x192" + count = 1 + image = "ibm-redhat-8-10-minimal-amd64-4" + filesystem = "fs1" + }] + description = "BareMetal Server to be launched for Tie Breaker." +} + variable "protocol_instances" { type = list( object({ @@ -265,10 +275,10 @@ variable "bastion_security_group_id" { description = "bastion security group id" } -variable "bastion_security_group_id_for_ref" { +variable "existing_bastion_security_group_id" { type = string default = null - description = "bastion security group id for ref" + description = "Existing Bastion Security Group ID" } variable "deployer_hostname" { @@ -505,7 +515,7 @@ variable "ldap_instance_key_pair" { description = "Name of the SSH key configured in your IBM Cloud account that is used to establish a connection to the LDAP Server. Make sure that the SSH key is present in the same resource group and region where the LDAP Servers are provisioned. 
If you do not have an SSH key in your IBM Cloud account, create one by using the [SSH keys](https://cloud.ibm.com/docs/vpc?topic=vpc-ssh-keys) instructions." } -variable "ldap_instances" { +variable "ldap_instance" { type = list( object({ profile = string @@ -556,17 +566,17 @@ variable "gklm_instances" { description = "Number of instances to be launched for client." } -# variable "scale_encryption_admin_default_password" { -# type = string -# default = null -# description = "The default administrator password used for resetting the admin password based on the user input. The password has to be updated which was configured during the GKLM installation." -# } +variable "scale_encryption_admin_default_password" { + type = string + default = "SKLM@dmin123" + description = "The default administrator password used for resetting the admin password based on the user input. The password has to be updated which was configured during the GKLM installation." +} -# variable "scale_encryption_admin_username" { -# type = string -# default = null -# description = "The default Admin username for Security Key Lifecycle Manager(GKLM)." -# } +variable "scale_encryption_admin_username" { + type = string + default = "SKLMAdmin" + description = "The default Admin username for Security Key Lifecycle Manager(GKLM)." +} variable "scale_encryption_admin_password" { type = string @@ -574,6 +584,12 @@ variable "scale_encryption_admin_password" { description = "Password that is used for performing administrative operations for the GKLM.The password must contain at least 8 characters and at most 20 characters. For a strong password, at least three alphabetic characters are required, with at least one uppercase and one lowercase letter. Two numbers, and at least one special character from this(~@_+:). Make sure that the password doesn't include the username. Visit this [page](https://www.ibm.com/docs/en/gklm/3.0.1?topic=roles-password-policy) to know more about password policy of GKLM. " } +variable "key_protect_instance_id" { + type = string + default = null + description = "An existing Key Protect instance used for filesystem encryption" +} + variable "storage_type" { type = string default = "scratch" @@ -715,3 +731,43 @@ variable "existing_bastion_instance_name" { default = null description = "Bastion instance name." } + +########################################################################### +# Application Center variables +########################################################################### + +variable "app_center_gui_password" { + type = string + default = "" + sensitive = true + description = "Password for IBM Spectrum LSF Application Center GUI." +} + +########################################################################### +# Login Node variables +########################################################################### +variable "login_instance" { + type = list( + object({ + profile = string + image = string + }) + ) + default = [{ + profile = "bx2-2x8" + image = "hpcaas-lsf10-rhel810-compute-v8" + }] + description = "Number of instances to be launched for login node." +} + +variable "vpc_cluster_private_subnets_cidr_blocks" { + type = string + default = "10.241.0.0/20" + description = "Provide the CIDR block required for the creation of the compute cluster's private subnet. One CIDR block is required. If using a hybrid environment, modify the CIDR block to avoid conflicts with any on-premises CIDR blocks. 
Ensure the selected CIDR block size can accommodate the maximum number of management and dynamic compute nodes expected in your cluster. For more information on CIDR block size selection, refer to the documentation, see [Choosing IP ranges for your VPC](https://cloud.ibm.com/docs/vpc?topic=vpc-choosing-ip-ranges-for-your-vpc)." +} + +variable "bms_boot_drive_encryption" { + type = bool + default = false + description = "To enable the encryption for the boot drive of bare metal server. Select true or false" +} \ No newline at end of file diff --git a/modules/resource_provisioner/locals.tf b/modules/resource_provisioner/locals.tf index eb432634..1b51d87a 100644 --- a/modules/resource_provisioner/locals.tf +++ b/modules/resource_provisioner/locals.tf @@ -1,14 +1,15 @@ locals { - schematics_inputs_path = "/tmp/.schematics/solution_terraform.auto.tfvars.json" - remote_inputs_path = format("%s/terraform.tfvars.json", "/tmp") - deployer_path = "/opt/ibm" - remote_terraform_path = format("%s/terraform-ibm-hpc", local.deployer_path) - da_hpc_repo_url = "github.ibm.com/workload-eng-services/HPCaaS.git" - da_hpc_repo_tag = "develop-da-longterm" ###### change it to main in future + schematics_inputs_path = format("/tmp/.schematics/%s/solution_terraform.auto.tfvars.json", var.cluster_prefix) + remote_inputs_path = format("%s/terraform.tfvars.json", "/tmp") + deployer_path = "/opt/ibm" + remote_terraform_path = format("%s/terraform-ibm-hpc", local.deployer_path) + da_hpc_repo_url = "github.com/terraform-ibm-modules/terraform-ibm-hpc.git" + da_hpc_repo_tag = "develop-da-longterm-scale" ###### change it to main in future remote_ansible_path = format("%s/ibm-spectrumscale-cloud-deploy", local.deployer_path) scale_cloud_infra_repo_url = "https://github.com/jayeshh123/ibm-spectrum-scale-install-infra" scale_cloud_infra_repo_name = "ibm-spectrum-scale-install-infra" scale_cloud_infra_repo_tag = "jay_scale_da_api" products = var.scheduler == "Scale" ? "scale" : "lsf" ssh_key_file = "${path.root}/../../solutions/${local.products}/bastion_id_rsa" + bastion_public_key_content = var.existing_bastion_instance_name != null ? var.bastion_public_key_content : "" } diff --git a/modules/resource_provisioner/main.tf b/modules/resource_provisioner/main.tf index 7b9ae7e3..9ce798a1 100644 --- a/modules/resource_provisioner/main.tf +++ b/modules/resource_provisioner/main.tf @@ -20,8 +20,7 @@ resource "null_resource" "tf_resource_provisioner" { inline = [ # Remove and re-clone the remote terraform path repo # "if [ -d ${local.remote_terraform_path} ]; then echo 'Removing existing repository at ${local.remote_terraform_path}' && sudo rm -rf ${local.remote_terraform_path}; fi", - # "echo 'Cloning repository with tag: ${local.da_hpc_repo_tag}' && sudo git clone -b ${local.da_hpc_repo_tag} https://${var.github_token}@${local.da_hpc_repo_url} ${local.remote_terraform_path}", - "if [ ! -d ${local.remote_terraform_path} ]; then echo 'Cloning repository with tag: ${local.da_hpc_repo_tag}' && sudo git clone -b ${local.da_hpc_repo_tag} https://${var.github_token}@${local.da_hpc_repo_url} ${local.remote_terraform_path}; fi", + "if [ ! -d ${local.remote_terraform_path} ]; then echo 'Cloning repository with tag: ${local.da_hpc_repo_tag}' && sudo git clone -b ${local.da_hpc_repo_tag} https://${local.da_hpc_repo_url} ${local.remote_terraform_path}; fi", # Clone Spectrum Scale collection if it doesn't exist "if [ ! 
-d ${local.remote_ansible_path}/${local.scale_cloud_infra_repo_name}/collections/ansible_collections/ibm/spectrum_scale ]; then sudo git clone -b ${local.scale_cloud_infra_repo_tag} ${local.scale_cloud_infra_repo_url} ${local.remote_ansible_path}/${local.scale_cloud_infra_repo_name}/collections/ansible_collections/ibm/spectrum_scale; fi", @@ -33,7 +32,7 @@ resource "null_resource" "tf_resource_provisioner" { "sudo cp ${local.remote_inputs_path} ${local.remote_terraform_path}", # Run Terraform init and apply - "export TF_LOG=${var.TF_LOG} && sudo -E terraform -chdir=${local.remote_terraform_path} init && sudo -E terraform -chdir=${local.remote_terraform_path} apply -parallelism=${var.TF_PARALLELISM} -auto-approve" + "export TF_LOG=${var.TF_LOG} && sudo -E terraform -chdir=${local.remote_terraform_path} init && sudo -E terraform -chdir=${local.remote_terraform_path} apply -parallelism=${var.TF_PARALLELISM} -auto-approve -lock=false" ] } @@ -42,6 +41,25 @@ resource "null_resource" "tf_resource_provisioner" { } } +resource "null_resource" "ext_bastion_access" { + count = var.enable_deployer && var.existing_bastion_instance_name != null ? 1 : 0 + + connection { + type = "ssh" + host = var.bastion_fip + user = "ubuntu" + private_key = var.bastion_private_key_content + timeout = "60m" + } + + provisioner "remote-exec" { + inline = [ + "echo 'Adding SSH Key to Existing Bastion Host'", + sensitive("echo '${local.bastion_public_key_content}' >> /home/$(whoami)/.ssh/authorized_keys"), + ] + } +} + resource "null_resource" "fetch_host_details_from_deployer" { count = var.enable_deployer == true && var.scheduler == "LSF" ? 1 : 0 @@ -107,7 +125,7 @@ resource "null_resource" "cluster_destroyer" { when = destroy on_failure = fail inline = [ - "export TF_LOG=${self.triggers.conn_terraform_log_level} && sudo -E terraform -chdir=${self.triggers.conn_remote_terraform_path} destroy -auto-approve" + "export TF_LOG=${self.triggers.conn_terraform_log_level} && sudo -E terraform -chdir=${self.triggers.conn_remote_terraform_path} destroy -auto-approve -lock=false" ] } } diff --git a/modules/resource_provisioner/variables.tf b/modules/resource_provisioner/variables.tf index 54740f79..772f43b5 100644 --- a/modules/resource_provisioner/variables.tf +++ b/modules/resource_provisioner/variables.tf @@ -8,11 +8,17 @@ variable "ibmcloud_api_key" { description = "IBM Cloud API Key that will be used for authentication in scripts run in this module. Only required if certain options are required." } -# Delete this variable before pushing to the public repository. -variable "github_token" { +############################################################################## +# Cluster Level Variables +############################################################################## +variable "cluster_prefix" { type = string - default = null - description = "Provide your GitHub token to download the HPCaaS code into the Deployer node" + default = "hpc" + description = "A unique identifier for resources. Must begin with a letter and end with a letter or number. This cluster_prefix will be prepended to any resources provisioned by this template. Prefixes must be 16 or fewer characters." + validation { + error_message = "Prefix must begin and end with a letter and contain only letters, numbers, and - characters." 
+    condition     = can(regex("^([A-Za-z]|[a-z][-a-z0-9]*[a-z0-9])$", var.cluster_prefix))
+  }
 }
 
 ##############################################################################
@@ -55,6 +61,19 @@ variable "bastion_private_key_content" {
   description = "Bastion private key content."
 }
 
+variable "existing_bastion_instance_name" {
+  type        = string
+  default     = null
+  description = "Provide the name of the bastion instance. If none is given, a new bastion will be created."
+}
+
+variable "bastion_public_key_content" {
+  type        = string
+  sensitive   = true
+  default     = null
+  description = "Bastion public key content."
+}
+
 ##############################################################################
 # Terraform generic Variables
 #############################################################################
diff --git a/modules/write_inventory/datasource.tf b/modules/write_inventory/datasource.tf
index d03a767f..fa155c3d 100644
--- a/modules/write_inventory/datasource.tf
+++ b/modules/write_inventory/datasource.tf
@@ -3,5 +3,6 @@ data "ibm_is_instance_profile" "dynamic_worker_profile" {
 }
 
 data "ibm_is_image" "dynamic_compute" {
-  name = var.dynamic_compute_instances[0].image
+  count = local.compute_image_found_in_map ? 0 : 1
+  name  = var.dynamic_compute_instances[0].image
 }
diff --git a/modules/write_inventory/image_map.tf b/modules/write_inventory/image_map.tf
new file mode 100644
index 00000000..f58ee9d7
--- /dev/null
+++ b/modules/write_inventory/image_map.tf
@@ -0,0 +1,52 @@
+locals {
+  image_region_map = {
+    "hpc-lsf-fp15-rhel810-v1" = {
+      "eu-es"    = "r050-deeeb734-2523-4aff-96e3-2be8d2b0d634"
+      "eu-gb"    = "r018-8edcd9a1-dbca-462f-bf74-017c15ca4b71"
+      "eu-de"    = "r010-394c5295-1704-4066-b57e-ae9bca1968de"
+      "us-east"  = "r014-1777cdcb-8a68-4ef0-becf-84ec0d2e9a26"
+      "us-south" = "r006-40caf671-28a8-42c5-b83e-b2ba3ceb86af"
+      "jp-tok"   = "r022-01531301-d100-44ba-b1a3-12e7c8d65469"
+      "jp-osa"   = "r034-ac455775-c667-4d3e-b281-9ef845080599"
+      "au-syd"   = "r026-eff4d59c-5006-46cc-8b03-60514f763a87"
+      "br-sao"   = "r042-1e1bbeeb-3ef7-4f7a-a44c-9f50609bb538"
+      "ca-tor"   = "r038-bb9fcdb7-d200-4cdd-af04-6848007c9cb2"
+    },
+    "hpc-lsf-fp15-compute-rhel810-v1" = {
+      "eu-es"    = "r050-f0608e39-9dcf-4aca-9e92-7719474b3e86"
+      "eu-gb"    = "r018-db8b97a8-6f87-4cf7-a044-847da6ab5c59"
+      "eu-de"    = "r010-957efd6b-e7b3-4249-8644-6184f1531915"
+      "us-east"  = "r014-5fdd6a25-5943-4084-9c57-b900a80579a3"
+      "us-south" = "r006-5c0e462a-679c-4a18-81a5-0fe036f483a3"
+      "jp-tok"   = "r022-8087a984-8912-42ff-9576-c5cab8edda3a"
+      "jp-osa"   = "r034-728d1f12-7842-412c-97a0-9deb66c23962"
+      "au-syd"   = "r026-f957ed22-9565-441c-bce6-f716360e02ea"
+      "br-sao"   = "r042-7bf7d508-a7b1-4434-ae6a-6986f7042d4e"
+      "ca-tor"   = "r038-a658da44-f1b4-4e02-826a-38b16e6ae98a"
+    },
+    "hpc-lsf-fp14-rhel810-v1" = {
+      "eu-es"    = "r050-12a3533c-5fa1-4bcc-8765-7150a06e122e"
+      "eu-gb"    = "r018-3ef87e4e-0f46-424a-b623-fa25215094c0"
+      "eu-de"    = "r010-48e5560b-4d34-43ca-b824-2d85513f3188"
+      "us-east"  = "r014-3719a4e2-6746-4eaf-844a-c3721b7c6d32"
+      "us-south" = "r006-e720ec63-5e8c-46ce-b7a2-51c454e64099"
+      "jp-tok"   = "r022-917ce78b-dacf-4008-b6c0-4058bf59a5b4"
+      "jp-osa"   = "r034-507fb655-4164-45b8-b1d7-f6cb2fbeafc9"
+      "au-syd"   = "r026-01900450-7314-42ea-aee3-acf5179300c0"
+      "br-sao"   = "r042-bb407137-93cf-4ec7-aa77-4702896fff97"
+      "ca-tor"   = "r038-6683403d-1cf5-4f39-a96f-c8cbb2314ad5"
+    },
+    "hpc-lsf-fp14-compute-rhel810-v1" = {
+      "eu-es"    = "r050-d2ad9625-1668-4b2c-a8bb-6ef14678d3ed"
+      "eu-gb"    = "r018-f1059503-27ec-44d4-a981-21be6225520a"
+      "eu-de"    = "r010-8115b1f6-912e-4b55-89f1-e448c397115e"
"us-east" = "r014-5108884c-011b-4473-b585-0d43309c37e3" + "us-south" = "r006-68c6af72-1abf-4d13-bca1-4f42be5d2c70" + "jp-tok" = "r022-1932c5ec-b5a6-4262-aa56-6c6257c8297f" + "jp-osa" = "r034-50be9bd9-9623-4ffc-8ce7-aab66f674137" + "au-syd" = "r026-11aee148-c938-4524-91e6-8e6da5933a42" + "br-sao" = "r042-5cb62448-e771-4caf-a556-28fdf88acab9" + "ca-tor" = "r038-fa815ec1-d52e-42b2-8221-5b8c2145a248" + } + } +} diff --git a/modules/write_inventory/locals.tf b/modules/write_inventory/locals.tf index a63acf72..97285d13 100644 --- a/modules/write_inventory/locals.tf +++ b/modules/write_inventory/locals.tf @@ -1,9 +1,13 @@ locals { - vcpus = tonumber(data.ibm_is_instance_profile.dynamic_worker_profile.vcpu_count[0].value) - ncores = tonumber(local.vcpus / 2) - ncpus = tonumber(var.enable_hyperthreading ? local.vcpus : local.ncores) - mem_in_mb = tonumber(data.ibm_is_instance_profile.dynamic_worker_profile.memory[0].value) * 1024 - rc_max_num = tonumber(var.dynamic_compute_instances[0].count) - rc_profile = var.dynamic_compute_instances[0].profile - image_id = data.ibm_is_image.dynamic_compute.id + region = join("-", slice(split("-", var.zones[0]), 0, 2)) + vcpus = tonumber(data.ibm_is_instance_profile.dynamic_worker_profile.vcpu_count[0].value) + ncores = tonumber(local.vcpus / 2) + ncpus = tonumber(var.enable_hyperthreading ? local.vcpus : local.ncores) + mem_in_mb = tonumber(data.ibm_is_instance_profile.dynamic_worker_profile.memory[0].value) * 1024 + rc_max_num = tonumber(var.dynamic_compute_instances[0].count) + rc_profile = var.dynamic_compute_instances[0].profile + boot_volume_encryption_key = jsonencode(var.kms_encryption_enabled ? var.boot_volume_encryption_key : null) + compute_image_found_in_map = contains(keys(local.image_region_map), var.dynamic_compute_instances[0]["image"]) + new_compute_image_id = local.compute_image_found_in_map ? local.image_region_map[var.dynamic_compute_instances[0]["image"]][local.region] : "Image not found with the given name" + image_id = local.compute_image_found_in_map ? 
local.new_compute_image_id : data.ibm_is_image.dynamic_compute[0].id
 }
diff --git a/modules/write_inventory/main.tf b/modules/write_inventory/main.tf
index 0cfea668..72f87020 100644
--- a/modules/write_inventory/main.tf
+++ b/modules/write_inventory/main.tf
@@ -7,6 +7,7 @@ resource "local_sensitive_file" "infra_details_to_json" {
     "lsf_clients": ${jsonencode(var.lsf_clients)},
     "gui_hosts": ${jsonencode(var.gui_hosts)},
     "db_hosts": ${jsonencode(var.db_hosts)},
+    "login_host": ${jsonencode(var.login_host)},
     "prefix": ${jsonencode(var.prefix)},
     "HA_shared_dir": ${jsonencode(var.ha_shared_dir)},
     "NFS_install_dir": ${jsonencode(var.nfs_install_dir)},
@@ -15,6 +16,7 @@ resource "local_sensitive_file" "infra_details_to_json" {
     "dns_domain_names": ${jsonencode(var.dns_domain_names["compute"])},
     "enable_hyperthreading": ${var.enable_hyperthreading},
     "ibmcloud_api_key": "${var.ibmcloud_api_key}",
+    "app_center_gui_password": "${var.app_center_gui_password}",
     "lsf_version": "${var.lsf_version}",
     "compute_public_key_content": ${jsonencode(var.compute_public_key_content)},
     "compute_private_key_content": ${jsonencode(var.compute_private_key_content)},
@@ -34,7 +36,8 @@ resource "local_sensitive_file" "infra_details_to_json" {
     "dynamic_compute_instances": ${jsonencode(var.dynamic_compute_instances)},
     "compute_subnets_cidr": ${jsonencode(var.compute_subnets_cidr)},
     "compute_security_group_id": ${jsonencode(var.compute_security_group_id)},
-    "compute_subnet_crn": "${var.compute_subnet_crn}"
+    "compute_subnet_crn": "${var.compute_subnet_crn}",
+    "boot_volume_encryption_key": ${local.boot_volume_encryption_key}
   }
 EOT
   filename = var.json_inventory_path
diff --git a/modules/write_inventory/variables.tf b/modules/write_inventory/variables.tf
index 98d85d99..e037c407 100644
--- a/modules/write_inventory/variables.tf
+++ b/modules/write_inventory/variables.tf
@@ -181,3 +181,28 @@ variable "dynamic_compute_instances" {
   }]
   description = "MaxNumber of instances to be launched for compute cluster."
 }
+
+variable "boot_volume_encryption_key" {
+  type        = string
+  default     = null
+  description = "CRN of the KMS key used for boot volume encryption."
+}
+
+variable "kms_encryption_enabled" {
+  description = "Enable key management."
+  type        = bool
+  default     = true
+}
+
+variable "app_center_gui_password" {
+  type        = string
+  default     = ""
+  sensitive   = true
+  description = "Password for IBM Spectrum LSF Application Center GUI."
+}
+
+variable "login_host" {
+  type        = list(string)
+  default     = null
+  description = "List of LSF login nodes."
+}
diff --git a/modules/write_scale_inventory/variables.tf b/modules/write_scale_inventory/variables.tf
index dc850ddb..b67acc2f 100644
--- a/modules/write_scale_inventory/variables.tf
+++ b/modules/write_scale_inventory/variables.tf
@@ -12,7 +12,7 @@ variable "cloud_platform" {
 }
 
 variable "resource_prefix" {
   type        = string
-  default     = "hpc"
+  default     = "lsf"
   description = "A unique identifier for resources. Must begin with a letter and end with a letter or number. This prefix will be prepended to any resources provisioned by this template. Prefixes must be 16 or fewer characters."
} diff --git a/solutions/scale/locals.tf b/solutions/scale/locals.tf index 99ca69b9..64d7731e 100644 --- a/solutions/scale/locals.tf +++ b/solutions/scale/locals.tf @@ -18,23 +18,21 @@ locals { locals { config = { - existing_resource_group = var.existing_resource_group - remote_allowed_ips = var.remote_allowed_ips - ssh_keys = var.ssh_keys - vpc_cluster_login_private_subnets_cidr_blocks = var.vpc_cluster_login_private_subnets_cidr_blocks - compute_gui_password = var.compute_gui_password - compute_gui_username = var.compute_gui_username - vpc_cluster_private_subnets_cidr_blocks = var.vpc_cluster_private_subnets_cidr_blocks - cos_instance_name = var.cos_instance_name - dns_custom_resolver_id = var.dns_custom_resolver_id - dns_instance_id = var.dns_instance_id - dns_domain_names = var.dns_domain_names - enable_atracker = var.enable_atracker - enable_bastion = var.enable_bastion - bastion_image = var.bastion_image - bastion_instance_profile = var.bastion_instance_profile - deployer_image = var.deployer_image - deployer_instance_profile = var.deployer_instance_profile + existing_resource_group = var.existing_resource_group + remote_allowed_ips = var.remote_allowed_ips + ssh_keys = var.ssh_keys + vpc_cluster_login_private_subnets_cidr_blocks = var.vpc_cluster_login_private_subnets_cidr_blocks + compute_gui_password = var.compute_gui_password + compute_gui_username = var.compute_gui_username + vpc_cluster_private_subnets_cidr_blocks = var.vpc_cluster_private_subnets_cidr_blocks + cos_instance_name = var.cos_instance_name + dns_custom_resolver_id = var.dns_custom_resolver_id + dns_instance_id = var.dns_instance_id + dns_domain_names = var.dns_domain_names + enable_atracker = var.enable_atracker + # enable_bastion = var.enable_bastion + bastion_instance = var.bastion_instance + deployer_instance = var.deployer_instance enable_cos_integration = var.enable_cos_integration enable_vpc_flow_logs = var.enable_vpc_flow_logs hpcs_instance_name = var.hpcs_instance_name @@ -69,7 +67,7 @@ locals { scc_event_notification_plan = var.scc_event_notification_plan skip_flowlogs_s2s_auth_policy = var.skip_flowlogs_s2s_auth_policy skip_kms_s2s_auth_policy = var.skip_kms_s2s_auth_policy - skip_iam_authorization_policy = var.skip_iam_authorization_policy + skip_iam_block_storage_authorization_policy = var.skip_iam_block_storage_authorization_policy ibmcloud_api_key = var.ibmcloud_api_key afm_instances = var.afm_instances afm_cos_config = var.afm_cos_config @@ -80,7 +78,7 @@ locals { ldap_user_password = var.ldap_user_password ldap_server = var.ldap_server ldap_server_cert = var.ldap_server_cert - ldap_instances = var.ldap_instances + ldap_instance = var.ldap_instance scale_encryption_enabled = var.scale_encryption_enabled scale_encryption_type = var.scale_encryption_type gklm_instance_key_pair = var.gklm_instance_key_pair @@ -90,35 +88,35 @@ locals { scale_encryption_admin_default_password = var.scale_encryption_admin_default_password scale_encryption_admin_password = var.scale_encryption_admin_password scale_encryption_admin_username = var.scale_encryption_admin_username + key_protect_instance_id = var.key_protect_instance_id filesystem_config = var.filesystem_config existing_bastion_instance_name = var.existing_bastion_instance_name existing_bastion_instance_public_ip = var.existing_bastion_instance_public_ip existing_bastion_security_group_id = var.existing_bastion_security_group_id existing_bastion_ssh_private_key = var.existing_bastion_ssh_private_key - github_token = var.github_token # Delete this variable before 
pushing to the public repository. + bms_boot_drive_encryption = var.bms_boot_drive_encryption + tie_breaker_bm_server = var.tie_breaker_bm_server } } # Compile Environment for Config output locals { env = { - existing_resource_group = lookup(local.override[local.override_type], "existing_resource_group", local.config.existing_resource_group) - remote_allowed_ips = lookup(local.override[local.override_type], "remote_allowed_ips", local.config.remote_allowed_ips) - ssh_keys = lookup(local.override[local.override_type], "ssh_keys", local.config.ssh_keys) - vpc_cluster_login_private_subnets_cidr_blocks = lookup(local.override[local.override_type], "vpc_cluster_login_private_subnets_cidr_blocks", local.config.vpc_cluster_login_private_subnets_cidr_blocks) - compute_gui_password = lookup(local.override[local.override_type], "compute_gui_password", local.config.compute_gui_password) - compute_gui_username = lookup(local.override[local.override_type], "compute_gui_username", local.config.compute_gui_username) - vpc_cluster_private_subnets_cidr_blocks = lookup(local.override[local.override_type], "vpc_cluster_private_subnets_cidr_blocks", local.config.vpc_cluster_private_subnets_cidr_blocks) - cos_instance_name = lookup(local.override[local.override_type], "cos_instance_name", local.config.cos_instance_name) - dns_custom_resolver_id = lookup(local.override[local.override_type], "dns_custom_resolver_id", local.config.dns_custom_resolver_id) - dns_instance_id = lookup(local.override[local.override_type], "dns_instance_id", local.config.dns_instance_id) - dns_domain_names = lookup(local.override[local.override_type], "dns_domain_names", local.config.dns_domain_names) - enable_atracker = lookup(local.override[local.override_type], "enable_atracker", local.config.enable_atracker) - enable_bastion = lookup(local.override[local.override_type], "enable_bastion", local.config.enable_bastion) - bastion_image = lookup(local.override[local.override_type], "bastion_image", local.config.bastion_image) - bastion_instance_profile = lookup(local.override[local.override_type], "bastion_instance_profile", local.config.bastion_instance_profile) - deployer_image = lookup(local.override[local.override_type], "deployer_image", local.config.deployer_image) - deployer_instance_profile = lookup(local.override[local.override_type], "deployer_instance_profile", local.config.deployer_instance_profile) + existing_resource_group = lookup(local.override[local.override_type], "existing_resource_group", local.config.existing_resource_group) + remote_allowed_ips = lookup(local.override[local.override_type], "remote_allowed_ips", local.config.remote_allowed_ips) + ssh_keys = lookup(local.override[local.override_type], "ssh_keys", local.config.ssh_keys) + vpc_cluster_login_private_subnets_cidr_blocks = lookup(local.override[local.override_type], "vpc_cluster_login_private_subnets_cidr_blocks", local.config.vpc_cluster_login_private_subnets_cidr_blocks) + compute_gui_password = lookup(local.override[local.override_type], "compute_gui_password", local.config.compute_gui_password) + compute_gui_username = lookup(local.override[local.override_type], "compute_gui_username", local.config.compute_gui_username) + vpc_cluster_private_subnets_cidr_blocks = lookup(local.override[local.override_type], "vpc_cluster_private_subnets_cidr_blocks", local.config.vpc_cluster_private_subnets_cidr_blocks) + cos_instance_name = lookup(local.override[local.override_type], "cos_instance_name", local.config.cos_instance_name) + dns_custom_resolver_id = 
lookup(local.override[local.override_type], "dns_custom_resolver_id", local.config.dns_custom_resolver_id) + dns_instance_id = lookup(local.override[local.override_type], "dns_instance_id", local.config.dns_instance_id) + dns_domain_names = lookup(local.override[local.override_type], "dns_domain_names", local.config.dns_domain_names) + enable_atracker = lookup(local.override[local.override_type], "enable_atracker", local.config.enable_atracker) + # enable_bastion = lookup(local.override[local.override_type], "enable_bastion", local.config.enable_bastion) + bastion_instance = lookup(local.override[local.override_type], "bastion_instance", local.config.bastion_instance) + deployer_instance = lookup(local.override[local.override_type], "deployer_instance", local.config.deployer_instance) enable_cos_integration = lookup(local.override[local.override_type], "enable_cos_integration", local.config.enable_cos_integration) enable_vpc_flow_logs = lookup(local.override[local.override_type], "enable_vpc_flow_logs", local.config.enable_vpc_flow_logs) hpcs_instance_name = lookup(local.override[local.override_type], "hpcs_instance_name", local.config.hpcs_instance_name) @@ -153,7 +151,7 @@ locals { scc_event_notification_plan = lookup(local.override[local.override_type], "scc_event_notification_plan", local.config.scc_event_notification_plan) skip_flowlogs_s2s_auth_policy = lookup(local.override[local.override_type], "skip_flowlogs_s2s_auth_policy", local.config.skip_flowlogs_s2s_auth_policy) skip_kms_s2s_auth_policy = lookup(local.override[local.override_type], "skip_kms_s2s_auth_policy", local.config.skip_kms_s2s_auth_policy) - skip_iam_authorization_policy = lookup(local.override[local.override_type], "skip_iam_authorization_policy", local.config.skip_iam_authorization_policy) + skip_iam_block_storage_authorization_policy = lookup(local.override[local.override_type], "skip_iam_block_storage_authorization_policy", local.config.skip_iam_block_storage_authorization_policy) ibmcloud_api_key = lookup(local.override[local.override_type], "ibmcloud_api_key", local.config.ibmcloud_api_key) afm_instances = lookup(local.override[local.override_type], "afm_instances", local.config.afm_instances) afm_cos_config = lookup(local.override[local.override_type], "afm_cos_config", local.config.afm_cos_config) @@ -164,11 +162,12 @@ locals { ldap_user_password = lookup(local.override[local.override_type], "ldap_user_password", local.config.ldap_user_password) ldap_server = lookup(local.override[local.override_type], "ldap_server", local.config.ldap_server) ldap_server_cert = lookup(local.override[local.override_type], "ldap_server_cert", local.config.ldap_server_cert) - ldap_instances = lookup(local.override[local.override_type], "ldap_instances", local.config.ldap_instances) + ldap_instance = lookup(local.override[local.override_type], "ldap_instance", local.config.ldap_instance) scale_encryption_enabled = lookup(local.override[local.override_type], "scale_encryption_enabled", local.config.scale_encryption_enabled) scale_encryption_type = lookup(local.override[local.override_type], "scale_encryption_type", local.config.scale_encryption_type) gklm_instance_key_pair = lookup(local.override[local.override_type], "gklm_instance_key_pair", local.config.gklm_instance_key_pair) gklm_instances = lookup(local.override[local.override_type], "gklm_instances", local.config.gklm_instances) + key_protect_instance_id = lookup(local.override[local.override_type], "key_protect_instance_id", local.config.key_protect_instance_id) 
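Every entry in this env map follows the same pattern: take the key from the active override document when present, otherwise fall back to the input variable captured in local.config. A self-contained sketch of the mechanism, with hypothetical values:

locals {
  config        = { cluster_prefix = "lsf" }
  override      = { override = { cluster_prefix = "scale-dev" }, override_json_string = {} }
  override_type = "override"

  env = {
    # Resolves to "scale-dev"; with override_type = "override_json_string"
    # the key is absent and lookup() falls back to local.config.cluster_prefix.
    cluster_prefix = lookup(local.override[local.override_type], "cluster_prefix", local.config.cluster_prefix)
  }
}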
storage_type = lookup(local.override[local.override_type], "storage_type", local.config.storage_type) colocate_protocol_instances = lookup(local.override[local.override_type], "colocate_protocol_instances", local.config.colocate_protocol_instances) scale_encryption_admin_default_password = lookup(local.override[local.override_type], "scale_encryption_admin_default_password", local.config.scale_encryption_admin_default_password) @@ -179,6 +178,7 @@ locals { existing_bastion_instance_public_ip = lookup(local.override[local.override_type], "existing_bastion_instance_public_ip", local.config.existing_bastion_instance_public_ip) existing_bastion_security_group_id = lookup(local.override[local.override_type], "existing_bastion_security_group_id", local.config.existing_bastion_security_group_id) existing_bastion_ssh_private_key = lookup(local.override[local.override_type], "existing_bastion_ssh_private_key", local.config.existing_bastion_ssh_private_key) - github_token = lookup(local.override[local.override_type], "github_token", local.config.github_token) # Delete this variable before pushing to the public repository. - } + bms_boot_drive_encryption = lookup(local.override[local.override_type], "bms_boot_drive_encryption", local.config.bms_boot_drive_encryption) + tie_breaker_bm_server = lookup(local.override[local.override_type], "tie_breaker_bm_server", local.config.tie_breaker_bm_server) + } } diff --git a/solutions/scale/main.tf b/solutions/scale/main.tf index 0a882439..b58d095a 100644 --- a/solutions/scale/main.tf +++ b/solutions/scale/main.tf @@ -1,24 +1,22 @@ module "scale" { - source = "./../.." - scheduler = "Scale" - ibm_customer_number = var.ibm_customer_number - zones = var.zones - remote_allowed_ips = var.remote_allowed_ips - cluster_prefix = local.env.cluster_prefix - ssh_keys = local.env.ssh_keys - existing_resource_group = local.env.existing_resource_group - vpc_cluster_login_private_subnets_cidr_blocks = local.env.vpc_cluster_login_private_subnets_cidr_blocks - vpc_cluster_private_subnets_cidr_blocks = local.env.vpc_cluster_private_subnets_cidr_blocks - cos_instance_name = local.env.cos_instance_name - dns_custom_resolver_id = local.env.dns_custom_resolver_id - dns_instance_id = local.env.dns_instance_id - dns_domain_names = local.env.dns_domain_names - enable_atracker = local.env.enable_atracker - enable_bastion = local.env.enable_bastion - bastion_image = local.env.bastion_image - bastion_instance_profile = local.env.bastion_instance_profile - deployer_image = local.env.deployer_image - deployer_instance_profile = local.env.deployer_instance_profile + source = "./../.." 
+ scheduler = "Scale" + ibm_customer_number = var.ibm_customer_number + zones = var.zones + remote_allowed_ips = var.remote_allowed_ips + cluster_prefix = local.env.cluster_prefix + ssh_keys = local.env.ssh_keys + existing_resource_group = local.env.existing_resource_group + vpc_cluster_login_private_subnets_cidr_blocks = local.env.vpc_cluster_login_private_subnets_cidr_blocks + vpc_cluster_private_subnets_cidr_blocks = local.env.vpc_cluster_private_subnets_cidr_blocks + cos_instance_name = local.env.cos_instance_name + dns_custom_resolver_id = local.env.dns_custom_resolver_id + dns_instance_id = local.env.dns_instance_id + dns_domain_names = local.env.dns_domain_names + enable_atracker = local.env.enable_atracker + # enable_bastion = local.env.enable_bastion + bastion_instance = local.env.bastion_instance + deployer_instance = local.env.deployer_instance enable_cos_integration = local.env.enable_cos_integration enable_vpc_flow_logs = local.env.enable_vpc_flow_logs key_management = local.env.key_management @@ -53,7 +51,7 @@ module "scale" { scc_event_notification_plan = local.env.scc_event_notification_plan skip_flowlogs_s2s_auth_policy = local.env.skip_flowlogs_s2s_auth_policy skip_kms_s2s_auth_policy = local.env.skip_kms_s2s_auth_policy - skip_iam_authorization_policy = local.env.skip_iam_authorization_policy + skip_iam_block_storage_authorization_policy = local.env.skip_iam_block_storage_authorization_policy ibmcloud_api_key = local.env.ibmcloud_api_key afm_instances = local.env.afm_instances afm_cos_config = local.env.afm_cos_config @@ -64,21 +62,23 @@ module "scale" { ldap_user_password = local.env.ldap_user_password ldap_server = local.env.ldap_server ldap_server_cert = local.env.ldap_server_cert - ldap_instances = local.env.ldap_instances + ldap_instance = local.env.ldap_instance scale_encryption_enabled = local.env.scale_encryption_enabled scale_encryption_type = local.env.scale_encryption_type gklm_instance_key_pair = local.env.gklm_instance_key_pair gklm_instances = local.env.gklm_instances storage_type = local.env.storage_type + scale_encryption_admin_username = local.env.scale_encryption_admin_username + scale_encryption_admin_default_password = local.env.scale_encryption_admin_default_password scale_encryption_admin_password = local.env.scale_encryption_admin_password + key_protect_instance_id = local.env.key_protect_instance_id filesystem_config = local.env.filesystem_config existing_bastion_instance_name = local.env.existing_bastion_instance_name existing_bastion_instance_public_ip = local.env.existing_bastion_instance_public_ip existing_bastion_security_group_id = local.env.existing_bastion_security_group_id existing_bastion_ssh_private_key = local.env.existing_bastion_ssh_private_key client_subnets_cidr = [local.env.client_subnets_cidr] + bms_boot_drive_encryption = local.env.bms_boot_drive_encryption + tie_breaker_bm_server = local.env.tie_breaker_bm_server # hpcs_instance_name = local.env.hpcs_instance_name - # scale_encryption_admin_username = local.env.scale_encryption_admin_username - # scale_encryption_admin_default_password = local.env.scale_encryption_admin_default_password - github_token = local.env.github_token # Delete this variable before pushing to the public repository. 
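With the separate bastion_image/bastion_instance_profile and deployer_image/deployer_instance_profile strings collapsed into the bastion_instance and deployer_instance objects wired through above, callers now override each node as a unit. A terraform.tfvars sketch using the defaults introduced by this patch:

bastion_instance = {
  image   = "ibm-ubuntu-22-04-5-minimal-amd64-3"
  profile = "cx2-4x8"
}

deployer_instance = {
  image   = "jay-lsf-new-image"
  profile = "mx2-4x32"
}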
} diff --git a/solutions/scale/variables.tf b/solutions/scale/variables.tf index fda7f424..3a4a48d8 100644 --- a/solutions/scale/variables.tf +++ b/solutions/scale/variables.tf @@ -24,13 +24,6 @@ variable "ibmcloud_api_key" { description = "IBM Cloud API Key that will be used for authentication in scripts run in this module. Only required if certain options are required." } -# Delete this variable before pushing to the public repository. -variable "github_token" { - type = string - default = null - description = "Provide your GitHub token to download the HPCaaS code into the Deployer node" -} - ############################################################################## # Cluster Level Variables ############################################################################## @@ -115,22 +108,22 @@ variable "placement_strategy" { ############################################################################## # Access Variables ############################################################################## -variable "enable_bastion" { - type = bool - default = true - description = "The solution supports multiple ways to connect to your HPC cluster for example, using bastion node, via VPN or direct connection. If connecting to the HPC cluster via VPN or direct connection, set this value to false." -} - -variable "bastion_image" { - type = string - default = "ibm-ubuntu-22-04-3-minimal-amd64-1" - description = "The image to use to deploy the bastion host." -} +# variable "enable_bastion" { +# type = bool +# default = true +# description = "The solution supports multiple ways to connect to your HPC cluster for example, using bastion node, via VPN or direct connection. If connecting to the HPC cluster via VPN or direct connection, set this value to false." +# } -variable "bastion_instance_profile" { - type = string - default = "cx2-4x8" - description = "Deployer should be only used for better deployment performance" +variable "bastion_instance" { + type = object({ + image = string + profile = string + }) + default = { + image = "ibm-ubuntu-22-04-5-minimal-amd64-3" + profile = "cx2-4x8" + } + description = "Configuration for the bastion node, including the image and instance profile. Only Ubuntu stock images are supported." } variable "vpc_cluster_login_private_subnets_cidr_blocks" { @@ -146,16 +139,17 @@ variable "vpc_cluster_login_private_subnets_cidr_blocks" { ############################################################################## # Deployer Variables ############################################################################## -variable "deployer_image" { - type = string - default = "jay-lsf-new-image" - description = "The image to use to deploy the deployer host." -} -variable "deployer_instance_profile" { - type = string - default = "mx2-4x32" - description = "Deployer should be only used for better deployment performance" +variable "deployer_instance" { + type = object({ + image = string + profile = string + }) + default = { + image = "jay-lsf-new-image" + profile = "mx2-4x32" + } + description = "Configuration for the deployer node, including the custom image and instance profile. The defaults are the jay-lsf-new-image image and the mx2-4x32 profile." } ############################################################################## @@ -266,6 +260,24 @@ variable "storage_servers" { description = "Number of BareMetal Servers to be launched for storage cluster."
} +variable "tie_breaker_bm_server" { + type = list( + object({ + profile = string + count = number + image = string + filesystem = string + }) + ) + default = [{ + profile = "cx2d-metal-96x192" + count = 1 + image = "ibm-redhat-8-10-minimal-amd64-4" + filesystem = "/gpfs/fs1" + }] + description = "Bare metal server to be launched as the tiebreaker for the storage cluster." +} + variable "protocol_subnets_cidr" { type = string default = "10.241.40.0/24" @@ -548,7 +560,7 @@ variable "ldap_user_password" { # description = "Name of the SSH key configured in your IBM Cloud account that is used to establish a connection to the LDAP Server. Make sure that the SSH key is present in the same resource group and region where the LDAP Servers are provisioned. If you do not have an SSH key in your IBM Cloud account, create one by using the [SSH keys](https://cloud.ibm.com/docs/vpc?topic=vpc-ssh-keys) instructions." # } -variable "ldap_instances" { +variable "ldap_instance" { type = list( object({ profile = string @@ -575,6 +587,11 @@ variable "scale_encryption_type" { type = string default = null description = "To enable filesystem encryption, specify either 'key_protect' or 'gklm'. If neither is specified, the default value will be 'null' and encryption is disabled" + + validation { + condition = var.scale_encryption_type == null ? true : contains(["key_protect", "gklm"], var.scale_encryption_type) + error_message = "Invalid value: scale_encryption_type must be 'key_protect', 'gklm', or null." + } } variable "gklm_instance_key_pair" { @@ -594,14 +611,14 @@ variable "gklm_instances" { default = [{ profile = "bx2-2x8" count = 2 - image = "ibm-redhat-8-10-minimal-amd64-4" + image = "hpcc-scale-gklm4202-v2-5-2" }] - description = "Number of instances to be launched for client." + description = "Number of GKLM instances to be launched for the Scale cluster." } variable "scale_encryption_admin_default_password" { type = string - default = null + default = "SKLM@dmin123" description = "The default administrator password used for resetting the admin password based on the user input. The password has to be updated which was configured during the GKLM installation." } @@ -617,6 +634,14 @@ variable "scale_encryption_admin_password" { description = "Password that is used for performing administrative operations for the GKLM.The password must contain at least 8 characters and at most 20 characters. For a strong password, at least three alphabetic characters are required, with at least one uppercase and one lowercase letter. Two numbers, and at least one special character from this(~@_+:). Make sure that the password doesn't include the username. Visit this [page](https://www.ibm.com/docs/en/gklm/3.0.1?topic=roles-password-policy) to know more about password policy of GKLM. " } +# Existing Key Protect Instance Details + +variable "key_protect_instance_id" { + type = string + default = null + description = "An existing Key Protect instance used for filesystem encryption." +} + variable "storage_type" { type = string default = "scratch" @@ -771,10 +796,10 @@ variable "skip_kms_s2s_auth_policy" { description = "Skip auth policy between KMS service and COS instance, set to true if this policy is already in place on account." } -variable "skip_iam_authorization_policy" { +variable "skip_iam_block_storage_authorization_policy" { type = bool - default = true - description = "Set to false if authorization policy is required for VPC block storage volumes to access kms. This can be set to true if authorization policy already exists.
For more information on how to create authorization policy manually, see [creating authorization policies for block storage volume](https://cloud.ibm.com/docs/vpc?topic=vpc-block-s2s-auth&interface=ui)." + default = false + description = "When using an existing KMS instance, set this value to true if an authorization policy between the KMS instance and the block storage volumes is already in place; otherwise leave it at the default of false so that the policy is created. Ensuring proper authorization avoids access issues during deployment. For more information on how to create the authorization policy manually, see [creating authorization policies for block storage volume](https://cloud.ibm.com/docs/vpc?topic=vpc-block-s2s-auth&interface=ui)." } ########################################################################### @@ -805,3 +830,9 @@ variable "existing_bastion_ssh_private_key" { default = null description = "Provide the private SSH key (named id_rsa) used during the creation and configuration of the bastion server to securely authenticate and connect to the bastion server. This allows access to internal network resources from a secure entry point. Note: The corresponding public SSH key (named id_rsa.pub) must already be available in the ~/.ssh/authorized_keys file on the bastion host to establish authentication." } + +variable "bms_boot_drive_encryption" { + type = bool + default = false + description = "Set to true to enable encryption of the bare metal server boot drive." +} \ No newline at end of file diff --git a/variables.tf b/variables.tf index 9c090225..64d3c9e1 100644 --- a/variables.tf +++ b/variables.tf @@ -8,17 +8,10 @@ variable "ibmcloud_api_key" { description = "IBM Cloud API Key that will be used for authentication in scripts run in this module. Only required if certain options are required." } -# Delete this variable before pushing to the public repository. -variable "github_token" { - type = string - default = null - description = "Provide your GitHub token to download the HPCaaS code into the Deployer node" -} - variable "lsf_version" { type = string default = "fixpack_15" - description = "Select the LSF version to deploy: 'fixpack_14' or 'fixpack_15'. Use null to skip LSF deployment." + description = "Select the desired version of IBM Spectrum LSF to deploy: either fixpack_15 or fixpack_14. By default, the solution uses the latest available version, Fix Pack 15. To deploy an earlier version such as Fix Pack 14, set lsf_version to fixpack_14. When changing the LSF version, ensure that all custom images used for the management, compute, and login nodes correspond to the same version, to maintain compatibility across the cluster and prevent deployment issues." } ############################################################################## @@ -42,7 +35,7 @@ variable "ibm_customer_number" { ############################################################################## variable "cluster_prefix" { type = string - default = "hpc" + default = "lsf" description = "A unique identifier for resources. Must begin with a letter and end with a letter or number. This cluster_prefix will be prepended to any resources provisioned by this template. Prefixes must be 16 or fewer characters." validation { error_message = "cluster_prefix must begin and end with a letter and contain only letters, numbers, and - characters."
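For context, the policy that skip_iam_block_storage_authorization_policy = true suppresses is the block-storage-to-KMS Reader grant described in the linked VPC documentation. A hedged sketch of the equivalent Terraform resource (the exact service names are an assumption to verify against the IBM provider docs):

resource "ibm_iam_authorization_policy" "block_storage_to_kms" {
  # Created only when the skip flag is false.
  count                       = var.skip_iam_block_storage_authorization_policy ? 0 : 1
  source_service_name         = "server-protect" # assumed name for Block Storage for VPC
  target_service_name         = "kms"            # or "hs-crypto" for an HPCS instance
  target_resource_instance_id = var.existing_kms_instance_guid
  roles                       = ["Reader"]
}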
@@ -117,22 +110,16 @@ variable "placement_strategy" { ############################################################################## # Access Variables ############################################################################## -variable "enable_bastion" { - type = bool - default = true - description = "The solution supports multiple ways to connect to your HPC cluster for example, using bastion node, via VPN or direct connection. If connecting to the HPC cluster via VPN or direct connection, set this value to false." -} - -variable "bastion_image" { - type = string - default = "ibm-ubuntu-22-04-3-minimal-amd64-1" - description = "The image to use to deploy the bastion host." -} - -variable "bastion_instance_profile" { - type = string - default = "cx2-4x8" - description = "Deployer should be only used for better deployment performance" +variable "bastion_instance" { + type = object({ + image = string + profile = string + }) + default = { + image = "ibm-ubuntu-22-04-3-minimal-amd64-1" + profile = "cx2-4x8" + } + description = "Configuration for the bastion node, including the image and instance profile. Only Ubuntu stock images are supported." } variable "login_subnet_id" { @@ -160,16 +147,16 @@ variable "enable_deployer" { description = "Deployer should be only used for better deployment performance" } -variable "deployer_image" { - type = string - default = "ibm-redhat-8-10-minimal-amd64-4" - description = "The image to use to deploy the deployer host." -} - -variable "deployer_instance_profile" { - type = string - default = "bx2-8x32" - description = "Deployer should be only used for better deployment performance" +variable "deployer_instance" { + type = object({ + image = string + profile = string + }) + default = { + image = "ibm-redhat-8-10-minimal-amd64-4" + profile = "bx2-8x32" + } + description = "Configuration for the deployer node, including the custom image and instance profile. By default, uses the ibm-redhat-8-10-minimal-amd64-4 image and the bx2-8x32 profile." } ############################################################################## @@ -203,7 +190,7 @@ variable "client_instances" { description = "Number of instances to be launched for client." } -variable "cluster_subnet_ids" { +variable "cluster_subnet_id" { type = string default = null description = "Name of an existing subnets in which the cluster resources will be deployed. If no value is given, then new subnet(s) will be provisioned for the cluster. [Learn more](https://cloud.ibm.com/docs/vpc)" @@ -234,17 +221,15 @@ variable "management_instances" { variable "static_compute_instances" { type = list( object({ - profile = string - count = number - image = string - filesystem = string + profile = string + count = number + image = string }) ) default = [{ - profile = "cx2-2x4" - count = 0 - image = "ibm-redhat-8-10-minimal-amd64-4" - filesystem = "/gpfs/fs1" + profile = "cx2-2x4" + count = 0 + image = "ibm-redhat-8-10-minimal-amd64-4" }] description = "Min Number of instances to be launched for compute cluster." } @@ -259,7 +244,7 @@ variable "dynamic_compute_instances" { ) default = [{ profile = "cx2-2x4" - count = 1024 + count = 500 image = "ibm-redhat-8-10-minimal-amd64-4" }] description = "MaxNumber of instances to be launched for compute cluster." @@ -330,6 +315,24 @@ variable "storage_servers" { description = "Number of BareMetal Servers to be launched for storage cluster."
} +variable "tie_breaker_bm_server" { + type = list( + object({ + profile = string + count = number + image = string + filesystem = string + }) + ) + default = [{ + profile = "cx2d-metal-96x192" + count = 1 + image = "ibm-redhat-8-10-minimal-amd64-4" + filesystem = "fs1" + }] + description = "Bare metal server to be launched as the tiebreaker for the storage cluster." +} + variable "protocol_subnets" { type = list(string) default = null @@ -437,10 +440,10 @@ variable "dns_custom_resolver_id" { variable "dns_domain_names" { type = object({ compute = string - storage = string - protocol = string - client = string - gklm = string + storage = optional(string) + protocol = optional(string) + client = optional(string) + gklm = optional(string) }) default = { compute = "comp.com" @@ -495,6 +498,12 @@ variable "existing_kms_instance_guid" { description = "The existing KMS instance guid." } +variable "key_protect_instance_id" { + type = string + default = null + description = "An existing Key Protect instance used for filesystem encryption." +} + # variable "hpcs_instance_name" { # type = string # default = null @@ -519,10 +528,10 @@ variable "skip_kms_s2s_auth_policy" { description = "Skip auth policy between KMS service and COS instance, set to true if this policy is already in place on account." } -variable "skip_iam_authorization_policy" { +variable "skip_iam_block_storage_authorization_policy" { type = bool - default = true - description = "Set to false if authorization policy is required for VPC block storage volumes to access kms. This can be set to true if authorization policy already exists. For more information on how to create authorization policy manually, see [creating authorization policies for block storage volume](https://cloud.ibm.com/docs/vpc?topic=vpc-block-s2s-auth&interface=ui)." + default = false + description = "When using an existing KMS instance, set this value to true if an authorization policy between the KMS instance and the block storage volumes is already in place; otherwise leave it at the default of false so that the policy is created. Ensuring proper authorization avoids access issues during deployment. For more information on how to create the authorization policy manually, see [creating authorization policies for block storage volume](https://cloud.ibm.com/docs/vpc?topic=vpc-block-s2s-auth&interface=ui)." } ############################################################################## @@ -666,25 +675,12 @@ variable "enable_hyperthreading" { # } # } -# variable "enable_app_center" { -# type = bool -# default = false -# description = "Set to true to install and enable use of the IBM Spectrum LSF Application Center GUI." -# } - -# variable "app_center_gui_password" { -# type = string -# default = "hpc@IBMCloud" -# sensitive = true -# description = "Password for IBM Spectrum LSF Application Center GUI." -# } - -# variable "app_center_db_password" { -# type = string -# default = "hpc@IBMCloud" -# sensitive = true -# description = "Password for IBM Spectrum LSF Application Center database GUI." -# } +variable "app_center_gui_password" { + type = string + default = "" + sensitive = true + description = "Password for IBM Spectrum LSF Application Center GUI."
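Switching the non-compute domains to optional(string) (supported since Terraform 1.3) means LSF-only deployments can omit them entirely; unset attributes resolve to null. A small sketch of the resulting behavior:

variable "dns_domain_names" {
  type = object({
    compute  = string
    storage  = optional(string)
    protocol = optional(string)
    client   = optional(string)
    gklm     = optional(string)
  })
  default = { compute = "comp.com" }
}

# In terraform.tfvars for an LSF-only run, only compute needs to be set;
# storage, protocol, client, and gklm all default to null:
#   dns_domain_names = { compute = "lsf.com" }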
+} ############################################################################## # Symphony specific Variables @@ -828,12 +824,6 @@ variable "bastion_security_group_id" { description = "bastion security group id" } -variable "bastion_security_group_id_for_ref" { - type = string - default = null - description = "bastion security group id" -} - variable "deployer_hostname" { type = string default = null @@ -935,7 +925,7 @@ variable "ldap_instance_key_pair" { description = "Name of the SSH key configured in your IBM Cloud account that is used to establish a connection to the LDAP Server. Make sure that the SSH key is present in the same resource group and region where the LDAP Servers are provisioned. If you do not have an SSH key in your IBM Cloud account, create one by using the [SSH keys](https://cloud.ibm.com/docs/vpc?topic=vpc-ssh-keys) instructions." } -variable "ldap_instances" { +variable "ldap_instance" { type = list( object({ profile = string @@ -981,22 +971,22 @@ variable "gklm_instances" { default = [{ profile = "bx2-2x8" count = 2 - image = "ibm-redhat-8-10-minimal-amd64-4" + image = "hpcc-scale-gklm4202-v2-5-2" }] - description = "Number of instances to be launched for client." + description = "Number of GKLM instances to be launched for the Scale cluster." } -# variable "scale_encryption_admin_default_password" { -# type = string -# default = null -# description = "The default administrator password used for resetting the admin password based on the user input. The password has to be updated which was configured during the GKLM installation." -# } +variable "scale_encryption_admin_default_password" { + type = string + default = "SKLM@dmin123" + description = "The default administrator password, as configured during the GKLM installation, that is used when resetting the admin password to the user-provided value." +} -# variable "scale_encryption_admin_username" { -# type = string -# default = null -# description = "The default Admin username for Security Key Lifecycle Manager(GKLM)." -# } +variable "scale_encryption_admin_username" { + type = string + default = null + description = "The default admin username for IBM Security Key Lifecycle Manager (GKLM)." +} variable "scale_encryption_admin_password" { type = string @@ -1082,6 +1072,11 @@ variable "bastion_fip" { description = "bastion fip" } +variable "scale_compute_cluster_filesystem_mountpoint" { + type = string + default = "/gpfs/fs1" + description = "Compute cluster (accessingCluster) filesystem mount point." +} ############################################################################## # Dedicatedhost Variables ############################################################################## @@ -1126,3 +1121,48 @@ variable "resource_group_ids" { default = null description = "Map describing resource groups to create or reference" } + +############################################################################## +# Login Variables +############################################################################## +variable "login_instance" { + type = list( + object({ + profile = string + image = string + }) + ) + default = [{ + profile = "bx2-2x8" + image = "hpcaas-lsf10-rhel810-compute-v8" + }] + description = "Configuration of the login node instances, including the profile and image."
+} + +############################################################################## +# Environment Variables +############################################################################## + +# tflint-ignore: all +variable "TF_VERSION" { + type = string + default = "1.9" + description = "The version of the Terraform engine that's used in the Schematics workspace." +} + +# tflint-ignore: all +variable "TF_PARALLELISM" { + type = string + default = "250" + description = "Parallelism / concurrent-operations limit. Valid values are between 1 and 256, both inclusive. [Learn more](https://www.terraform.io/docs/internals/graph.html#walking-the-graph)." + validation { + condition = 1 <= var.TF_PARALLELISM && var.TF_PARALLELISM <= 256 + error_message = "Input \"TF_PARALLELISM\" must be greater than or equal to 1 and less than or equal to 256." + } +} + +variable "bms_boot_drive_encryption" { + type = bool + default = false + description = "Set to true to enable encryption of the bare metal server boot drive." +} \ No newline at end of file
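TF_PARALLELISM stays typed as a string (Schematics appears to pass these TF_* settings as strings), so the numeric comparisons in its validation rely on Terraform's implicit string-to-number conversion. An explicit, conversion-safe variant would look like this sketch:

variable "TF_PARALLELISM" {
  type    = string
  default = "250"
  validation {
    # try() returns false instead of erroring when the value is not numeric.
    condition     = try(tonumber(var.TF_PARALLELISM) >= 1 && tonumber(var.TF_PARALLELISM) <= 256, false)
    error_message = "Input \"TF_PARALLELISM\" must be a number between 1 and 256, inclusive."
  }
}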