From 1446ea08e4b480677f80d9400d18bcc7a49f3e9d Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 3 Feb 2025 21:03:40 -0500 Subject: [PATCH 001/124] Added get multi model deployment config. --- ads/aqua/common/utils.py | 7 + ads/aqua/extension/deployment_handler.py | 21 +- ads/aqua/modeldeployment/deployment.py | 194 +++++++++++++++++- ads/aqua/modeldeployment/entities.py | 69 ++++++- .../aqua_multi_model_deployment_config.json | 94 +++++++++ .../with_extras/aqua/test_deployment.py | 71 ++++++- .../aqua/test_deployment_handler.py | 17 +- 7 files changed, 466 insertions(+), 7 deletions(-) create mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py index 6dfef54b8..67660f74c 100644 --- a/ads/aqua/common/utils.py +++ b/ads/aqua/common/utils.py @@ -5,6 +5,7 @@ import asyncio import base64 +import itertools import json import logging import os @@ -1219,3 +1220,9 @@ def build_pydantic_error_message(ex: ValidationError): for e in ex.errors() if "loc" in e and e["loc"] } or "; ".join(e["msg"] for e in ex.errors()) + + +def get_combinations(input_dict: dict): + """Finds all combinations within input dict.""" + keys, values = zip(*input_dict.items()) + return [dict(zip(keys, v)) for v in itertools.product(*values)] diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index 2a3e827c3..3a98f9a75 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from urllib.parse import urlparse @@ -37,7 +37,7 @@ class AquaDeploymentHandler(AquaAPIhandler): """ @handle_exceptions - def get(self, id=""): + def get(self, id="", model_ids=None): """Handle GET request.""" url_parse = urlparse(self.request.path) paths = url_parse.path.strip("/") @@ -47,6 +47,13 @@ def get(self, id=""): 400, f"The request {self.request.path} requires model id." 
) return self.get_deployment_config(id) + elif paths.startswith("aqua/deployments/modelconfig"): + if not model_ids: + raise HTTPError( + 400, + f"The request {self.request.path} requires a list of model ids.", + ) + return self.get_multimodel_compatible_shapes(model_ids) elif paths.startswith("aqua/deployments"): if not id: return self.list() @@ -185,6 +192,15 @@ def get_deployment_config(self, model_id): """Gets the deployment config for Aqua model.""" return self.finish(AquaDeploymentApp().get_deployment_config(model_id=model_id)) + def get_multimodel_compatible_shapes(self, model_ids): + """Gets the multi model deployment config and optimal GPU allocations for Aqua models.""" + primary_model_id = self.get_argument("primary_model_id", default=None) + return self.finish( + AquaDeploymentApp().get_multimodel_compatible_shapes( + model_ids=model_ids, primary_model_id=primary_model_id + ) + ) + class AquaDeploymentInferenceHandler(AquaAPIhandler): @staticmethod @@ -300,6 +316,7 @@ def post(self, *args, **kwargs): # noqa: ARG002 __handlers__ = [ ("deployments/?([^/]*)/params", AquaDeploymentParamsHandler), ("deployments/config/?([^/]*)", AquaDeploymentHandler), + ("deployments/modelconfig/?([^/]*)", AquaDeploymentHandler), ("deployments/?([^/]*)", AquaDeploymentHandler), ("deployments/?([^/]*)/activate", AquaDeploymentHandler), ("deployments/?([^/]*)/deactivate", AquaDeploymentHandler), diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index b7787ea21..d785ab7cd 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -1,7 +1,8 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import copy import logging import shlex from typing import Dict, List, Optional, Union @@ -14,6 +15,7 @@ ) from ads.aqua.common.errors import AquaRuntimeError, AquaValueError from ads.aqua.common.utils import ( + get_combinations, get_combined_params, get_container_config, get_container_image, @@ -39,7 +41,12 @@ from ads.aqua.model import AquaModelApp from ads.aqua.modeldeployment.entities import ( AquaDeployment, + AquaDeploymentConfig, AquaDeploymentDetail, + AquaDeploymentModelShapeInfoSummary, + AquaDeploymentMultiModelConfigSummary, + AquaDeploymentMultiModelGPUAllocation, + AquaDeploymentMultiModelResponse, ) from ads.aqua.ui import ModelFormat from ads.common.object_storage_details import ObjectStorageDetails @@ -656,6 +663,191 @@ def get_deployment_config(self, model_id: str) -> Dict: ) return config + @telemetry( + entry_point="plugin=deployment&action=get_multimodel_compatible_shapes", + name="aqua", + ) + def get_multimodel_compatible_shapes( + self, model_ids: List[str], primary_model_id: str = None + ) -> AquaDeploymentMultiModelConfigSummary: + """Gets the deployment config of multiple Aqua models and calculate the gpu allocations for all compatible shapes. + If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated. + If provided, gpu count for each compatible shape will be prioritized for primary model. 
+ + For example, there is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below: + + A - BM.GPU.H100.8 - 1, 2, 4, 8 + B - BM.GPU.H100.8 - 1, 2, 4, 8 + C - BM.GPU.H100.8 - 1, 2, 4, 8 + + If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2] + If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count. + + Parameters + ---------- + model_ids: List[str] + A list of OCID of the Aqua model. + primary_model_id: str + The OCID of the primary Aqua model + + Returns + ------- + AquaDeploymentMultiModelSummary: + An instance of AquaDeploymentMultiModelSummary. + """ + deployment = {} + model_shape_gpu = {} + for model_id in model_ids: + deployment_config = AquaDeploymentConfig( + **self.get_deployment_config(model_id=model_id) + ) + model_shape_gpu[model_id] = { + shape: [ + item.gpu_count + for item in deployment_config.configuration[ + shape + ].multi_model_deployment + ] + for shape in deployment_config.shape + } + + deployment.update( + { + model_id: { + "shape": deployment_config.shape, + "configuration": { + shape: AquaDeploymentModelShapeInfoSummary( + parameters=deployment_config.configuration[ + shape + ].parameters + ) + for shape in deployment_config.shape + }, + } + } + ) + + common_shapes = [] + for shape_gpu in model_shape_gpu.values(): + if not common_shapes: + common_shapes = list(shape_gpu.keys()) + else: + common_shapes = [ + shape for shape in common_shapes if shape in list(shape_gpu.keys()) + ] + + if not common_shapes: + raise ValueError( + "There are no available shapes for models selected at this moment, please select different model to deploy." + ) + + gpu_allocation = {} + for common_shape in common_shapes: + model_gpu = { + model: shape_gpu[common_shape] + for model, shape_gpu in model_shape_gpu.items() + } + is_compatible, maximum_gpu_count, combination = self._verify_compatibility( + model_gpu, primary_model_id + ) + if is_compatible: + gpu_allocation[common_shape] = AquaDeploymentMultiModelResponse( + models=combination, total_gpus_available=maximum_gpu_count + ) + + if not gpu_allocation: + raise ValueError( + "There are no available gpu allocations for models selected at this moment, please select different model to deploy." + ) + + return AquaDeploymentMultiModelConfigSummary( + deployment_config=deployment, gpu_allocation=gpu_allocation + ) + + @staticmethod + def _verify_compatibility( + model_gpu_dict: Dict, primary_model_id: str = None + ) -> tuple: + """Calculates the gpu allocations for all compatible shapes. + If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated. + If provided, gpu count for each compatible shape will be prioritized for primary model. + + For example, there is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below: + + A - BM.GPU.H100.8 - 1, 2, 4, 8 + B - BM.GPU.H100.8 - 1, 2, 4, 8 + C - BM.GPU.H100.8 - 1, 2, 4, 8 + + If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2] + If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count. + + Parameters + ---------- + model_gpu_dict: Dict + A dict of Aqua model and its gpu counts. + primary_model_id: str + The OCID of the primary Aqua model + + Returns + ------- + tuple: + A tuple of gpu count allocation result. 
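For illustration only (not part of the patch): the search that the two docstrings above describe can be sketched as a small standalone snippet. It reuses the get_combinations helper added to ads/aqua/common/utils.py earlier in this patch, and the model names and GPU-count lists are the hypothetical A/B/C example from the docstring.

import itertools

def get_combinations(input_dict: dict):
    """Mirror of the get_combinations helper added to ads.aqua.common.utils in this patch."""
    keys, values = zip(*input_dict.items())
    return [dict(zip(keys, v)) for v in itertools.product(*values)]

# Hypothetical models A, B, C, all deployable on BM.GPU.H100.8 (8 GPUs in total).
model_gpu = {"A": [1, 2, 4, 8], "B": [1, 2, 4, 8], "C": [1, 2, 4, 8]}
total_gpus = max(max(gpus) for gpus in model_gpu.values())

# Keep only allocations that use exactly the available GPUs, then pick the most
# evenly balanced one (smallest spread between the largest and smallest share).
candidates = [c for c in get_combinations(model_gpu) if sum(c.values()) == total_gpus]
best = min(candidates, key=lambda c: max(c.values()) - min(c.values()))
print(best)  # {'A': 2, 'B': 2, 'C': 4} -- one of the even splits listed in the docstring

The primary-model branch of _verify_compatibility differs only in that it first reserves the largest feasible count for the primary model and then runs the same search over the remaining models. In the service this logic is reached through the new API added above; a hypothetical invocation (placeholder OCIDs) could look like:

from ads.aqua.modeldeployment import AquaDeploymentApp

summary = AquaDeploymentApp().get_multimodel_compatible_shapes(
    model_ids=["ocid1.datasciencemodel.oc1..<model_a>", "ocid1.datasciencemodel.oc1..<model_b>"],
    primary_model_id="ocid1.datasciencemodel.oc1..<model_a>",  # optional
)
# summary.gpu_allocation maps each compatible shape to its per-model GPU split;
# summary.deployment_config keeps each model's shape list and shape-level parameters.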
+ """ + maximum_gpu_count = max([gpus[-1] for gpus in model_gpu_dict.values()]) + model_gpu_dict_copy = copy.deepcopy(model_gpu_dict) + if primary_model_id: + primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id)) + for gpu_count in reversed(primary_model_gpu_list): + combinations = get_combinations(model_gpu_dict_copy) + for combination in combinations: + if ( + len(combination) == len(model_gpu_dict_copy) + and sum(combination.values()) == maximum_gpu_count - gpu_count + ): + combination[primary_model_id] = gpu_count + return ( + True, + maximum_gpu_count, + [ + AquaDeploymentMultiModelGPUAllocation( + ocid=ocid, gpu_count=gpu_count + ) + for ocid, gpu_count in combination.items() + ], + ) + + else: + combinations = get_combinations(model_gpu_dict_copy) + minimal_difference = float("inf") # gets the positive infinity + optimal_combination = [] + for combination in combinations: + if ( + len(combination) == len(model_gpu_dict_copy) + and sum(combination.values()) == maximum_gpu_count + ): + difference = max(combination.values()) - min(combination.values()) + if difference < minimal_difference: + minimal_difference = difference + optimal_combination = combination + + # find the optimal combination, no need to continue + if minimal_difference == 0: + break + + if optimal_combination: + return ( + True, + maximum_gpu_count, + [ + AquaDeploymentMultiModelGPUAllocation( + ocid=ocid, gpu_count=gpu_count + ) + for ocid, gpu_count in optimal_combination.items() + ], + ) + + return (False, 0, []) + def get_deployment_default_params( self, model_id: str, diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 0b73ffe25..a26c901f3 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -1,16 +1,18 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from dataclasses import dataclass, field -from typing import List, Optional, Union +from typing import Dict, List, Optional, Union from oci.data_science.models import ( ModelDeployment, ModelDeploymentSummary, ) +from pydantic import Field from ads.aqua.common.enums import Tags +from ads.aqua.config.utils.serializer import Serializable from ads.aqua.constants import UNKNOWN, UNKNOWN_DICT from ads.aqua.data import AquaResourceIdentifier from ads.common.serializer import DataClassSerializable @@ -140,3 +142,66 @@ class AquaDeploymentDetail(AquaDeployment, DataClassSerializable): log_group: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier) log: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier) + + +class AquaDeploymentMultiModelConfig(Serializable): + gpu_count: int + parameters: dict + + class Config: + extra = "ignore" + + +class AquaDeploymentModelShapeInfoSummary(Serializable): + parameters: dict + + class Config: + extra = "ignore" + + +class AquaDeploymentModelShapeInfo(AquaDeploymentModelShapeInfoSummary): + multi_model_deployment: Optional[List[AquaDeploymentMultiModelConfig]] = Field( + default_factory=list + ) + + +class AquaDeploymentConfigSummary(Serializable): + shape: List[str] + configuration: Dict[str, AquaDeploymentModelShapeInfoSummary] = Field( + default_factory=dict + ) + + class Config: + extra = "ignore" + + +class AquaDeploymentConfig(AquaDeploymentConfigSummary): + configuration: Dict[str, AquaDeploymentModelShapeInfo] = Field(default_factory=dict) + + +class AquaDeploymentMultiModelGPUAllocation(Serializable): + ocid: str + gpu_count: int + + class Config: + extra = "ignore" + + +class AquaDeploymentMultiModelResponse(Serializable): + models: List[AquaDeploymentMultiModelGPUAllocation] = Field(default_factory=list) + total_gpus_available: int + + class Config: + extra = "ignore" + + +class AquaDeploymentMultiModelConfigSummary(Serializable): + deployment_config: Dict[str, AquaDeploymentConfigSummary] = Field( + default_factory=dict + ) + gpu_allocation: Dict[str, AquaDeploymentMultiModelResponse] = Field( + default_factory=dict + ) + + class Config: + extra = "ignore" diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json new file mode 100644 index 000000000..aa73b1243 --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json @@ -0,0 +1,94 @@ +{ + "model_a": { + "configuration": { + "BM.GPU.A100-v2.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + "BM.GPU.H100.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + 
"parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + "VM.GPU.A10.4": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + } + }, + "shape": [ + "VM.GPU.A10.2", + "VM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ] + } +} diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 74612ac8d..e5151aaa9 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import copy @@ -328,6 +328,59 @@ class TestDataset: "8080", ] + aqua_deployment_multi_model_config_summary = { + "deployment_config": { + "model_a": { + "shape": [ + "VM.GPU.A10.2", + "VM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8", + ], + "configuration": { + "VM.GPU.A10.2": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + "VM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + "BM.GPU.A100-v2.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + "BM.GPU.H100.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + }, + } + }, + "gpu_allocation": { + "VM.GPU.A10.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "VM.GPU.A10.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "BM.GPU.A100-v2.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.H100.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + }, + } + class TestAquaDeployment(unittest.TestCase): def setUp(self): @@ -445,6 +498,22 @@ def test_get_deployment_config(self): result = self.app.get_deployment_config(TestDataset.MODEL_ID) assert result == None + def test_get_multimodel_compatible_shapes(self): + config_json = os.path.join( + self.curr_dir, + "test_data/deployment/aqua_multi_model_deployment_config.json", + ) + with open(config_json, "r") as _file: + config = json.load(_file) + + self.app.get_deployment_config = MagicMock(return_value=config) + result = self.app.get_multimodel_compatible_shapes(["model_a"]) + + assert ( + result.model_dump() + == TestDataset.aqua_deployment_multi_model_config_summary + ) + @patch("ads.aqua.modeldeployment.deployment.get_container_config") @patch("ads.aqua.model.AquaModelApp.create") @patch("ads.aqua.modeldeployment.deployment.get_container_image") diff --git a/tests/unitary/with_extras/aqua/test_deployment_handler.py b/tests/unitary/with_extras/aqua/test_deployment_handler.py index 95be6c351..70d9e8357 100644 --- 
a/tests/unitary/with_extras/aqua/test_deployment_handler.py +++ b/tests/unitary/with_extras/aqua/test_deployment_handler.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import os @@ -87,6 +87,21 @@ def test_get_deployment_config_without_id(self, mock_error): mock_error.assert_called_once() assert result["status"] == 400 + @patch( + "ads.aqua.modeldeployment.AquaDeploymentApp.get_multimodel_compatible_shapes" + ) + def test_get_multimodel_compatible_shapes( + self, mock_get_multimodel_compatible_shapes + ): + """Test get method to return multi model deployment config""" + self.deployment_handler.request.path = "aqua/deployments/modelconfig" + self.deployment_handler.get( + model_ids=["mock-model-id-one", "mock-model-id-two"] + ) + mock_get_multimodel_compatible_shapes.assert_called_with( + model_ids=["mock-model-id-one", "mock-model-id-two"] + ) + @patch("ads.aqua.modeldeployment.AquaDeploymentApp.get") def test_get_deployment(self, mock_get): """Test get method to return deployment information.""" From c925ec4b01aa25dee68d3085edf030e67df6e93a Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 3 Feb 2025 21:47:01 -0500 Subject: [PATCH 002/124] Updated pr. --- .../aqua_multi_model_deployment_config.json | 164 +++++++++--------- .../aqua/test_deployment_handler.py | 2 +- 2 files changed, 82 insertions(+), 84 deletions(-) diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json index aa73b1243..58b20c888 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json @@ -1,94 +1,92 @@ { - "model_a": { - "configuration": { - "BM.GPU.A100-v2.8": { - "multi_model_deployment": [ - { - "gpu_count": 1, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 8, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } + "configuration": { + "BM.GPU.A100-v2.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" } - ], - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - } - }, - "BM.GPU.H100.8": { - "multi_model_deployment": [ - { - "gpu_count": 1, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 8, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" } - ], - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" } - }, - "VM.GPU.A10.2": { - "multi_model_deployment": [ - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } + ], + 
"parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + "BM.GPU.H100.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" } - ], - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" } - }, - "VM.GPU.A10.4": { - "multi_model_deployment": [ - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 4, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" } - ], - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" } }, - "shape": [ - "VM.GPU.A10.2", - "VM.GPU.A10.4", - "BM.GPU.A100-v2.8", - "BM.GPU.H100.8" - ] - } + "VM.GPU.A10.4": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + } + }, + "shape": [ + "VM.GPU.A10.2", + "VM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ] } diff --git a/tests/unitary/with_extras/aqua/test_deployment_handler.py b/tests/unitary/with_extras/aqua/test_deployment_handler.py index 70d9e8357..fe139bc98 100644 --- a/tests/unitary/with_extras/aqua/test_deployment_handler.py +++ b/tests/unitary/with_extras/aqua/test_deployment_handler.py @@ -99,7 +99,7 @@ def test_get_multimodel_compatible_shapes( model_ids=["mock-model-id-one", "mock-model-id-two"] ) mock_get_multimodel_compatible_shapes.assert_called_with( - model_ids=["mock-model-id-one", "mock-model-id-two"] + model_ids=["mock-model-id-one", "mock-model-id-two"], primary_model_id=None ) @patch("ads.aqua.modeldeployment.AquaDeploymentApp.get") From 25afd80a25ea58d5440cb5fb564ceeaa1637a380 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Tue, 4 Feb 2025 12:41:58 -0500 Subject: [PATCH 003/124] Updated pr. --- ads/aqua/modeldeployment/deployment.py | 2 +- .../with_extras/aqua/test_deployment.py | 33 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index de0112e2d..41b48091f 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -805,7 +805,7 @@ def _verify_compatibility( tuple: A tuple of gpu count allocation result. 
""" - maximum_gpu_count = max([gpus[-1] for gpus in model_gpu_dict.values()]) + maximum_gpu_count = max([sorted(gpus)[-1] for gpus in model_gpu_dict.values()]) model_gpu_dict_copy = copy.deepcopy(model_gpu_dict) if primary_model_id: primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id)) diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index e5151aaa9..9f9f72a6f 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -381,6 +381,13 @@ class TestDataset: }, } + model_gpu_dict = {"model_a": [2, 4], "model_b": [1, 2, 4], "model_c": [1, 2, 8]} + incompatible_model_gpu_dict = { + "model_a": [1, 2], + "model_b": [1, 2], + "model_c": [1, 2, 8], + } + class TestAquaDeployment(unittest.TestCase): def setUp(self): @@ -514,6 +521,32 @@ def test_get_multimodel_compatible_shapes(self): == TestDataset.aqua_deployment_multi_model_config_summary ) + def test_verify_compatibility(self): + result = self.app._verify_compatibility(TestDataset.model_gpu_dict) + + assert result[0] == True + assert result[1] == 8 + assert len(result[2]) == 3 + + result = self.app._verify_compatibility( + model_gpu_dict=TestDataset.model_gpu_dict, primary_model_id="model_b" + ) + + assert result[0] == True + assert result[1] == 8 + assert len(result[2]) == 3 + + for item in result[2]: + if item.ocid == "model_b": + # model_b gets the maximum gpu count + assert item.gpu_count == 4 + + result = self.app._verify_compatibility(TestDataset.incompatible_model_gpu_dict) + + assert result[0] == False + assert result[1] == 0 + assert result[2] == [] + @patch("ads.aqua.modeldeployment.deployment.get_container_config") @patch("ads.aqua.model.AquaModelApp.create") @patch("ads.aqua.modeldeployment.deployment.get_container_image") From 07eb59d3644f4d334416f8a99f90418675855992 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Tue, 4 Feb 2025 16:21:46 -0800 Subject: [PATCH 004/124] inital AQUA API code changes in get_deployment_default_params --- .gitignore | 3 +- ads/aqua/extension/deployment_handler.py | 5 +- ads/aqua/modeldeployment/deployment.py | 36 +++++++-- .../deployment/deployment_config.json | 4 +- .../deployment/deployment_gpu_config.json | 30 +++++++ .../deployment/deployment_gpu_config2.json | 34 ++++++++ .../with_extras/aqua/test_deployment.py | 78 ++++++++++++++++++- 7 files changed, 178 insertions(+), 12 deletions(-) create mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json create mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json diff --git a/.gitignore b/.gitignore index 8abb0d36e..081c68f93 100644 --- a/.gitignore +++ b/.gitignore @@ -86,7 +86,8 @@ celerybeat-schedule *.sage.py # dotenv -.env +.env* +run_ads.sh # virtualenv .venv diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index 2a3e827c3..dd629bbd4 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # Copyright (c) 2024 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ - +import logging from urllib.parse import urlparse from tornado.web import HTTPError @@ -259,9 +259,10 @@ class AquaDeploymentParamsHandler(AquaAPIhandler): def get(self, model_id): """Handle GET request.""" instance_shape = self.get_argument("instance_shape") + gpu_count = self.get_argument("gpu_count", default=None) return self.finish( AquaDeploymentApp().get_deployment_default_params( - model_id=model_id, instance_shape=instance_shape + model_id=model_id, instance_shape=instance_shape, gpu_count=gpu_count ) ) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index b7787ea21..bc34abdac 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -660,6 +660,7 @@ def get_deployment_default_params( self, model_id: str, instance_shape: str, + gpu_count: int = None, ) -> List[str]: """Gets the default params set in the deployment configs for the given model and instance shape. @@ -671,6 +672,9 @@ def get_deployment_default_params( instance_shape: (str). The shape of the instance used for deployment. + gpu_count: (int, optional). + The number of GPUs used by the Aqua model. Defaults to None. + Returns ------- List[str]: @@ -679,6 +683,7 @@ def get_deployment_default_params( """ default_params = [] + config_params = {} model = DataScienceModel.from_id(model_id) try: container_type_key = model.custom_metadata_list.get( @@ -695,12 +700,31 @@ def get_deployment_default_params( and container_type_key in InferenceContainerTypeFamily.values() ): deployment_config = self.get_deployment_config(model_id) - config_params = ( - deployment_config.get("configuration", UNKNOWN_DICT) - .get(instance_shape, UNKNOWN_DICT) - .get("parameters", UNKNOWN_DICT) - .get(get_container_params_type(container_type_key), UNKNOWN) - ) + + instance_shape_config = deployment_config.get( + "configuration", UNKNOWN_DICT + ).get(instance_shape, UNKNOWN_DICT) + + if "multi_model_deployment" in instance_shape_config and gpu_count: + gpu_params = ( + instance_shape_config + .get("multi_model_deployment", UNKNOWN_DICT) + ) + + for gpu_config in gpu_params: + if gpu_config["gpu_count"] == gpu_count: + config_params = gpu_config.get("parameters", UNKNOWN_DICT).get( + get_container_params_type(container_type_key), UNKNOWN + ) + break + + else: + config_params = ( + instance_shape_config + .get("parameters", UNKNOWN_DICT) + .get(get_container_params_type(container_type_key), UNKNOWN) + ) + if config_params: params_list = get_params_list(config_params) restricted_params_set = get_restricted_params_by_container( diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json index b60178403..824fa8541 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json +++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json @@ -1,6 +1,6 @@ { "configuration": { - "VM.GPU.A10.1": { + "VM.GPU.A10.4": { "parameters": { "TGI_PARAMS": "--max-stop-sequences 6", "VLLM_PARAMS": "--max-model-len 4096" @@ -24,7 +24,7 @@ } }, "shape": [ - "VM.GPU.A10.1", + "VM.GPU.A10.4", "VM.Standard.A1.Flex" ] } diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json new file mode 100644 index 000000000..7ff81d11a --- 
/dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json @@ -0,0 +1,30 @@ +{ + "shape": [ + "VM.GPU.A10.2", + "VM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ], + "configuration": { + "VM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code 6" + } + } + ] + } + } +} + diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json new file mode 100644 index 000000000..d470b6130 --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json @@ -0,0 +1,34 @@ +{ + "shape": [ + "VM.GPU.A10.1", + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.L40S-NC.4" + ], + "configuration": { + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 1 + } + ] + }, + "VM.GPU.A10.4": { + "multi_model_deployment": [ + { + "gpu_count": 1 + }, + { + "gpu_count": 2 + } + ] + }, + "BM.GPU.L40S-NC.4": { + "multi_model_deployment": [ + { + "gpu_count": 2 + } + ] + } + } +} diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 74612ac8d..7a5f06f41 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -40,7 +40,9 @@ class TestDataset: MODEL_DEPLOYMENT_URL = "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." MODEL_ID = "ocid1.datasciencemodeldeployment.oc1.." 
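Illustrative aside, not part of the patch: the lookup that PATCH 004 adds to get_deployment_default_params above amounts to "if a gpu_count is supplied and the shape entry carries a multi_model_deployment section, the matching GPU entry's parameters win; otherwise fall back to the shape-level parameters." A simplified sketch, with a config trimmed from the deployment_gpu_config.json test data above (the real method also resolves the params field per container family and filters restricted params):

def pick_params(deployment_config: dict, instance_shape: str, gpu_count: int = None) -> str:
    """Simplified view of the default-params lookup; assumes a vLLM container."""
    shape_config = deployment_config.get("configuration", {}).get(instance_shape, {})
    if gpu_count and "multi_model_deployment" in shape_config:
        for gpu_config in shape_config["multi_model_deployment"]:
            if gpu_config.get("gpu_count") == gpu_count:
                return gpu_config.get("parameters", {}).get("VLLM_PARAMS", "")
        return ""  # no entry for this gpu_count -> no defaults
    return shape_config.get("parameters", {}).get("VLLM_PARAMS", "")

config = {
    "configuration": {
        "VM.GPU.A10.4": {
            "parameters": {"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"},
            "multi_model_deployment": [
                {"gpu_count": 1, "parameters": {"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000"}},
                {"gpu_count": 2, "parameters": {"VLLM_PARAMS": "--trust-remote-code 6"}},
            ],
        }
    }
}
print(pick_params(config, "VM.GPU.A10.4", gpu_count=2))  # --trust-remote-code 6
print(pick_params(config, "VM.GPU.A10.4"))               # --trust-remote-code --max-model-len 60000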
DEPLOYMENT_IMAGE_NAME = "dsmc://image-name:1.0.0.0" - DEPLOYMENT_SHAPE_NAME = "VM.GPU.A10.1" + DEPLOYMENT_SHAPE_NAME = "VM.GPU.A10.4" + DEPLOYMENT_GPU_COUNT = 1 + DEPLOYMENT_GPU_COUNT_B = 2 DEPLOYMENT_SHAPE_NAME_CPU = "VM.Standard.A1.Flex" model_deployment_object = [ @@ -818,11 +820,84 @@ def test_get_deployment_default_params( result = self.app.get_deployment_default_params( TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME ) + if container_params_field == "CUSTOM_PARAMS": assert result == [] else: assert result == allowed_params + + # @parameterized.expand( + # [ + # ( + # "VLLM_PARAMS", + # "odsc-vllm-serving", + # 1, + # ["--max-model-len 4096"], + # ["--max-model-len 4096"], + # ), + # ( + # "TGI_PARAMS", + # "odsc-tgi-serving", + # 1, + # [], + # [], + # ), + # ( + # "CUSTOM_PARAMS", + # "custom-container-key", + # None, + # ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], + # ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], + # ), + # ] + # ) + @patch("ads.model.datascience_model.DataScienceModel.from_id") + def test_get_deployment_default_params_multimodel( + self, + # container_params_field, + # container_type_key, + # gpu_count, + # params, + # allowed_params, + mock_from_id, + ): + """Test for fetching config details for a given deployment.""" + + config_json = os.path.join( + self.curr_dir, "test_data/deployment/deployment_gpu_config2.json" + ) + with open(config_json, "r") as _file: + config = json.load(_file) + # update config params for testing + # config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["multi_model_deployment"] = [{"gpu_count": gpu_count, "parameters": {container_params_field: " ".join(params)}}] + + mock_model = MagicMock() + custom_metadata_list = ModelCustomMetadata() + custom_metadata_list.add( + **{"key": "deployment-container", "value": "odsc-vllm-serving"} + ) + mock_model.custom_metadata_list = custom_metadata_list + mock_from_id.return_value = mock_model + + self.app.get_deployment_config = MagicMock(return_value=config) + # result = self.app.get_deployment_default_params( + # TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, gpu_count + # ) + + result = self.app.get_deployment_default_params( + TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, TestDataset.DEPLOYMENT_GPU_COUNT_B + ) + + assert result == [] + # if container_params_field in ("CUSTOM_PARAMS", "TGI_PARAMS"): + # assert result == [] + # else: + # assert result == allowed_params + + + + @parameterized.expand( [ ( @@ -851,6 +926,7 @@ def test_get_deployment_default_params( ), ] ) + @patch("ads.model.datascience_model.DataScienceModel.from_id") @patch("ads.aqua.modeldeployment.deployment.get_container_config") def test_validate_deployment_params( From 5cb74b35ed8528e32f462b9e89d04d5b1ddb20d2 Mon Sep 17 00:00:00 2001 From: Vipul Date: Wed, 5 Feb 2025 19:54:39 +0530 Subject: [PATCH 005/124] Update to pydantic models --- ads/aqua/modeldeployment/entities.py | 192 ++++++++++++++++++++++----- 1 file changed, 157 insertions(+), 35 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 0b73ffe25..fce0504a8 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -1,59 +1,65 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -from dataclasses import dataclass, field from typing import List, Optional, Union from oci.data_science.models import ( ModelDeployment, ModelDeploymentSummary, ) +from pydantic import Field, model_validator from ads.aqua.common.enums import Tags +from ads.aqua.common.errors import AquaValueError +from ads.aqua.config.utils.serializer import Serializable from ads.aqua.constants import UNKNOWN, UNKNOWN_DICT from ads.aqua.data import AquaResourceIdentifier from ads.common.serializer import DataClassSerializable from ads.common.utils import get_console_link -@dataclass -class ModelParams: - max_tokens: int = None - temperature: float = None - top_k: float = None - top_p: float = None - model: str = None +class ModelParams(Serializable): + max_tokens: Optional[int] = None + temperature: Optional[float] = None + top_k: Optional[float] = None + top_p: Optional[float] = None + model: Optional[str] = None + class Config: + extra = "ignore" + protected_namespaces = () -@dataclass -class ShapeInfo: - instance_shape: str = None - instance_count: int = None - ocpus: float = None - memory_in_gbs: float = None +class ShapeInfo(Serializable): + instance_shape: Optional[str] = None + instance_count: Optional[int] = None + ocpus: Optional[float] = None + memory_in_gbs: Optional[float] = None -@dataclass(repr=False) -class AquaDeployment(DataClassSerializable): + class Config: + extra = "ignore" + + +class AquaDeployment(Serializable): """Represents an Aqua Model Deployment""" - id: str = None - display_name: str = None - aqua_service_model: bool = None - aqua_model_name: str = None - state: str = None - description: str = None - created_on: str = None - created_by: str = None - endpoint: str = None - private_endpoint_id: str = None - console_link: str = None - lifecycle_details: str = None + id: Optional[str] = None + display_name: Optional[str] = None + aqua_service_model: Optional[bool] = None + aqua_model_name: Optional[str] = None + state: Optional[str] = None + description: Optional[str] = None + created_on: Optional[str] = None + created_by: Optional[str] = None + endpoint: Optional[str] = None + private_endpoint_id: Optional[str] = None + console_link: Optional[str] = None + lifecycle_details: Optional[str] = None shape_info: Optional[ShapeInfo] = None - tags: dict = None - environment_variables: dict = None - cmd: List[str] = None + tags: Optional[dict] = None + environment_variables: Optional[dict] = None + cmd: Optional[List[str]] = None @classmethod def from_oci_model_deployment( @@ -133,10 +139,126 @@ def from_oci_model_deployment( cmd=cmd, ) + class Config: + extra = "ignore" + -@dataclass(repr=False) class AquaDeploymentDetail(AquaDeployment, DataClassSerializable): """Represents a details of Aqua deployment.""" - log_group: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier) - log: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier) + log_group: AquaResourceIdentifier = Field(default_factory=AquaResourceIdentifier) + log: AquaResourceIdentifier = Field(default_factory=AquaResourceIdentifier) + + class Config: + extra = "ignore" + + +class ModelInfo(Serializable): + """Class for maintaining details of model to be deployed, usually for multi-model deployment.""" + + model_id: str + gpu_count: Optional[int] = None + env_var: Optional[dict] = None + + class Config: + extra = "ignore" + + +class CreateModelDeploymentDetails(Serializable): + """Class for 
creating aqua model deployment. + + Properties + ---------- + compartment_id: str + The compartment OCID + project_id: str + Target project to list deployments from. + display_name: str + The name of model deployment. + description: str + The description of the deployment. + model_id: (str, optional) + The model OCID to deploy. Either model_id or model_info should be set. + model_info: (List[ModelInfo], optional) + The model info to deploy, used for multimodel deployment. Either model_id or model_info should be set. + instance_count: (int, optional). Defaults to 1. + The number of instance used for deployment. + instance_shape: (str). + The shape of the instance used for deployment. + log_group_id: (str) + The oci logging group id. The access log and predict log share the same log group. + access_log_id: (str). + The access log OCID for the access logs. https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm + predict_log_id: (str). + The predict log OCID for the predict logs. https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm + bandwidth_mbps: (int). Defaults to 10. + The bandwidth limit on the load balancer in Mbps. + web_concurrency: str + The number of worker processes/threads to handle incoming requests + with_bucket_uri(bucket_uri) + Sets the bucket uri when uploading large size model. + server_port: (int). + The server port for docker container image. + health_check_port: (int). + The health check port for docker container image. + env_var : dict, optional + Environment variable for the deployment, by default None. + container_family: str + The image family of model deployment container runtime. + memory_in_gbs: float + The memory in gbs for the shape selected. + ocpus: float + The ocpu count for the shape selected. + model_file: str + The file used for model deployment. + private_endpoint_id: str + The private endpoint id of model deployment. + container_image_uri: str + The image of model deployment container runtime, ignored for service managed containers. + Required parameter for BYOC based deployments if this parameter was not set during model registration. + cmd_var: List[str] + The cmd of model deployment container runtime. 
+ freeform_tags: dict + Freeform tags for the model deployment + defined_tags: dict + Defined tags for the model deployment + """ + + instance_shape: str + display_name: str + model_id: Optional[str] = None + model_info: Optional[List[ModelInfo]] = None + instance_count: Optional[int] = None + log_group_id: Optional[str] = None + access_log_id: Optional[str] = None + predict_log_id: Optional[str] = None + compartment_id: Optional[str] = None + project_id: Optional[str] = None + description: Optional[str] = None + bandwidth_mbps: Optional[int] = None + web_concurrency: Optional[int] = None + server_port: Optional[int] = None + health_check_port: Optional[int] = None + env_var: Optional[dict] = None + container_family: Optional[str] = None + memory_in_gbs: Optional[float] = None + ocpus: Optional[float] = None + model_file: Optional[str] = None + private_endpoint_id: Optional[str] = None + container_image_uri: Optional[None] = None + cmd_var: Optional[List[str]] = None + freeform_tags: Optional[dict] = None + defined_tags: Optional[dict] = None + + @model_validator(mode="before") + @classmethod + def validate_model_fields(cls, values): + model_id, model_info = values.get("model_id"), values.get("model_info") + if bool(model_id) == bool(model_info): # either both are set or unset + raise AquaValueError( + "Exactly one of `model_id` or `model_info` must be set to create a model deployment" + ) + return values + + class Config: + extra = "ignore" From cbd27c9ed7f2395142818ecdcd347946daa0f084 Mon Sep 17 00:00:00 2001 From: Vipul Date: Wed, 5 Feb 2025 20:03:05 +0530 Subject: [PATCH 006/124] Create method to support pydantic model inputs --- ads/aqua/modeldeployment/deployment.py | 195 ++++++++++--------------- 1 file changed, 78 insertions(+), 117 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index c65858b53..331f60334 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -3,7 +3,9 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import shlex -from typing import Dict, List, Optional, Union +from typing import Dict, List, Union + +from pydantic import ValidationError from ads.aqua.app import AquaApp, logger from ads.aqua.common.entities import ContainerSpec @@ -13,6 +15,7 @@ ) from ads.aqua.common.errors import AquaRuntimeError, AquaValueError from ads.aqua.common.utils import ( + build_pydantic_error_message, get_combined_params, get_container_config, get_container_image, @@ -39,6 +42,7 @@ from ads.aqua.modeldeployment.entities import ( AquaDeployment, AquaDeploymentDetail, + CreateModelDeploymentDetails, ) from ads.aqua.ui import ModelFormat from ads.common.object_storage_details import ObjectStorageDetails @@ -49,6 +53,7 @@ AQUA_DEPLOYMENT_CONTAINER_URI_METADATA_NAME, AQUA_MODEL_DEPLOYMENT_CONFIG, COMPARTMENT_OCID, + PROJECT_OCID, ) from ads.model.datascience_model import DataScienceModel from ads.model.deployment import ( @@ -86,102 +91,42 @@ class AquaDeploymentApp(AquaApp): @telemetry(entry_point="plugin=deployment&action=create", name="aqua") def create( - self, - model_id: str, - instance_shape: str, - display_name: str, - instance_count: int = None, - log_group_id: str = None, - access_log_id: str = None, - predict_log_id: str = None, - compartment_id: str = None, - project_id: str = None, - description: str = None, - bandwidth_mbps: int = None, - web_concurrency: int = None, - server_port: int = None, - health_check_port: int 
= None, - env_var: Dict = None, - container_family: str = None, - memory_in_gbs: Optional[float] = None, - ocpus: Optional[float] = None, - model_file: Optional[str] = None, - private_endpoint_id: Optional[str] = None, - container_image_uri: Optional[None] = None, - cmd_var: List[str] = None, - freeform_tags: Optional[dict] = None, - defined_tags: Optional[dict] = None, + self, create_deployment_details: CreateModelDeploymentDetails = None, **kwargs ) -> "AquaDeployment": """ - Creates a new Aqua deployment + Creates a new Aqua model deployment Parameters ---------- - model_id: str - The model OCID to deploy. - compartment_id: str - The compartment OCID - project_id: str - Target project to list deployments from. - display_name: str - The name of model deployment. - description: str - The description of the deployment. - instance_count: (int, optional). Defaults to 1. - The number of instance used for deployment. - instance_shape: (str). - The shape of the instance used for deployment. - log_group_id: (str) - The oci logging group id. The access log and predict log share the same log group. - access_log_id: (str). - The access log OCID for the access logs. https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm - predict_log_id: (str). - The predict log OCID for the predict logs. https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm - bandwidth_mbps: (int). Defaults to 10. - The bandwidth limit on the load balancer in Mbps. - web_concurrency: str - The number of worker processes/threads to handle incoming requests - with_bucket_uri(bucket_uri) - Sets the bucket uri when uploading large size model. - server_port: (int). - The server port for docker container image. - health_check_port: (int). - The health check port for docker container image. - env_var : dict, optional - Environment variable for the deployment, by default None. - container_family: str - The image family of model deployment container runtime. - memory_in_gbs: float - The memory in gbs for the shape selected. - ocpus: float - The ocpu count for the shape selected. - model_file: str - The file used for model deployment. - private_endpoint_id: str - The private endpoint id of model deployment. - container_image_uri: str - The image of model deployment container runtime, ignored for service managed containers. - Required parameter for BYOC based deployments if this parameter was not set during model registration. - cmd_var: List[str] - The cmd of model deployment container runtime. - freeform_tags: dict - Freeform tags for the model deployment - defined_tags: dict - Defined tags for the model deployment + create_deployment_details: CreateModelDeploymentDetails + The CreateModelDeploymentDetails data class which contains all + required and optional fields to create a model deployment via Aqua. + kwargs: + The kwargs for creating CreateModelDeploymentDetails instance if + no create_deployment_details is provided. + Returns ------- AquaDeployment An Aqua deployment instance """ - # TODO validate if the service model has no artifact and if it requires import step before deployment. + if not create_deployment_details: + try: + create_deployment_details = CreateModelDeploymentDetails(**kwargs) + except ValidationError as ex: + custom_errors = build_pydantic_error_message(ex) + raise AquaValueError( + f"Invalid parameters for creating a model deployment. \nError details: {custom_errors}." 
+ ) from ex + # Create a model catalog entry in the user compartment aqua_model = AquaModelApp().create( - model_id=model_id, - compartment_id=compartment_id, - project_id=project_id, - freeform_tags=freeform_tags, - defined_tags=defined_tags, + model_id=create_deployment_details.model_id, + compartment_id=create_deployment_details.compartment_id or COMPARTMENT_OCID, + project_id=create_deployment_details.project_id or PROJECT_OCID, + freeform_tags=create_deployment_details.freeform_tags, + defined_tags=create_deployment_details.defined_tags, ) tags = {} @@ -197,7 +142,7 @@ def create( tags.update({Tags.TASK: aqua_model.freeform_tags.get(Tags.TASK, UNKNOWN)}) # Set up info to get deployment config - config_source_id = model_id + config_source_id = create_deployment_details.model_id model_name = aqua_model.display_name is_fine_tuned_model = Tags.AQUA_FINE_TUNED_MODEL_TAG in aqua_model.freeform_tags @@ -217,10 +162,16 @@ def create( ) from err # set up env and cmd var - if not env_var: - env_var = {} - if not cmd_var: - cmd_var = [] + env_var = ( + create_deployment_details.env_var + if create_deployment_details.env_var + else {} + ) + cmd_var = ( + create_deployment_details.cmd_var + if create_deployment_details.cmd_var + else [] + ) try: model_path_prefix = aqua_model.custom_metadata_list.get( @@ -253,11 +204,13 @@ def create( env_var.update({"FT_MODEL": f"{fine_tune_output_path}"}) container_type_key = self._get_container_type_key( - model=aqua_model, container_family=container_family + model=aqua_model, + container_family=create_deployment_details.container_family, ) - container_image_uri = container_image_uri or get_container_image( - container_type=container_type_key + container_image_uri = ( + create_deployment_details.container_image_uri + or get_container_image(container_type=container_type_key) ) if not container_image_uri: try: @@ -303,6 +256,7 @@ def create( and container_type_key.lower() == InferenceContainerTypeFamily.AQUA_LLAMA_CPP_CONTAINER_FAMILY ): + model_file = create_deployment_details.model_file if model_file is not None: logger.info( f"Overriding {model_file} as model_file for model {aqua_model.id}." 
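Hypothetical usage sketch with placeholder values: PATCH 006 refactors create() above so that callers pass either a CreateModelDeploymentDetails instance or raw kwargs that are validated into one.

from ads.aqua.modeldeployment import AquaDeploymentApp
from ads.aqua.modeldeployment.entities import CreateModelDeploymentDetails

details = CreateModelDeploymentDetails(
    model_id="ocid1.datasciencemodel.oc1..<placeholder>",
    instance_shape="VM.GPU.A10.2",
    display_name="my-aqua-deployment",
)  # the validator requires exactly one of model_id / model_info to be set
deployment = AquaDeploymentApp().create(details)

# Equivalent kwargs form; invalid input raises AquaValueError built from the pydantic errors.
deployment = AquaDeploymentApp().create(**details.model_dump(exclude_none=True))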
@@ -332,18 +286,19 @@ def create( ) # these params cannot be overridden for Aqua deployments params = container_spec.get(ContainerSpec.CLI_PARM, "") - server_port = server_port or container_spec.get( + server_port = create_deployment_details.server_port or container_spec.get( ContainerSpec.SERVER_PORT - ) # Give precendece to the input parameter - health_check_port = health_check_port or container_spec.get( - ContainerSpec.HEALTH_CHECK_PORT - ) # Give precendece to the input parameter + ) # Give precedence to the input parameter + health_check_port = ( + create_deployment_details.health_check_port + or container_spec.get(ContainerSpec.HEALTH_CHECK_PORT) + ) # Give precedence to the input parameter deployment_config = self.get_deployment_config(config_source_id) config_params = ( deployment_config.get("configuration", UNKNOWN_DICT) - .get(instance_shape, UNKNOWN_DICT) + .get(create_deployment_details.instance_shape, UNKNOWN_DICT) .get("parameters", UNKNOWN_DICT) .get(get_container_params_type(container_type_key), UNKNOWN) ) @@ -390,26 +345,32 @@ def create( # configure model deployment infrastructure infrastructure = ( ModelDeploymentInfrastructure() - .with_project_id(project_id) - .with_compartment_id(compartment_id) - .with_shape_name(instance_shape) - .with_bandwidth_mbps(bandwidth_mbps) - .with_replica(instance_count) - .with_web_concurrency(web_concurrency) - .with_private_endpoint_id(private_endpoint_id) + .with_project_id(create_deployment_details.project_id or PROJECT_OCID) + .with_compartment_id( + create_deployment_details.compartment_id or COMPARTMENT_OCID + ) + .with_shape_name(create_deployment_details.instance_shape) + .with_bandwidth_mbps(create_deployment_details.bandwidth_mbps) + .with_replica(create_deployment_details.instance_count) + .with_web_concurrency(create_deployment_details.web_concurrency) + .with_private_endpoint_id(create_deployment_details.private_endpoint_id) .with_access_log( - log_group_id=log_group_id, - log_id=access_log_id, + log_group_id=create_deployment_details.log_group_id, + log_id=create_deployment_details.access_log_id, ) .with_predict_log( - log_group_id=log_group_id, - log_id=predict_log_id, + log_group_id=create_deployment_details.log_group_id, + log_id=create_deployment_details.predict_log_id, ) ) - if memory_in_gbs and ocpus and infrastructure.shape_name.endswith("Flex"): + if ( + create_deployment_details.memory_in_gbs + and create_deployment_details.ocpus + and infrastructure.shape_name.endswith("Flex") + ): infrastructure.with_shape_config_details( - ocpus=ocpus, - memory_in_gbs=memory_in_gbs, + ocpus=create_deployment_details.ocpus, + memory_in_gbs=create_deployment_details.memory_in_gbs, ) # configure model deployment runtime container_runtime = ( @@ -427,14 +388,14 @@ def create( if cmd_var: container_runtime.with_cmd(cmd_var) - tags = {**tags, **(freeform_tags or {})} + tags = {**tags, **(create_deployment_details.freeform_tags or {})} # configure model deployment and deploy model on container runtime deployment = ( ModelDeployment() - .with_display_name(display_name) - .with_description(description) + .with_display_name(create_deployment_details.display_name) + .with_description(create_deployment_details.description) .with_freeform_tags(**tags) - .with_defined_tags(**(defined_tags or {})) + .with_defined_tags(**(create_deployment_details.defined_tags or {})) .with_infrastructure(infrastructure) .with_runtime(container_runtime) ).deploy(wait_for_completion=False) @@ -461,7 +422,7 @@ def create( self.telemetry.record_event_async( 
category=f"aqua/{model_type}/deployment/create", action="shape", - detail=instance_shape, + detail=create_deployment_details.instance_shape, value=model_name, ) From d0d9b1742e3564841f884840ff10b094a605a1c1 Mon Sep 17 00:00:00 2001 From: Vipul Date: Wed, 5 Feb 2025 20:26:00 +0530 Subject: [PATCH 007/124] Update handler for post method --- ads/aqua/extension/deployment_handler.py | 70 +----------------------- 1 file changed, 3 insertions(+), 67 deletions(-) diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index 2a3e827c3..2a264f174 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from urllib.parse import urlparse @@ -11,7 +11,7 @@ from ads.aqua.extension.errors import Errors from ads.aqua.modeldeployment import AquaDeploymentApp, MDInferenceResponse from ads.aqua.modeldeployment.entities import ModelParams -from ads.config import COMPARTMENT_OCID, PROJECT_OCID +from ads.config import COMPARTMENT_OCID class AquaDeploymentHandler(AquaAPIhandler): @@ -98,71 +98,7 @@ def post(self, *args, **kwargs): # noqa: ARG002 if not input_data: raise HTTPError(400, Errors.NO_INPUT_DATA) - # required input parameters - display_name = input_data.get("display_name") - if not display_name: - raise HTTPError( - 400, Errors.MISSING_REQUIRED_PARAMETER.format("display_name") - ) - instance_shape = input_data.get("instance_shape") - if not instance_shape: - raise HTTPError( - 400, Errors.MISSING_REQUIRED_PARAMETER.format("instance_shape") - ) - model_id = input_data.get("model_id") - if not model_id: - raise HTTPError(400, Errors.MISSING_REQUIRED_PARAMETER.format("model_id")) - - compartment_id = input_data.get("compartment_id", COMPARTMENT_OCID) - project_id = input_data.get("project_id", PROJECT_OCID) - log_group_id = input_data.get("log_group_id") - access_log_id = input_data.get("access_log_id") - predict_log_id = input_data.get("predict_log_id") - description = input_data.get("description") - instance_count = input_data.get("instance_count") - bandwidth_mbps = input_data.get("bandwidth_mbps") - web_concurrency = input_data.get("web_concurrency") - server_port = input_data.get("server_port") - health_check_port = input_data.get("health_check_port") - env_var = input_data.get("env_var") - container_family = input_data.get("container_family") - ocpus = input_data.get("ocpus") - memory_in_gbs = input_data.get("memory_in_gbs") - model_file = input_data.get("model_file") - private_endpoint_id = input_data.get("private_endpoint_id") - container_image_uri = input_data.get("container_image_uri") - cmd_var = input_data.get("cmd_var") - freeform_tags = input_data.get("freeform_tags") - defined_tags = input_data.get("defined_tags") - - self.finish( - AquaDeploymentApp().create( - compartment_id=compartment_id, - project_id=project_id, - model_id=model_id, - display_name=display_name, - description=description, - instance_count=instance_count, - instance_shape=instance_shape, - log_group_id=log_group_id, - access_log_id=access_log_id, - predict_log_id=predict_log_id, - bandwidth_mbps=bandwidth_mbps, - web_concurrency=web_concurrency, - server_port=server_port, - health_check_port=health_check_port, - env_var=env_var, - container_family=container_family, - ocpus=ocpus, - 
memory_in_gbs=memory_in_gbs, - model_file=model_file, - private_endpoint_id=private_endpoint_id, - container_image_uri=container_image_uri, - cmd_var=cmd_var, - freeform_tags=freeform_tags, - defined_tags=defined_tags, - ) - ) + self.finish(AquaDeploymentApp().create(**input_data)) def read(self, id): """Read the information of an Aqua model deployment.""" From a54666813ae19f553417dffb952be5f172b08f46 Mon Sep 17 00:00:00 2001 From: Vipul Date: Wed, 5 Feb 2025 20:26:46 +0530 Subject: [PATCH 008/124] Update unit tests --- .../with_extras/aqua/test_deployment.py | 44 ++++++++++--------- .../aqua/test_deployment_handler.py | 21 ++------- 2 files changed, 27 insertions(+), 38 deletions(-) diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 74612ac8d..76a747fcc 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -1,14 +1,13 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import copy import json import os import unittest -from dataclasses import asdict from importlib import reload from unittest.mock import MagicMock, patch @@ -35,6 +34,7 @@ class TestDataset: SERVICE_COMPARTMENT_ID = "ocid1.compartment.oc1.." USER_COMPARTMENT_ID = "ocid1.compartment.oc1.." + USER_PROJECT_ID = "ocid1.project.oc1.." COMPARTMENT_ID = "ocid1.compartment.oc1.." MODEL_DEPLOYMENT_ID = "ocid1.datasciencemodeldeployment.oc1.." MODEL_DEPLOYMENT_URL = "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." @@ -107,7 +107,7 @@ class TestDataset: } ), "model_deployment_url": MODEL_DEPLOYMENT_URL, - "project_id": "ocid1.datascienceproject.oc1..", + "project_id": USER_PROJECT_ID, "time_created": "2024-01-01T00:00:00.000000+00:00", } ] @@ -166,7 +166,7 @@ class TestDataset: } ), "model_deployment_url": MODEL_DEPLOYMENT_URL, - "project_id": "ocid1.datascienceproject.oc1..", + "project_id": USER_PROJECT_ID, "time_created": "2024-01-01T00:00:00.000000+00:00", } ] @@ -238,7 +238,7 @@ class TestDataset: } ), "model_deployment_url": MODEL_DEPLOYMENT_URL, - "project_id": "ocid1.datascienceproject.oc1..", + "project_id": USER_PROJECT_ID, "time_created": "2024-01-01T00:00:00.000000+00:00", } ] @@ -288,7 +288,7 @@ class TestDataset: } aqua_deployment_detail = { - **vars(AquaDeployment(**aqua_deployment_object)), + **(AquaDeployment(**aqua_deployment_object).to_dict()), "log_group": { "id": "ocid1.loggroup.oc1..", "name": "log-group-name", @@ -340,6 +340,7 @@ def setUpClass(cls): os.environ["CONDA_BUCKET_NS"] = "test-namespace" os.environ["ODSC_MODEL_COMPARTMENT_OCID"] = TestDataset.SERVICE_COMPARTMENT_ID os.environ["PROJECT_COMPARTMENT_OCID"] = TestDataset.USER_COMPARTMENT_ID + os.environ["PROJECT_OCID"] = TestDataset.USER_PROJECT_ID reload(ads.config) reload(ads.aqua) reload(ads.aqua.modeldeployment.deployment) @@ -350,6 +351,7 @@ def tearDownClass(cls): os.environ.pop("CONDA_BUCKET_NS", None) os.environ.pop("ODSC_MODEL_COMPARTMENT_OCID", None) os.environ.pop("PROJECT_COMPARTMENT_OCID", None) + os.environ.pop("PROJECT_OCID", None) reload(ads.config) reload(ads.aqua) reload(ads.aqua.modeldeployment.deployment) @@ -370,7 +372,7 @@ def test_list_deployments(self): assert len(results) == 1 expected_attributes = AquaDeployment.__annotations__.keys() for r in results: 
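# Illustrative sketch -- not part of the patches above. A minimal stand-in for the
# pattern the handler change relies on: the raw request payload is forwarded as
# keyword arguments and validated by a Pydantic model inside create(), instead of
# unpacking every field by hand. CreateDetails and create() here are simplified
# placeholders, not the real ADS classes (the actual code validates
# CreateModelDeploymentDetails and formats errors with build_pydantic_error_message).
from typing import Optional

from pydantic import BaseModel, ValidationError


class CreateDetails(BaseModel):
    model_id: str
    instance_shape: str
    display_name: Optional[str] = None


def create(**kwargs):
    # Validate the pass-through payload; surface field-level errors to the caller.
    try:
        return CreateDetails(**kwargs)
    except ValidationError as ex:
        raise ValueError(f"Invalid parameters: {ex.errors()}") from ex


# The handler can now simply call create(**input_data).
print(create(model_id="ocid1.datasciencemodel.oc1..", instance_shape="BM.GPU.A10.4"))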
- actual_attributes = asdict(r) + actual_attributes = r.to_dict() assert set(actual_attributes) == set( expected_attributes ), "Attributes mismatch" @@ -401,7 +403,9 @@ def test_get_deployment(self, mock_get_resource_name): expected_attributes = set(AquaDeploymentDetail.__annotations__.keys()) | set( AquaDeployment.__annotations__.keys() ) - actual_attributes = asdict(result) + actual_attributes = result.to_dict() + # print(actual_attributes) + print(TestDataset.aqua_deployment_detail) assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" assert actual_attributes == TestDataset.aqua_deployment_detail assert result.log.name == "log-name" @@ -506,8 +510,8 @@ def test_create_deployment_for_foundation_model( mock_create.assert_called_with( model_id=TestDataset.MODEL_ID, - compartment_id=None, - project_id=None, + compartment_id=TestDataset.USER_COMPARTMENT_ID, + project_id=TestDataset.USER_PROJECT_ID, freeform_tags=freeform_tags, defined_tags=defined_tags, ) @@ -515,7 +519,7 @@ def test_create_deployment_for_foundation_model( mock_deploy.assert_called() expected_attributes = set(AquaDeployment.__annotations__.keys()) - actual_attributes = asdict(result) + actual_attributes = result.to_dict() assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" expected_result = copy.deepcopy(TestDataset.aqua_deployment_object) expected_result["state"] = "CREATING" @@ -580,8 +584,8 @@ def test_create_deployment_for_fine_tuned_model( mock_create.assert_called_with( model_id=TestDataset.MODEL_ID, - compartment_id=None, - project_id=None, + compartment_id=TestDataset.USER_COMPARTMENT_ID, + project_id=TestDataset.USER_PROJECT_ID, freeform_tags=None, defined_tags=None, ) @@ -589,7 +593,7 @@ def test_create_deployment_for_fine_tuned_model( mock_deploy.assert_called() expected_attributes = set(AquaDeployment.__annotations__.keys()) - actual_attributes = asdict(result) + actual_attributes = result.to_dict() assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" expected_result = copy.deepcopy(TestDataset.aqua_deployment_object) expected_result["state"] = "CREATING" @@ -656,8 +660,8 @@ def test_create_deployment_for_gguf_model( mock_create.assert_called_with( model_id=TestDataset.MODEL_ID, - compartment_id=None, - project_id=None, + compartment_id=TestDataset.USER_COMPARTMENT_ID, + project_id=TestDataset.USER_PROJECT_ID, freeform_tags=None, defined_tags=None, ) @@ -665,7 +669,7 @@ def test_create_deployment_for_gguf_model( mock_deploy.assert_called() expected_attributes = set(AquaDeployment.__annotations__.keys()) - actual_attributes = asdict(result) + actual_attributes = result.to_dict() assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" expected_result = copy.deepcopy(TestDataset.aqua_deployment_object) expected_result["state"] = "CREATING" @@ -735,8 +739,8 @@ def test_create_deployment_for_tei_byoc_embedding_model( mock_create.assert_called_with( model_id=TestDataset.MODEL_ID, - compartment_id=None, - project_id=None, + compartment_id=TestDataset.USER_COMPARTMENT_ID, + project_id=TestDataset.USER_PROJECT_ID, freeform_tags=None, defined_tags=None, ) @@ -744,7 +748,7 @@ def test_create_deployment_for_tei_byoc_embedding_model( mock_deploy.assert_called() expected_attributes = set(AquaDeployment.__annotations__.keys()) - actual_attributes = asdict(result) + actual_attributes = result.to_dict() assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" expected_result = 
copy.deepcopy(TestDataset.aqua_deployment_object) expected_result["state"] = "CREATING" diff --git a/tests/unitary/with_extras/aqua/test_deployment_handler.py b/tests/unitary/with_extras/aqua/test_deployment_handler.py index 95be6c351..f9bb571f0 100644 --- a/tests/unitary/with_extras/aqua/test_deployment_handler.py +++ b/tests/unitary/with_extras/aqua/test_deployment_handler.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import os @@ -31,6 +31,8 @@ class TestDataset: "display_name": "test-deployment-name", "freeform_tags": {"ftag1": "fvalue1", "ftag2": "fvalue2"}, "defined_tags": {"dtag1": "dvalue1", "dtag2": "dvalue2"}, + "project_id": USER_PROJECT_ID, + "compartment_id": USER_COMPARTMENT_ID, } inference_request = { "prompt": "What is 1+1?", @@ -140,24 +142,7 @@ def test_post(self, mock_create): project_id=TestDataset.USER_PROJECT_ID, model_id=TestDataset.deployment_request["model_id"], display_name=TestDataset.deployment_request["display_name"], - description=None, - instance_count=None, instance_shape=TestDataset.deployment_request["instance_shape"], - log_group_id=None, - access_log_id=None, - predict_log_id=None, - bandwidth_mbps=None, - web_concurrency=None, - server_port=None, - health_check_port=None, - env_var=None, - container_family=None, - memory_in_gbs=None, - ocpus=None, - model_file=None, - private_endpoint_id=None, - container_image_uri=None, - cmd_var=None, freeform_tags=TestDataset.deployment_request["freeform_tags"], defined_tags=TestDataset.deployment_request["defined_tags"], ) From 895d2f340081bfe1f19bd09764c40ade639a2864 Mon Sep 17 00:00:00 2001 From: Vipul Date: Wed, 5 Feb 2025 20:27:47 +0530 Subject: [PATCH 009/124] Fix params input --- ads/aqua/modeldeployment/inference.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ads/aqua/modeldeployment/inference.py b/ads/aqua/modeldeployment/inference.py index 02f9bb408..e5812ad25 100644 --- a/ads/aqua/modeldeployment/inference.py +++ b/ads/aqua/modeldeployment/inference.py @@ -1,15 +1,14 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import json -from dataclasses import asdict, dataclass, field +from dataclasses import dataclass, field import requests -from ads.aqua.app import AquaApp, logger +from ads.aqua.app import AquaApp from ads.aqua.modeldeployment.entities import ModelParams from ads.common.auth import default_signer from ads.telemetry import telemetry @@ -63,7 +62,7 @@ def get_model_deployment_response(self, endpoint): model_response_content """ - params_dict = asdict(self.model_params) + params_dict = self.model_params.to_dict() params_dict = { key: value for key, value in params_dict.items() if value is not None } From 050f13fa15b64a607f639f65a84abe32240e7eea Mon Sep 17 00:00:00 2001 From: Vipul Date: Wed, 5 Feb 2025 21:05:25 +0530 Subject: [PATCH 010/124] Remove newline from err message for handler --- ads/aqua/modeldeployment/deployment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 331f60334..d1d19685b 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -117,7 +117,7 @@ def create( except ValidationError as ex: custom_errors = build_pydantic_error_message(ex) raise AquaValueError( - f"Invalid parameters for creating a model deployment. \nError details: {custom_errors}." + f"Invalid parameters for creating a model deployment. Error details: {custom_errors}." ) from ex # Create a model catalog entry in the user compartment From 86e9c334d4cb46c3fb7fd6e167d9466c8c0ba633 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 5 Feb 2025 11:14:18 -0500 Subject: [PATCH 011/124] Updated pr. --- ads/aqua/modeldeployment/deployment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 41b48091f..6a8c52ccd 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -749,7 +749,7 @@ def get_multimodel_compatible_shapes( ] if not common_shapes: - raise ValueError( + raise AquaValueError( "There are no available shapes for models selected at this moment, please select different model to deploy." ) @@ -768,7 +768,7 @@ def get_multimodel_compatible_shapes( ) if not gpu_allocation: - raise ValueError( + raise AquaValueError( "There are no available gpu allocations for models selected at this moment, please select different model to deploy." 
) From 170f7a5ae36ec42ce5c9c5fc626bdd4f6f282839 Mon Sep 17 00:00:00 2001 From: Vipul Date: Thu, 6 Feb 2025 00:00:19 +0530 Subject: [PATCH 012/124] simplify var init --- ads/aqua/modeldeployment/deployment.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index d1d19685b..f657d30cd 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -162,16 +162,8 @@ def create( ) from err # set up env and cmd var - env_var = ( - create_deployment_details.env_var - if create_deployment_details.env_var - else {} - ) - cmd_var = ( - create_deployment_details.cmd_var - if create_deployment_details.cmd_var - else [] - ) + env_var = create_deployment_details.env_var or {} + cmd_var = create_deployment_details.cmd_var or [] try: model_path_prefix = aqua_model.custom_metadata_list.get( From 02fe8299cfb14e93de9dda6a667002b11989c2e2 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Wed, 5 Feb 2025 11:17:42 -0800 Subject: [PATCH 013/124] combined multimodel and normal model unit tests for test_get_deployment_default_params --- .../deployment/deployment_gpu_config.json | 39 ++++-- .../deployment/deployment_gpu_config2.json | 34 ----- .../with_extras/aqua/test_deployment.py | 117 +++++------------- 3 files changed, 54 insertions(+), 136 deletions(-) delete mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json index 7ff81d11a..8764c354b 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json +++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json @@ -1,30 +1,43 @@ { "shape": [ + "VM.GPU.A10.1", "VM.GPU.A10.2", - "VM.GPU.A10.4", - "BM.GPU.A100-v2.8", - "BM.GPU.H100.8" + "BM.GPU.A10.4", + "BM.GPU.L40S-NC.4" ], "configuration": { - "VM.GPU.A10.4": { + "VM.GPU.A10.2": { "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" }, "multi_model_deployment": [ { - "gpu_count": 1, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } + "gpu_count": 1 + } + ] + }, + "BM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1 }, { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code 6" - } + "gpu_count": 2 + } + ] + }, + "BM.GPU.L40S-NC.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 2 } ] } } } - diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json deleted file mode 100644 index d470b6130..000000000 --- a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config2.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "shape": [ - "VM.GPU.A10.1", - "VM.GPU.A10.2", - "BM.GPU.A10.4", - "BM.GPU.L40S-NC.4" - ], - "configuration": { - "VM.GPU.A10.2": { - "multi_model_deployment": [ - { - "gpu_count": 1 - } - ] - }, - "VM.GPU.A10.4": { - "multi_model_deployment": [ - { - "gpu_count": 1 - }, - { - "gpu_count": 2 - } - ] - }, - "BM.GPU.L40S-NC.4": { - "multi_model_deployment": [ - { - "gpu_count": 2 - } - ] - } - } -} diff --git 
a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 7a5f06f41..9ee399826 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -40,7 +40,7 @@ class TestDataset: MODEL_DEPLOYMENT_URL = "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." MODEL_ID = "ocid1.datasciencemodeldeployment.oc1.." DEPLOYMENT_IMAGE_NAME = "dsmc://image-name:1.0.0.0" - DEPLOYMENT_SHAPE_NAME = "VM.GPU.A10.4" + DEPLOYMENT_SHAPE_NAME = "BM.GPU.A10.4" DEPLOYMENT_GPU_COUNT = 1 DEPLOYMENT_GPU_COUNT_B = 2 DEPLOYMENT_SHAPE_NAME_CPU = "VM.Standard.A1.Flex" @@ -764,24 +764,28 @@ def test_create_deployment_for_tei_byoc_embedding_model( ( "VLLM_PARAMS", "odsc-vllm-serving", + 2, ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ["--max-model-len 4096", "--trust-remote-code"], ), ( "VLLM_PARAMS", "odsc-vllm-serving", - [], - [], + None, + ["--max-model-len 4096"], + ["--max-model-len 4096"], ), ( "TGI_PARAMS", "odsc-tgi-serving", - ["--sharded true", "--trust-remote-code", "--max-stop-sequences"], - ["--max-stop-sequences"], + 1, + [], + [], ), ( "CUSTOM_PARAMS", "custom-container-key", + None, ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ), @@ -789,24 +793,30 @@ def test_create_deployment_for_tei_byoc_embedding_model( ) @patch("ads.model.datascience_model.DataScienceModel.from_id") def test_get_deployment_default_params( - self, - container_params_field, - container_type_key, - params, - allowed_params, - mock_from_id, - ): + self, + container_params_field, + container_type_key, + gpu_count, + params, + allowed_params, + mock_from_id, + ): """Test for fetching config details for a given deployment.""" config_json = os.path.join( - self.curr_dir, "test_data/deployment/deployment_config.json" + self.curr_dir, "test_data/deployment/deployment_gpu_config.json" ) with open(config_json, "r") as _file: config = json.load(_file) # update config params for testing - config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["parameters"][ + if gpu_count: + # build field for multi_model_deployment + config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["multi_model_deployment"] = [{"gpu_count": gpu_count, "parameters": {container_params_field: " ".join(params)}}] + else: + # build field for normal deployment + config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["parameters"][ container_params_field - ] = " ".join(params) + ] = " ".join(params) mock_model = MagicMock() custom_metadata_list = ModelCustomMetadata() @@ -817,87 +827,16 @@ def test_get_deployment_default_params( mock_from_id.return_value = mock_model self.app.get_deployment_config = MagicMock(return_value=config) + result = self.app.get_deployment_default_params( - TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME + TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, gpu_count ) - if container_params_field == "CUSTOM_PARAMS": + if container_params_field in ("CUSTOM_PARAMS", "TGI_PARAMS"): assert result == [] else: assert result == allowed_params - - # @parameterized.expand( - # [ - # ( - # "VLLM_PARAMS", - # "odsc-vllm-serving", - # 1, - # ["--max-model-len 4096"], - # ["--max-model-len 4096"], - # ), - # ( - # "TGI_PARAMS", - # "odsc-tgi-serving", - # 1, - # [], - # [], - # ), - # ( - # "CUSTOM_PARAMS", - # "custom-container-key", - # None, - # ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], - # 
["--max-model-len 4096", "--seed 42", "--trust-remote-code"], - # ), - # ] - # ) - @patch("ads.model.datascience_model.DataScienceModel.from_id") - def test_get_deployment_default_params_multimodel( - self, - # container_params_field, - # container_type_key, - # gpu_count, - # params, - # allowed_params, - mock_from_id, - ): - """Test for fetching config details for a given deployment.""" - - config_json = os.path.join( - self.curr_dir, "test_data/deployment/deployment_gpu_config2.json" - ) - with open(config_json, "r") as _file: - config = json.load(_file) - # update config params for testing - # config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["multi_model_deployment"] = [{"gpu_count": gpu_count, "parameters": {container_params_field: " ".join(params)}}] - - mock_model = MagicMock() - custom_metadata_list = ModelCustomMetadata() - custom_metadata_list.add( - **{"key": "deployment-container", "value": "odsc-vllm-serving"} - ) - mock_model.custom_metadata_list = custom_metadata_list - mock_from_id.return_value = mock_model - - self.app.get_deployment_config = MagicMock(return_value=config) - # result = self.app.get_deployment_default_params( - # TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, gpu_count - # ) - - result = self.app.get_deployment_default_params( - TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, TestDataset.DEPLOYMENT_GPU_COUNT_B - ) - - assert result == [] - # if container_params_field in ("CUSTOM_PARAMS", "TGI_PARAMS"): - # assert result == [] - # else: - # assert result == allowed_params - - - - @parameterized.expand( [ ( From 4fb0923bb4260ed5c42449a65e1a72b18750a3c3 Mon Sep 17 00:00:00 2001 From: Vipul Date: Thu, 6 Feb 2025 01:33:31 +0530 Subject: [PATCH 014/124] Review comments --- ads/aqua/common/entities.py | 16 +++++++++++++++- ads/aqua/modeldeployment/entities.py | 13 ++----------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index 3528e9160..742d17776 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -1,7 +1,11 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +from typing import Optional + +from ads.aqua.config.utils.serializer import Serializable + class ContainerSpec: """ @@ -15,3 +19,13 @@ class ContainerSpec: ENV_VARS = "envVars" RESTRICTED_PARAMS = "restrictedParams" EVALUATION_CONFIGURATION = "evaluationConfiguration" + + +class ShapeInfo(Serializable): + instance_shape: Optional[str] = None + instance_count: Optional[int] = None + ocpus: Optional[float] = None + memory_in_gbs: Optional[float] = None + + class Config: + extra = "ignore" diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index fce0504a8..0245110ff 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -10,6 +10,7 @@ ) from pydantic import Field, model_validator +from ads.aqua.common.entities import ShapeInfo from ads.aqua.common.enums import Tags from ads.aqua.common.errors import AquaValueError from ads.aqua.config.utils.serializer import Serializable @@ -27,20 +28,10 @@ class ModelParams(Serializable): model: Optional[str] = None class Config: - extra = "ignore" + extra = "allow" protected_namespaces = () -class ShapeInfo(Serializable): - instance_shape: Optional[str] = None - instance_count: Optional[int] = None - ocpus: Optional[float] = None - memory_in_gbs: Optional[float] = None - - class Config: - extra = "ignore" - - class AquaDeployment(Serializable): """Represents an Aqua Model Deployment""" From 8d51a8b2f4d05264a78ad1f6a32e36bee10a9ff2 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 5 Feb 2025 16:43:40 -0500 Subject: [PATCH 015/124] Updated pr. --- ads/aqua/common/utils.py | 41 +++++- ads/aqua/extension/deployment_handler.py | 18 ++- ads/aqua/modeldeployment/deployment.py | 28 ++-- ads/aqua/modeldeployment/entities.py | 135 ++++++++++++++---- .../aqua_multi_model_deployment_config.json | 25 +--- .../with_extras/aqua/test_deployment.py | 8 +- .../aqua/test_deployment_handler.py | 2 +- 7 files changed, 180 insertions(+), 77 deletions(-) diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py index 67660f74c..2e172f157 100644 --- a/ads/aqua/common/utils.py +++ b/ads/aqua/common/utils.py @@ -1223,6 +1223,45 @@ def build_pydantic_error_message(ex: ValidationError): def get_combinations(input_dict: dict): - """Finds all combinations within input dict.""" + """Finds all unique combinations within input dict. + + The input is a dict of {model:[gpu_count]} on a specific shape and this method will + return a list of all unique combinations of gpu allocation of each model. 
+ + For example: + + input: {'model_a': [2, 4], 'model_b': [1, 2, 4], 'model_c': [1, 2, 8]} + output: + [ + {'model_a': 2, 'model_b': 1, 'model_c': 1}, + {'model_a': 2, 'model_b': 1, 'model_c': 2}, + {'model_a': 2, 'model_b': 1, 'model_c': 8}, + {'model_a': 2, 'model_b': 2, 'model_c': 1}, + {'model_a': 2, 'model_b': 2, 'model_c': 2}, + {'model_a': 2, 'model_b': 2, 'model_c': 8}, + {'model_a': 2, 'model_b': 4, 'model_c': 1}, + {'model_a': 2, 'model_b': 4, 'model_c': 2}, + {'model_a': 2, 'model_b': 4, 'model_c': 8}, + {'model_a': 4, 'model_b': 1, 'model_c': 1}, + {'model_a': 4, 'model_b': 1, 'model_c': 2}, + {'model_a': 4, 'model_b': 1, 'model_c': 8}, + {'model_a': 4, 'model_b': 2, 'model_c': 1}, + {'model_a': 4, 'model_b': 2, 'model_c': 2}, + {'model_a': 4, 'model_b': 2, 'model_c': 8}, + {'model_a': 4, 'model_b': 4, 'model_c': 1}, + {'model_a': 4, 'model_b': 4, 'model_c': 2}, + {'model_a': 4, 'model_b': 4, 'model_c': 8} + ] + + Parameters + ---------- + input_dict: dict + A dict of {model:[gpu_count]} on a specific shape + + Returns + ------- + list: + A list of all unique combinations of gpu allocation of each model. + """ keys, values = zip(*input_dict.items()) return [dict(zip(keys, v)) for v in itertools.product(*values)] diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index 3a98f9a75..065ae92df 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -1,7 +1,8 @@ #!/usr/bin/env python -# Copyright (c) 2025 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +from typing import List, Union from urllib.parse import urlparse from tornado.web import HTTPError @@ -20,7 +21,7 @@ class AquaDeploymentHandler(AquaAPIhandler): Methods ------- - get(self, id="") + get(self, id: Union[str, List[str]]) Retrieves a list of AQUA deployments or model info or logs by ID. post(self, *args, **kwargs) Creates a new AQUA deployment. 
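# Illustrative sketch -- not part of the patch above. A standalone reproduction of
# the enumeration that the get_combinations docstring describes, using only the
# standard library; the helper name is local to this sketch.
import itertools


def enumerate_gpu_combinations(model_gpu_counts: dict) -> list:
    """Return every {model: gpu_count} assignment from per-model GPU options."""
    keys, values = zip(*model_gpu_counts.items())
    return [dict(zip(keys, combo)) for combo in itertools.product(*values)]


# Two models with their allowed GPU counts on a single shape:
print(enumerate_gpu_combinations({"model_a": [2, 4], "model_b": [1, 2]}))
# [{'model_a': 2, 'model_b': 1}, {'model_a': 2, 'model_b': 2},
#  {'model_a': 4, 'model_b': 1}, {'model_a': 4, 'model_b': 2}]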
@@ -37,7 +38,7 @@ class AquaDeploymentHandler(AquaAPIhandler): """ @handle_exceptions - def get(self, id="", model_ids=None): + def get(self, id: Union[str, List[str]] = None): """Handle GET request.""" url_parse = urlparse(self.request.path) paths = url_parse.path.strip("/") @@ -48,12 +49,15 @@ def get(self, id="", model_ids=None): ) return self.get_deployment_config(id) elif paths.startswith("aqua/deployments/modelconfig"): - if not model_ids: + if isinstance(id, list): + return self.get_multimodel_compatible_shapes(id) + elif isinstance(id, str): + return self.get_deployment_config(id) + else: raise HTTPError( 400, - f"The request {self.request.path} requires a list of model ids.", + f"The request {self.request.path} requires either a model id or a list of model ids.", ) - return self.get_multimodel_compatible_shapes(model_ids) elif paths.startswith("aqua/deployments"): if not id: return self.list() @@ -192,7 +196,7 @@ def get_deployment_config(self, model_id): """Gets the deployment config for Aqua model.""" return self.finish(AquaDeploymentApp().get_deployment_config(model_id=model_id)) - def get_multimodel_compatible_shapes(self, model_ids): + def get_multimodel_compatible_shapes(self, model_ids: List[str]): """Gets the multi model deployment config and optimal GPU allocations for Aqua models.""" primary_model_id = self.get_argument("primary_model_id", default=None) return self.finish( diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 6a8c52ccd..e15c0e912 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -42,10 +42,10 @@ AquaDeployment, AquaDeploymentConfig, AquaDeploymentDetail, - AquaDeploymentModelShapeInfoSummary, - AquaDeploymentMultiModelConfigSummary, - AquaDeploymentMultiModelGPUAllocation, - AquaDeploymentMultiModelResponse, + ConfigurationItem, + GPUModelAllocation, + GPUShapeAllocation, + ModelDeploymentConfigSummary, ) from ads.aqua.ui import ModelFormat from ads.common.object_storage_details import ObjectStorageDetails @@ -681,7 +681,7 @@ def get_deployment_config(self, model_id: str) -> Dict: ) def get_multimodel_compatible_shapes( self, model_ids: List[str], primary_model_id: str = None - ) -> AquaDeploymentMultiModelConfigSummary: + ) -> ModelDeploymentConfigSummary: """Gets the deployment config of multiple Aqua models and calculate the gpu allocations for all compatible shapes. If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated. If provided, gpu count for each compatible shape will be prioritized for primary model. @@ -704,8 +704,8 @@ def get_multimodel_compatible_shapes( Returns ------- - AquaDeploymentMultiModelSummary: - An instance of AquaDeploymentMultiModelSummary. + ModelDeploymentConfigSummary: + An instance of ModelDeploymentConfigSummary. 
""" deployment = {} model_shape_gpu = {} @@ -728,7 +728,7 @@ def get_multimodel_compatible_shapes( model_id: { "shape": deployment_config.shape, "configuration": { - shape: AquaDeploymentModelShapeInfoSummary( + shape: ConfigurationItem( parameters=deployment_config.configuration[ shape ].parameters @@ -763,7 +763,7 @@ def get_multimodel_compatible_shapes( model_gpu, primary_model_id ) if is_compatible: - gpu_allocation[common_shape] = AquaDeploymentMultiModelResponse( + gpu_allocation[common_shape] = GPUShapeAllocation( models=combination, total_gpus_available=maximum_gpu_count ) @@ -772,7 +772,7 @@ def get_multimodel_compatible_shapes( "There are no available gpu allocations for models selected at this moment, please select different model to deploy." ) - return AquaDeploymentMultiModelConfigSummary( + return ModelDeploymentConfigSummary( deployment_config=deployment, gpu_allocation=gpu_allocation ) @@ -821,9 +821,7 @@ def _verify_compatibility( True, maximum_gpu_count, [ - AquaDeploymentMultiModelGPUAllocation( - ocid=ocid, gpu_count=gpu_count - ) + GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) for ocid, gpu_count in combination.items() ], ) @@ -851,9 +849,7 @@ def _verify_compatibility( True, maximum_gpu_count, [ - AquaDeploymentMultiModelGPUAllocation( - ocid=ocid, gpu_count=gpu_count - ) + GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) for ocid, gpu_count in optimal_combination.items() ], ) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index a26c901f3..744e9e5b2 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2025 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ from dataclasses import dataclass, field @@ -144,63 +144,146 @@ class AquaDeploymentDetail(AquaDeployment, DataClassSerializable): log: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier) -class AquaDeploymentMultiModelConfig(Serializable): - gpu_count: int - parameters: dict +class MultiModelConfig(Serializable): + """Describes how many GPUs and the parameters of specific shape for multi model deployment. + + Attributes: + gpu_count (int): Number of GPUs count to this model of this shape. + parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to + configure the behavior of a particular GPU shape. + """ + + gpu_count: int = Field( + default_factory=int, description="The number of GPUs allocated to the model." + ) + parameters: Optional[Dict[str, str]] = Field( + default_factory=dict, + description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).", + ) class Config: extra = "ignore" -class AquaDeploymentModelShapeInfoSummary(Serializable): - parameters: dict +class ConfigurationItem(Serializable): + """Holds key-value parameter pairs for a specific GPU shape. + + Attributes: + parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to + configure the behavior of a particular GPU shape. 
+ """ + + parameters: Optional[Dict[str, str]] = Field( + default_factory=dict, + description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).", + ) class Config: extra = "ignore" -class AquaDeploymentModelShapeInfo(AquaDeploymentModelShapeInfoSummary): - multi_model_deployment: Optional[List[AquaDeploymentMultiModelConfig]] = Field( - default_factory=list +class MultiModelConfigurationItem(ConfigurationItem): + """Holds a list of multi model configuration. + + Attributes: + multi_model_deployment (List[MultiModelConfig]): A list of multi model configuration details. + """ + + multi_model_deployment: Optional[List[MultiModelConfig]] = Field( + default_factory=list, description="A list of multi model configuration details." ) -class AquaDeploymentConfigSummary(Serializable): - shape: List[str] - configuration: Dict[str, AquaDeploymentModelShapeInfoSummary] = Field( - default_factory=dict +class ModelDeploymentConfig(Serializable): + """Represents one model's shape list and detailed configuration. + + Attributes: + shape (List[str]): A list of shape names (e.g., BM.GPU.A10.4). + configuration (Dict[str, ConfigurationItem]): Maps each shape to its configuration details. + """ + + shape: List[str] = Field( + default_factory=list, description="List of supported shapes for the model." + ) + configuration: Dict[str, ConfigurationItem] = Field( + default_factory=dict, description="Configuration details keyed by shape." ) class Config: extra = "ignore" -class AquaDeploymentConfig(AquaDeploymentConfigSummary): - configuration: Dict[str, AquaDeploymentModelShapeInfo] = Field(default_factory=dict) +class AquaDeploymentConfig(ModelDeploymentConfig): + """Represents multi model's shape list and detailed configuration. + Attributes: + shape (List[str]): A list of shape names (e.g., BM.GPU.A10.4). + configuration (Dict[str, MultiModelConfigurationItem]): Maps each shape to its configuration details. + """ -class AquaDeploymentMultiModelGPUAllocation(Serializable): - ocid: str - gpu_count: int + configuration: Dict[str, MultiModelConfigurationItem] = Field( + default_factory=dict, description="Configuration details keyed by shape." + ) + + +class GPUModelAllocation(Serializable): + """Describes how many GPUs are allocated to a particular model. + + Attributes: + ocid (str): The unique identifier of the model. + gpu_count (int): Number of GPUs allocated to this model. + """ + + ocid: str = Field(default_factory=str, description="The unique model OCID.") + gpu_count: int = Field( + default_factory=int, description="The number of GPUs allocated to the model." + ) class Config: extra = "ignore" -class AquaDeploymentMultiModelResponse(Serializable): - models: List[AquaDeploymentMultiModelGPUAllocation] = Field(default_factory=list) - total_gpus_available: int +class GPUShapeAllocation(Serializable): + """Allocation details for a specific GPU shape. + + Attributes: + models (List[GPUModelAllocation]): List of model GPU allocations for this shape. + total_gpus_available (int): The total number of GPUs available for this shape. + """ + + models: List[GPUModelAllocation] = Field( + default_factory=list, description="List of model allocations for this shape." + ) + total_gpus_available: int = Field( + default_factory=int, description="Total GPUs available for this shape." 
+ ) class Config: extra = "ignore" -class AquaDeploymentMultiModelConfigSummary(Serializable): - deployment_config: Dict[str, AquaDeploymentConfigSummary] = Field( - default_factory=dict +class ModelDeploymentConfigSummary(Serializable): + """Top-level configuration model for OCI-based deployments. + + Attributes: + deployment_config (Dict[str, ModelDeploymentConfig]): Deployment configurations + keyed by model OCID. + gpu_allocation (Dict[str, GPUShapeAllocation]): GPU allocations keyed by GPU shape. + """ + + deployment_config: Dict[str, ModelDeploymentConfig] = Field( + default_factory=dict, + description=( + "Deployment configuration details for each model, including supported shapes " + "and shape-specific parameters." + ), ) - gpu_allocation: Dict[str, AquaDeploymentMultiModelResponse] = Field( - default_factory=dict + gpu_allocation: Dict[str, GPUShapeAllocation] = Field( + default_factory=dict, + description=( + "Details on how GPUs are allocated per shape, including the total " + "GPUs available for each shape." + ), ) class Config: diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json index 58b20c888..ac197f726 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json @@ -28,22 +28,13 @@ "BM.GPU.H100.8": { "multi_model_deployment": [ { - "gpu_count": 1, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } + "gpu_count": 1 }, { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } + "gpu_count": 2 }, { - "gpu_count": 8, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } + "gpu_count": 8 } ], "parameters": { @@ -58,10 +49,7 @@ "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" } } - ], - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - } + ] }, "VM.GPU.A10.4": { "multi_model_deployment": [ @@ -72,10 +60,7 @@ } }, { - "gpu_count": 4, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } + "gpu_count": 4 } ], "parameters": { diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 9f9f72a6f..855cc5cdb 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2025 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
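# Illustrative sketch -- not part of the patches above. Shows how a per-model
# deployment config like the JSON test data maps onto the entity models introduced
# in entities.py. The classes are re-declared here as simplified stand-ins (plain
# pydantic.BaseModel instead of Serializable) so the snippet runs on its own;
# field names follow the diff.
from typing import Dict, List

from pydantic import BaseModel, Field


class MultiModelConfig(BaseModel):
    gpu_count: int = 0
    parameters: Dict[str, str] = Field(default_factory=dict)


class ConfigurationItem(BaseModel):
    parameters: Dict[str, str] = Field(default_factory=dict)
    multi_model_deployment: List[MultiModelConfig] = Field(default_factory=list)


class AquaDeploymentConfig(BaseModel):
    shape: List[str] = Field(default_factory=list)
    configuration: Dict[str, ConfigurationItem] = Field(default_factory=dict)


raw = {
    "shape": ["BM.GPU.A10.4"],
    "configuration": {
        "BM.GPU.A10.4": {
            "parameters": {"VLLM_PARAMS": "--trust-remote-code --max-model-len 60000"},
            "multi_model_deployment": [{"gpu_count": 1}, {"gpu_count": 2}],
        }
    },
}
config = AquaDeploymentConfig(**raw)
print([m.gpu_count for m in config.configuration["BM.GPU.A10.4"].multi_model_deployment])
# [1, 2]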
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import copy @@ -338,11 +338,7 @@ class TestDataset: "BM.GPU.H100.8", ], "configuration": { - "VM.GPU.A10.2": { - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - } - }, + "VM.GPU.A10.2": {"parameters": {}}, "VM.GPU.A10.4": { "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" diff --git a/tests/unitary/with_extras/aqua/test_deployment_handler.py b/tests/unitary/with_extras/aqua/test_deployment_handler.py index fe139bc98..5ea7c2bb9 100644 --- a/tests/unitary/with_extras/aqua/test_deployment_handler.py +++ b/tests/unitary/with_extras/aqua/test_deployment_handler.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2025 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import os From 32b73f3ae4463a20c8a3c23d9da56c8ba568a631 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 5 Feb 2025 18:27:27 -0500 Subject: [PATCH 016/124] Updated pr. --- ads/aqua/common/utils.py | 46 ---------------- ads/aqua/modeldeployment/deployment.py | 9 +--- ads/aqua/modeldeployment/entities.py | 20 +++---- ads/aqua/modeldeployment/utils.py | 51 ++++++++++++++++++ .../with_extras/aqua/test_deployment.py | 52 +++++++++++++++++-- .../aqua/test_deployment_handler.py | 4 +- 6 files changed, 108 insertions(+), 74 deletions(-) create mode 100644 ads/aqua/modeldeployment/utils.py diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py index 142c3c680..f4e002d19 100644 --- a/ads/aqua/common/utils.py +++ b/ads/aqua/common/utils.py @@ -5,7 +5,6 @@ import asyncio import base64 -import itertools import json import logging import os @@ -1220,48 +1219,3 @@ def build_pydantic_error_message(ex: ValidationError): for e in ex.errors() if "loc" in e and e["loc"] } or "; ".join(e["msg"] for e in ex.errors()) - - -def get_combinations(input_dict: dict): - """Finds all unique combinations within input dict. - - The input is a dict of {model:[gpu_count]} on a specific shape and this method will - return a list of all unique combinations of gpu allocation of each model. - - For example: - - input: {'model_a': [2, 4], 'model_b': [1, 2, 4], 'model_c': [1, 2, 8]} - output: - [ - {'model_a': 2, 'model_b': 1, 'model_c': 1}, - {'model_a': 2, 'model_b': 1, 'model_c': 2}, - {'model_a': 2, 'model_b': 1, 'model_c': 8}, - {'model_a': 2, 'model_b': 2, 'model_c': 1}, - {'model_a': 2, 'model_b': 2, 'model_c': 2}, - {'model_a': 2, 'model_b': 2, 'model_c': 8}, - {'model_a': 2, 'model_b': 4, 'model_c': 1}, - {'model_a': 2, 'model_b': 4, 'model_c': 2}, - {'model_a': 2, 'model_b': 4, 'model_c': 8}, - {'model_a': 4, 'model_b': 1, 'model_c': 1}, - {'model_a': 4, 'model_b': 1, 'model_c': 2}, - {'model_a': 4, 'model_b': 1, 'model_c': 8}, - {'model_a': 4, 'model_b': 2, 'model_c': 1}, - {'model_a': 4, 'model_b': 2, 'model_c': 2}, - {'model_a': 4, 'model_b': 2, 'model_c': 8}, - {'model_a': 4, 'model_b': 4, 'model_c': 1}, - {'model_a': 4, 'model_b': 4, 'model_c': 2}, - {'model_a': 4, 'model_b': 4, 'model_c': 8} - ] - - Parameters - ---------- - input_dict: dict - A dict of {model:[gpu_count]} on a specific shape - - Returns - ------- - list: - A list of all unique combinations of gpu allocation of each model. 
- """ - keys, values = zip(*input_dict.items()) - return [dict(zip(keys, v)) for v in itertools.product(*values)] diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index b62b20dfc..9d3b07798 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -17,7 +17,6 @@ from ads.aqua.common.errors import AquaRuntimeError, AquaValueError from ads.aqua.common.utils import ( build_pydantic_error_message, - get_combinations, get_combined_params, get_container_config, get_container_image, @@ -45,12 +44,12 @@ AquaDeployment, AquaDeploymentConfig, AquaDeploymentDetail, - ConfigurationItem, CreateModelDeploymentDetails, GPUModelAllocation, GPUShapeAllocation, ModelDeploymentConfigSummary, ) +from ads.aqua.modeldeployment.utils import get_combinations from ads.aqua.ui import ModelFormat from ads.common.object_storage_details import ObjectStorageDetails from ads.common.utils import get_log_links @@ -681,11 +680,7 @@ def get_multimodel_compatible_shapes( model_id: { "shape": deployment_config.shape, "configuration": { - shape: ConfigurationItem( - parameters=deployment_config.configuration[ - shape - ].parameters - ) + shape: deployment_config.configuration[shape] for shape in deployment_config.shape }, } diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index e3322e8da..e8f170e33 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -282,28 +282,20 @@ class ConfigurationItem(Serializable): Attributes: parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to configure the behavior of a particular GPU shape. + multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details. """ parameters: Optional[Dict[str, str]] = Field( default_factory=dict, description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).", ) - - class Config: - extra = "ignore" - - -class MultiModelConfigurationItem(ConfigurationItem): - """Holds a list of multi model configuration. - - Attributes: - multi_model_deployment (List[MultiModelConfig]): A list of multi model configuration details. - """ - multi_model_deployment: Optional[List[MultiModelConfig]] = Field( default_factory=list, description="A list of multi model configuration details." ) + class Config: + extra = "ignore" + class ModelDeploymentConfig(Serializable): """Represents one model's shape list and detailed configuration. @@ -329,10 +321,10 @@ class AquaDeploymentConfig(ModelDeploymentConfig): Attributes: shape (List[str]): A list of shape names (e.g., BM.GPU.A10.4). - configuration (Dict[str, MultiModelConfigurationItem]): Maps each shape to its configuration details. + configuration (Dict[str, ConfigurationItem]): Maps each shape to its configuration details. """ - configuration: Dict[str, MultiModelConfigurationItem] = Field( + configuration: Dict[str, ConfigurationItem] = Field( default_factory=dict, description="Configuration details keyed by shape." ) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py new file mode 100644 index 000000000..33d529bea --- /dev/null +++ b/ads/aqua/modeldeployment/utils.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# Copyright (c) 2025 Oracle and/or its affiliates. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +"""AQUA model deployment utils""" + +import itertools + + +def get_combinations(input_dict: dict): + """Finds all unique combinations within input dict. + + The input is a dict of {model:[gpu_count]} on a specific shape and this method will + return a list of all unique combinations of gpu allocation of each model. + + For example: + + input: {'model_a': [2, 4], 'model_b': [1, 2, 4], 'model_c': [1, 2, 8]} + output: + [ + {'model_a': 2, 'model_b': 1, 'model_c': 1}, + {'model_a': 2, 'model_b': 1, 'model_c': 2}, + {'model_a': 2, 'model_b': 1, 'model_c': 8}, + {'model_a': 2, 'model_b': 2, 'model_c': 1}, + {'model_a': 2, 'model_b': 2, 'model_c': 2}, + {'model_a': 2, 'model_b': 2, 'model_c': 8}, + {'model_a': 2, 'model_b': 4, 'model_c': 1}, + {'model_a': 2, 'model_b': 4, 'model_c': 2}, + {'model_a': 2, 'model_b': 4, 'model_c': 8}, + {'model_a': 4, 'model_b': 1, 'model_c': 1}, + {'model_a': 4, 'model_b': 1, 'model_c': 2}, + {'model_a': 4, 'model_b': 1, 'model_c': 8}, + {'model_a': 4, 'model_b': 2, 'model_c': 1}, + {'model_a': 4, 'model_b': 2, 'model_c': 2}, + {'model_a': 4, 'model_b': 2, 'model_c': 8}, + {'model_a': 4, 'model_b': 4, 'model_c': 1}, + {'model_a': 4, 'model_b': 4, 'model_c': 2}, + {'model_a': 4, 'model_b': 4, 'model_c': 8} + ] + + Parameters + ---------- + input_dict: dict + A dict of {model:[gpu_count]} on a specific shape + + Returns + ------- + list: + A list of all unique combinations of gpu allocation of each model. + """ + keys, values = zip(*input_dict.items()) + return [dict(zip(keys, v)) for v in itertools.product(*values)] diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 8711c2497..22090e40c 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -338,21 +338,65 @@ class TestDataset: "BM.GPU.H100.8", ], "configuration": { - "VM.GPU.A10.2": {"parameters": {}}, + "VM.GPU.A10.2": { + "parameters": {}, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + } + ], + }, "VM.GPU.A10.4": { "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - } + }, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + {"gpu_count": 4, "parameters": {}}, + ], }, "BM.GPU.A100-v2.8": { "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - } + }, + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + ], }, "BM.GPU.H100.8": { "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - } + }, + "multi_model_deployment": [ + {"gpu_count": 1, "parameters": {}}, + {"gpu_count": 2, "parameters": {}}, + {"gpu_count": 8, "parameters": {}}, + ], }, }, } diff --git a/tests/unitary/with_extras/aqua/test_deployment_handler.py b/tests/unitary/with_extras/aqua/test_deployment_handler.py index e159d0b77..e38f3d610 100644 --- a/tests/unitary/with_extras/aqua/test_deployment_handler.py +++ 
b/tests/unitary/with_extras/aqua/test_deployment_handler.py @@ -97,9 +97,7 @@ def test_get_multimodel_compatible_shapes( ): """Test get method to return multi model deployment config""" self.deployment_handler.request.path = "aqua/deployments/modelconfig" - self.deployment_handler.get( - model_ids=["mock-model-id-one", "mock-model-id-two"] - ) + self.deployment_handler.get(id=["mock-model-id-one", "mock-model-id-two"]) mock_get_multimodel_compatible_shapes.assert_called_with( model_ids=["mock-model-id-one", "mock-model-id-two"], primary_model_id=None ) From fbd77d5fb39f4035f0bdcc8c8c0bc9108d70f5db Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Thu, 6 Feb 2025 18:08:53 -0800 Subject: [PATCH 017/124] ODSC-68526:Optimize Multi-Model Configuration Retrieval Using Parallel Execution --- ads/aqua/extension/deployment_handler.py | 6 +- ads/aqua/modeldeployment/deployment.py | 177 +--------- ads/aqua/modeldeployment/utils.py | 317 +++++++++++++++--- .../with_extras/aqua/test_deployment.py | 4 +- .../aqua/test_deployment_handler.py | 12 +- 5 files changed, 303 insertions(+), 213 deletions(-) diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index b7e885ed5..f16d0bf98 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -50,7 +50,7 @@ def get(self, id: Union[str, List[str]] = None): return self.get_deployment_config(id) elif paths.startswith("aqua/deployments/modelconfig"): if isinstance(id, list): - return self.get_multimodel_compatible_shapes(id) + return self.get_multimodel_deployment_config(id) elif isinstance(id, str): return self.get_deployment_config(id) else: @@ -132,11 +132,11 @@ def get_deployment_config(self, model_id): """Gets the deployment config for Aqua model.""" return self.finish(AquaDeploymentApp().get_deployment_config(model_id=model_id)) - def get_multimodel_compatible_shapes(self, model_ids: List[str]): + def get_multimodel_deployment_config(self, model_ids: List[str]): """Gets the multi model deployment config and optimal GPU allocations for Aqua models.""" primary_model_id = self.get_argument("primary_model_id", default=None) return self.finish( - AquaDeploymentApp().get_multimodel_compatible_shapes( + AquaDeploymentApp().get_multimodel_deployment_config( model_ids=model_ids, primary_model_id=primary_model_id ) ) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 883a68c39..591c59568 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -2,18 +2,14 @@ # Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -import copy import shlex -from typing import Dict, List, Union +from typing import Dict, List, Optional, Union from pydantic import ValidationError from ads.aqua.app import AquaApp, logger from ads.aqua.common.entities import ContainerSpec -from ads.aqua.common.enums import ( - InferenceContainerTypeFamily, - Tags, -) +from ads.aqua.common.enums import InferenceContainerTypeFamily, Tags from ads.aqua.common.errors import AquaRuntimeError, AquaValueError from ads.aqua.common.utils import ( build_pydantic_error_message, @@ -42,14 +38,11 @@ from ads.aqua.model import AquaModelApp from ads.aqua.modeldeployment.entities import ( AquaDeployment, - AquaDeploymentConfig, AquaDeploymentDetail, CreateModelDeploymentDetails, - GPUModelAllocation, - GPUShapeAllocation, ModelDeploymentConfigSummary, ) -from ads.aqua.modeldeployment.utils import get_combinations +from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader from ads.aqua.ui import ModelFormat from ads.common.object_storage_details import ObjectStorageDetails from ads.common.utils import get_log_links @@ -628,107 +621,16 @@ def get_deployment_config(self, model_id: str) -> Dict: return config @telemetry( - entry_point="plugin=deployment&action=get_multimodel_compatible_shapes", + entry_point="plugin=deployment&action=get_multimodel_deployment_config", name="aqua", ) - def get_multimodel_compatible_shapes( - self, model_ids: List[str], primary_model_id: str = None + def get_multimodel_deployment_config( + self, model_ids: List[str], primary_model_id: Optional[str] = None ) -> ModelDeploymentConfigSummary: - """Gets the deployment config of multiple Aqua models and calculate the gpu allocations for all compatible shapes. - If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated. - If provided, gpu count for each compatible shape will be prioritized for primary model. - - For example, there is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below: - - A - BM.GPU.H100.8 - 1, 2, 4, 8 - B - BM.GPU.H100.8 - 1, 2, 4, 8 - C - BM.GPU.H100.8 - 1, 2, 4, 8 - - If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2] - If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count. - - Parameters - ---------- - model_ids: List[str] - A list of OCID of the Aqua model. - primary_model_id: str - The OCID of the primary Aqua model - - Returns - ------- - ModelDeploymentConfigSummary: - An instance of ModelDeploymentConfigSummary. 
""" - deployment = {} - model_shape_gpu = {} - for model_id in model_ids: - deployment_config = AquaDeploymentConfig( - **self.get_deployment_config(model_id=model_id) - ) - model_shape_gpu[model_id] = { - shape: [ - item.gpu_count - for item in deployment_config.configuration[ - shape - ].multi_model_deployment - ] - for shape in deployment_config.shape - } - - deployment.update( - { - model_id: { - "shape": deployment_config.shape, - "configuration": { - shape: deployment_config.configuration[shape] - for shape in deployment_config.shape - }, - } - } - ) - - common_shapes = [] - for shape_gpu in model_shape_gpu.values(): - if not common_shapes: - common_shapes = list(shape_gpu.keys()) - else: - common_shapes = [ - shape for shape in common_shapes if shape in list(shape_gpu.keys()) - ] - - if not common_shapes: - raise AquaValueError( - "There are no available shapes for models selected at this moment, please select different model to deploy." - ) + Retrieves the deployment configuration for multiple Aqua models and calculates + the GPU allocations for all compatible shapes. - gpu_allocation = {} - for common_shape in common_shapes: - model_gpu = { - model: shape_gpu[common_shape] - for model, shape_gpu in model_shape_gpu.items() - } - is_compatible, maximum_gpu_count, combination = self._verify_compatibility( - model_gpu, primary_model_id - ) - if is_compatible: - gpu_allocation[common_shape] = GPUShapeAllocation( - models=combination, total_gpus_available=maximum_gpu_count - ) - - if not gpu_allocation: - raise AquaValueError( - "There are no available gpu allocations for models selected at this moment, please select different model to deploy." - ) - - return ModelDeploymentConfigSummary( - deployment_config=deployment, gpu_allocation=gpu_allocation - ) - - @staticmethod - def _verify_compatibility( - model_gpu_dict: Dict, primary_model_id: str = None - ) -> tuple: - """Calculates the gpu allocations for all compatible shapes. If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated. If provided, gpu count for each compatible shape will be prioritized for primary model. @@ -743,66 +645,19 @@ def _verify_compatibility( Parameters ---------- - model_gpu_dict: Dict - A dict of Aqua model and its gpu counts. - primary_model_id: str - The OCID of the primary Aqua model + model_ids : List[str] + A list of OCIDs for the Aqua models. + primary_model_id : Optional[str] + The OCID of the primary Aqua model. If provided, GPU allocation will prioritize + this model. Otherwise, GPUs will be evenly allocated. Returns ------- - tuple: - A tuple of gpu count allocation result. + ModelDeploymentConfigSummary + A summary of the model deployment configurations and GPU allocations. 
""" - maximum_gpu_count = max([sorted(gpus)[-1] for gpus in model_gpu_dict.values()]) - model_gpu_dict_copy = copy.deepcopy(model_gpu_dict) - if primary_model_id: - primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id)) - for gpu_count in reversed(primary_model_gpu_list): - combinations = get_combinations(model_gpu_dict_copy) - for combination in combinations: - if ( - len(combination) == len(model_gpu_dict_copy) - and sum(combination.values()) == maximum_gpu_count - gpu_count - ): - combination[primary_model_id] = gpu_count - return ( - True, - maximum_gpu_count, - [ - GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) - for ocid, gpu_count in combination.items() - ], - ) - - else: - combinations = get_combinations(model_gpu_dict_copy) - minimal_difference = float("inf") # gets the positive infinity - optimal_combination = [] - for combination in combinations: - if ( - len(combination) == len(model_gpu_dict_copy) - and sum(combination.values()) == maximum_gpu_count - ): - difference = max(combination.values()) - min(combination.values()) - if difference < minimal_difference: - minimal_difference = difference - optimal_combination = combination - - # find the optimal combination, no need to continue - if minimal_difference == 0: - break - - if optimal_combination: - return ( - True, - maximum_gpu_count, - [ - GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) - for ocid, gpu_count in optimal_combination.items() - ], - ) - return (False, 0, []) + return MultiModelDeploymentConfigLoader(self).load(model_ids, primary_model_id) def get_deployment_default_params( self, diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 33d529bea..738308188 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -3,49 +3,282 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """AQUA model deployment utils""" +import copy import itertools +from concurrent.futures import ThreadPoolExecutor +from typing import Dict, List, Optional +from ads.aqua.app import AquaApp +from ads.aqua.common.errors import AquaValueError +from ads.aqua.modeldeployment.entities import ( + AquaDeploymentConfig, + GPUModelAllocation, + GPUShapeAllocation, + ModelDeploymentConfigSummary, +) +from ads.config import AQUA_MODEL_DEPLOYMENT_CONFIG -def get_combinations(input_dict: dict): - """Finds all unique combinations within input dict. - - The input is a dict of {model:[gpu_count]} on a specific shape and this method will - return a list of all unique combinations of gpu allocation of each model. 
- - For example: - - input: {'model_a': [2, 4], 'model_b': [1, 2, 4], 'model_c': [1, 2, 8]} - output: - [ - {'model_a': 2, 'model_b': 1, 'model_c': 1}, - {'model_a': 2, 'model_b': 1, 'model_c': 2}, - {'model_a': 2, 'model_b': 1, 'model_c': 8}, - {'model_a': 2, 'model_b': 2, 'model_c': 1}, - {'model_a': 2, 'model_b': 2, 'model_c': 2}, - {'model_a': 2, 'model_b': 2, 'model_c': 8}, - {'model_a': 2, 'model_b': 4, 'model_c': 1}, - {'model_a': 2, 'model_b': 4, 'model_c': 2}, - {'model_a': 2, 'model_b': 4, 'model_c': 8}, - {'model_a': 4, 'model_b': 1, 'model_c': 1}, - {'model_a': 4, 'model_b': 1, 'model_c': 2}, - {'model_a': 4, 'model_b': 1, 'model_c': 8}, - {'model_a': 4, 'model_b': 2, 'model_c': 1}, - {'model_a': 4, 'model_b': 2, 'model_c': 2}, - {'model_a': 4, 'model_b': 2, 'model_c': 8}, - {'model_a': 4, 'model_b': 4, 'model_c': 1}, - {'model_a': 4, 'model_b': 4, 'model_c': 2}, - {'model_a': 4, 'model_b': 4, 'model_c': 8} - ] - - Parameters - ---------- - input_dict: dict - A dict of {model:[gpu_count]} on a specific shape - - Returns - ------- - list: - A list of all unique combinations of gpu allocation of each model. + +class MultiModelDeploymentConfigLoader: + """ + Processes multiple model deployment configurations to determine compatible GPU shapes + and calculate optimal GPU allocations. """ - keys, values = zip(*input_dict.items()) - return [dict(zip(keys, v)) for v in itertools.product(*values)] + + MAX_WORKERS = 10 + + def __init__(self, deployment_app: AquaApp): + """ + Initializes the processor with a reference to the `AquaDeploymentApp` to fetch model configurations. + + Parameters + ---------- + deployment_app : AquaDeploymentApp + An instance of AquaDeploymentApp used to fetch model deployment configurations. + """ + self.deployment_app = deployment_app + + def load( + self, model_ids: List[str], primary_model_id: Optional[str] = None + ) -> ModelDeploymentConfigSummary: + """ + Retrieves deployment configurations for multiple models and calculates compatible GPU allocations. + + Parameters + ---------- + model_ids : List[str] + A list of OCIDs for the Aqua models. + primary_model_id : Optional[str], optional + The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model. + Otherwise, GPUs are evenly allocated. + + Returns + ------- + ModelDeploymentConfigSummary + A summary of the deployment configurations and GPU allocations. + + Raises + ------ + AquaValueError + If no compatible shapes or GPU allocations are available. + """ + deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) + model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) + + common_shapes = self._get_common_shapes(model_shape_gpu) + if not common_shapes: + raise AquaValueError( + "No available shapes for selected models. Choose a different model." + ) + + gpu_allocation = self._compute_gpu_allocation( + common_shapes, model_shape_gpu, primary_model_id + ) + if not gpu_allocation: + raise AquaValueError( + "No available GPU allocations. Choose a different model." 
+ ) + + return ModelDeploymentConfigSummary( + deployment_config=deployment, gpu_allocation=gpu_allocation + ) + + def _fetch_deployment_configs_concurrently( + self, model_ids: List[str] + ) -> Dict[str, AquaDeploymentConfig]: + """Fetches deployment configurations in parallel using ThreadPoolExecutor.""" + with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor: + results = executor.map( + lambda model_id: self.deployment_app.get_config( + model_id, AQUA_MODEL_DEPLOYMENT_CONFIG + ), + model_ids, + ) + + return { + model_id: AquaDeploymentConfig(**config) + for model_id, config in zip(model_ids, results) + } + + def _extract_model_shape_gpu( + self, deployment_configs: Dict[str, AquaDeploymentConfig] + ): + """Extracts shape and GPU count details from deployment configurations.""" + model_shape_gpu = {} + deployment = {} + + for model_id, config in deployment_configs.items(): + model_shape_gpu[model_id] = { + shape: [ + item.gpu_count + for item in config.configuration[shape].multi_model_deployment + ] + for shape in config.shape + } + deployment[model_id] = { + "shape": config.shape, + "configuration": { + shape: config.configuration[shape] for shape in config.shape + }, + } + + return model_shape_gpu, deployment + + def _get_common_shapes( + self, model_shape_gpu: Dict[str, Dict[str, List[int]]] + ) -> List[str]: + """Finds common shapes across all models.""" + return list( + set.intersection( + *(set(shapes.keys()) for shapes in model_shape_gpu.values()) + ) + ) + + def _compute_gpu_allocation( + self, + common_shapes: List[str], + model_shape_gpu: Dict[str, Dict[str, List[int]]], + primary_model_id: Optional[str], + ) -> Dict[str, GPUShapeAllocation]: + """Computes GPU allocation for common shapes.""" + gpu_allocation = {} + + for common_shape in common_shapes: + model_gpu = { + model: shape_gpu[common_shape] + for model, shape_gpu in model_shape_gpu.items() + } + is_compatible, max_gpu_count, combination = self._verify_compatibility( + model_gpu, primary_model_id + ) + + if is_compatible: + gpu_allocation[common_shape] = GPUShapeAllocation( + models=combination, total_gpus_available=max_gpu_count + ) + + return gpu_allocation + + def _verify_compatibility( + self, model_gpu_dict: Dict, primary_model_id: str = None + ) -> tuple: + """Calculates the gpu allocations for all compatible shapes. + If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated. + If provided, gpu count for each compatible shape will be prioritized for primary model. + + For example, there is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below: + + A - BM.GPU.H100.8 - 1, 2, 4, 8 + B - BM.GPU.H100.8 - 1, 2, 4, 8 + C - BM.GPU.H100.8 - 1, 2, 4, 8 + + If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2] + If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count. + + Parameters + ---------- + model_gpu_dict: Dict + A dict of Aqua model and its gpu counts. + primary_model_id: str + The OCID of the primary Aqua model + + Returns + ------- + tuple: + A tuple of gpu count allocation result. 
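A small, self-contained illustration of the behaviour described above (a sketch only; the shape lists mirror the example, and the loader does not need a real deployment app for this helper):

    from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader

    loader = MultiModelDeploymentConfigLoader(deployment_app=None)
    shape_gpus = {"A": [1, 2, 4, 8], "B": [1, 2, 4, 8], "C": [1, 2, 4, 8]}

    # Even allocation: the counts sum to the largest single-model count (8).
    ok, total, allocation = loader._verify_compatibility(shape_gpus)
    assert ok and total == 8 and sum(m.gpu_count for m in allocation) == 8

    # Prioritized allocation: "B" keeps the largest feasible share (4 of 8 here),
    # while the remaining GPUs are split between "A" and "C".
    ok, total, allocation = loader._verify_compatibility(shape_gpus, primary_model_id="B")
    by_model = {m.ocid: m.gpu_count for m in allocation}
    assert by_model["B"] == max(by_model.values())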
+ """ + maximum_gpu_count = max([sorted(gpus)[-1] for gpus in model_gpu_dict.values()]) + model_gpu_dict_copy = copy.deepcopy(model_gpu_dict) + if primary_model_id: + primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id)) + for gpu_count in reversed(primary_model_gpu_list): + combinations = self.get_combinations(model_gpu_dict_copy) + for combination in combinations: + if ( + len(combination) == len(model_gpu_dict_copy) + and sum(combination.values()) == maximum_gpu_count - gpu_count + ): + combination[primary_model_id] = gpu_count + return ( + True, + maximum_gpu_count, + [ + GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) + for ocid, gpu_count in combination.items() + ], + ) + + else: + combinations = self.get_combinations(model_gpu_dict_copy) + minimal_difference = float("inf") # gets the positive infinity + optimal_combination = [] + for combination in combinations: + if ( + len(combination) == len(model_gpu_dict_copy) + and sum(combination.values()) == maximum_gpu_count + ): + difference = max(combination.values()) - min(combination.values()) + if difference < minimal_difference: + minimal_difference = difference + optimal_combination = combination + + # find the optimal combination, no need to continue + if minimal_difference == 0: + break + + if optimal_combination: + return ( + True, + maximum_gpu_count, + [ + GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) + for ocid, gpu_count in optimal_combination.items() + ], + ) + + return (False, 0, []) + + @staticmethod + def get_combinations(input_dict: dict): + """Finds all unique combinations within input dict. + + The input is a dict of {model:[gpu_count]} on a specific shape and this method will + return a list of all unique combinations of gpu allocation of each model. + + For example: + + input: {'model_a': [2, 4], 'model_b': [1, 2, 4], 'model_c': [1, 2, 8]} + output: + [ + {'model_a': 2, 'model_b': 1, 'model_c': 1}, + {'model_a': 2, 'model_b': 1, 'model_c': 2}, + {'model_a': 2, 'model_b': 1, 'model_c': 8}, + {'model_a': 2, 'model_b': 2, 'model_c': 1}, + {'model_a': 2, 'model_b': 2, 'model_c': 2}, + {'model_a': 2, 'model_b': 2, 'model_c': 8}, + {'model_a': 2, 'model_b': 4, 'model_c': 1}, + {'model_a': 2, 'model_b': 4, 'model_c': 2}, + {'model_a': 2, 'model_b': 4, 'model_c': 8}, + {'model_a': 4, 'model_b': 1, 'model_c': 1}, + {'model_a': 4, 'model_b': 1, 'model_c': 2}, + {'model_a': 4, 'model_b': 1, 'model_c': 8}, + {'model_a': 4, 'model_b': 2, 'model_c': 1}, + {'model_a': 4, 'model_b': 2, 'model_c': 2}, + {'model_a': 4, 'model_b': 2, 'model_c': 8}, + {'model_a': 4, 'model_b': 4, 'model_c': 1}, + {'model_a': 4, 'model_b': 4, 'model_c': 2}, + {'model_a': 4, 'model_b': 4, 'model_c': 8} + ] + + Parameters + ---------- + input_dict: dict + A dict of {model:[gpu_count]} on a specific shape + + Returns + ------- + list: + A list of all unique combinations of gpu allocation of each model. 
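As a quick sanity check of the example above, the number of combinations equals the product of the list lengths, here 2 * 3 * 3 = 18. A sketch, assuming the loader class introduced in this patch is importable:

    from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader

    combos = MultiModelDeploymentConfigLoader.get_combinations(
        {"model_a": [2, 4], "model_b": [1, 2, 4], "model_c": [1, 2, 8]}
    )
    assert len(combos) == 18
    assert {"model_a": 4, "model_b": 2, "model_c": 8} in combos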
+ """ + keys, values = zip(*input_dict.items()) + return [dict(zip(keys, v)) for v in itertools.product(*values)] diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index b99ee2050..37fb48036 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -551,7 +551,7 @@ def test_get_deployment_config(self): result = self.app.get_deployment_config(TestDataset.MODEL_ID) assert result == None - def test_get_multimodel_compatible_shapes(self): + def test_get_multimodel_deployment_config(self): config_json = os.path.join( self.curr_dir, "test_data/deployment/aqua_multi_model_deployment_config.json", @@ -560,7 +560,7 @@ def test_get_multimodel_compatible_shapes(self): config = json.load(_file) self.app.get_deployment_config = MagicMock(return_value=config) - result = self.app.get_multimodel_compatible_shapes(["model_a"]) + result = self.app.get_multimodel_deployment_config(["model_a"]) assert ( result.model_dump() diff --git a/tests/unitary/with_extras/aqua/test_deployment_handler.py b/tests/unitary/with_extras/aqua/test_deployment_handler.py index e38f3d610..8ffedacca 100644 --- a/tests/unitary/with_extras/aqua/test_deployment_handler.py +++ b/tests/unitary/with_extras/aqua/test_deployment_handler.py @@ -90,15 +90,15 @@ def test_get_deployment_config_without_id(self, mock_error): assert result["status"] == 400 @patch( - "ads.aqua.modeldeployment.AquaDeploymentApp.get_multimodel_compatible_shapes" + "ads.aqua.modeldeployment.AquaDeploymentApp.get_multimodel_deployment_config" ) - def test_get_multimodel_compatible_shapes( - self, mock_get_multimodel_compatible_shapes + def test_get_multimodel_deployment_config( + self, mock_get_multimodel_deployment_config ): """Test get method to return multi model deployment config""" self.deployment_handler.request.path = "aqua/deployments/modelconfig" self.deployment_handler.get(id=["mock-model-id-one", "mock-model-id-two"]) - mock_get_multimodel_compatible_shapes.assert_called_with( + mock_get_multimodel_deployment_config.assert_called_with( model_ids=["mock-model-id-one", "mock-model-id-two"], primary_model_id=None ) @@ -187,7 +187,9 @@ def test_get_deployment_default_params( self.assertCountEqual(result["data"], self.default_params) mock_get_deployment_default_params.assert_called_with( - model_id="test_model_id", instance_shape=TestDataset.INSTANCE_SHAPE + model_id="test_model_id", + instance_shape=TestDataset.INSTANCE_SHAPE, + gpu_count=None, ) @parameterized.expand( From de211f208308f795166a54de8449a6d6ef177272 Mon Sep 17 00:00:00 2001 From: Vipul Date: Fri, 7 Feb 2025 12:28:19 +0530 Subject: [PATCH 018/124] Create multimodel catalog entry --- ads/aqua/common/entities.py | 12 ++ ads/aqua/common/enums.py | 3 +- ads/aqua/model/constants.py | 4 +- ads/aqua/model/model.py | 212 ++++++++++++++++++++++----- ads/aqua/modeldeployment/entities.py | 13 +- 5 files changed, 196 insertions(+), 48 deletions(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index 742d17776..083835356 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -29,3 +29,15 @@ class ShapeInfo(Serializable): class Config: extra = "ignore" + + +class ModelInfo(Serializable): + """Class for maintaining details of model to be deployed, usually for multi-model deployment.""" + + model_id: str + gpu_count: Optional[int] = None + env_var: Optional[dict] = None + + class Config: + extra = "ignore" + protected_namespaces = () diff 
--git a/ads/aqua/common/enums.py b/ads/aqua/common/enums.py index f1d1cd661..ff6c4b3e9 100644 --- a/ads/aqua/common/enums.py +++ b/ads/aqua/common/enums.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """ @@ -40,6 +40,7 @@ class Tags(str, metaclass=ExtendedEnumMeta): AQUA_EVALUATION_MODEL_ID = "evaluation_model_id" MODEL_FORMAT = "model_format" MODEL_ARTIFACT_FILE = "model_file" + MULTIMODEL_TYPE_TAG = "multimodel" class InferenceContainerType(str, metaclass=ExtendedEnumMeta): diff --git a/ads/aqua/model/constants.py b/ads/aqua/model/constants.py index 0a07152e4..4836d518c 100644 --- a/ads/aqua/model/constants.py +++ b/ads/aqua/model/constants.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """ @@ -18,6 +18,7 @@ class ModelCustomMetadataFields(str, metaclass=ExtendedEnumMeta): EVALUATION_CONTAINER = "evaluation-container" FINETUNE_CONTAINER = "finetune-container" DEPLOYMENT_CONTAINER_URI = "deployment-container-uri" + MULTIMODEL_COUNT = "model-group-count" class ModelTask(str, metaclass=ExtendedEnumMeta): @@ -34,6 +35,7 @@ class FineTuningMetricCategories(str, metaclass=ExtendedEnumMeta): class ModelType(str, metaclass=ExtendedEnumMeta): FT = "FT" # Fine Tuned Model BASE = "BASE" # Base model + MULTIMODEL = "MULTIMODEL" # TODO: merge metadata key used in create FT diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 59f8decff..05a52759a 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import builtins import os import pathlib from datetime import datetime, timedelta @@ -14,6 +15,7 @@ from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger from ads.aqua.app import AquaApp +from ads.aqua.common.entities import ModelInfo from ads.aqua.common.enums import ( CustomInferenceContainerTypeFamily, FineTuningContainerTypeFamily, @@ -133,7 +135,7 @@ class AquaModelApp(AquaApp): @telemetry(entry_point="plugin=model&action=create", name="aqua") def create( self, - model_id: str, + model_id: Union[str, List[ModelInfo]], project_id: str, compartment_id: str = None, freeform_tags: Optional[dict] = None, @@ -144,8 +146,8 @@ def create( Parameters ---------- - model_id: str - The service model id. + model_id: Union[str, List[ModelInfo]] + The model id to be deployed, or a list of model ids for multi-model deployment project_id: str The project id for custom model. compartment_id: str @@ -160,54 +162,194 @@ def create( DataScienceModel: The instance of DataScienceModel. 
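A usage sketch for both call styles accepted at this point in the series (OCIDs are placeholders; later patches in this series move the list form into a dedicated create_multi API):

    from ads.aqua.common.entities import ModelInfo
    from ads.aqua.model import AquaModelApp

    app = AquaModelApp()

    # Single service model: copied into the user's compartment.
    app.create(
        model_id="ocid1.datasciencemodel.oc1..<service_model>",
        project_id="<project_ocid>",
        compartment_id="<compartment_ocid>",
    )

    # Multi-model group: a list of ModelInfo objects triggers the grouping path.
    app.create(
        model_id=[
            ModelInfo(model_id="ocid1.datasciencemodel.oc1..<model_a>", gpu_count=2),
            ModelInfo(model_id="ocid1.datasciencemodel.oc1..<model_b>", gpu_count=2),
        ],
        project_id="<project_ocid>",
        compartment_id="<compartment_ocid>",
    )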
""" - service_model = DataScienceModel.from_id(model_id) + target_project = project_id or PROJECT_OCID target_compartment = compartment_id or COMPARTMENT_OCID - if service_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID: + if isinstance(model_id, builtins.list): + return self._create_multimodel_group( + model_info=model_id, + project_id=target_project, + compartment_id=target_compartment, + freeform_tags=freeform_tags, + defined_tags=defined_tags, + ) + else: + service_model = DataScienceModel.from_id(model_id) + + if service_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID: + logger.info( + f"Aqua Model {model_id} already exists in user's compartment." + "Skipped copying." + ) + return service_model + + # combine tags + combined_freeform_tags = { + **(service_model.freeform_tags or {}), + **(freeform_tags or {}), + } + combined_defined_tags = { + **(service_model.defined_tags or {}), + **(defined_tags or {}), + } + + custom_model = ( + DataScienceModel() + .with_compartment_id(target_compartment) + .with_project_id(target_project) + .with_model_file_description( + json_dict=service_model.model_file_description + ) + .with_display_name(service_model.display_name) + .with_description(service_model.description) + .with_freeform_tags(**combined_freeform_tags) + .with_defined_tags(**combined_defined_tags) + .with_custom_metadata_list(service_model.custom_metadata_list) + .with_defined_metadata_list(service_model.defined_metadata_list) + .with_provenance_metadata(service_model.provenance_metadata) + # TODO: decide what kwargs will be needed. + .create(model_by_reference=True, **kwargs) + ) logger.info( - f"Aqua Model {model_id} already exists in user's compartment." - "Skipped copying." + f"Aqua Model {custom_model.id} created with the service model {model_id}." ) - return service_model - # combine tags - combined_freeform_tags = { - **(service_model.freeform_tags or {}), - **(freeform_tags or {}), - } - combined_defined_tags = { - **(service_model.defined_tags or {}), - **(defined_tags or {}), + # tracks unique models that were created in the user compartment + self.telemetry.record_event_async( + category="aqua/service/model", + action="create", + detail=service_model.display_name, + ) + + return custom_model + + def _create_multimodel_group( + self, + model_info: List[ModelInfo], + project_id: str, + compartment_id: str = None, + freeform_tags: Optional[dict] = None, + defined_tags: Optional[dict] = None, + ) -> DataScienceModel: + """ + Create a multimodel grouping using the model list. + Parameters + ---------- + model_info: List[ModelInfo] + List of model ids for creating a multimodel group. + project_id: str + The project id for multimodel group. + compartment_id: str + The compartment id for multimodel group. + freeform_tags: dict + Freeform tags for the model + defined_tags: dict + Defined tags for the model + + Returns + ------- + DataScienceModel + Instance of DataScienceModel object + + """ + artifact_list = [] + display_name_list = [] + default_deployment_container = None + model_info_dict = {"models": []} + model_custom_metadata = ModelCustomMetadata() + + # gather individual model details for grouping + for model_idx, model in enumerate(model_info): + source_model = DataScienceModel.from_id(model.model_id) + display_name_list.append(source_model.display_name) + + model_artifact_path = source_model.artifact + if not model_artifact_path: + raise AquaValueError( + f"The selected model {source_model.display_name} does not have " + f"any artifacts associated with it. 
Please register the model first before " + f"continuing to group the model." + ) + artifact_list.append(model_artifact_path) + + model_info_dict["models"].append(model.to_json()) + deployment_container = source_model.custom_metadata_list.get( + ModelCustomMetadataFields.DEPLOYMENT_CONTAINER, + ModelCustomMetadataItem( + key=ModelCustomMetadataFields.DEPLOYMENT_CONTAINER + ), + ).value + if model_idx == 0: + default_deployment_container = deployment_container + elif deployment_container != default_deployment_container: + # todo: replace this error with a logger statement once we verify that the models with default + # as TGI works with the vLLM container. + raise AquaValueError( + "Unable to proceed with deployment. " + "Deployment container for the models selected should be the same." + ) + + model_custom_metadata.add( + key=f"model-id-{model_idx}", + value=source_model.id, + description=f"Model id for {source_model.display_name} model in the multimodel group.", + category="Other", + ) + + model_group_display_name = f"model_group_{datetime.now().strftime('%Y%m%d')}" + combined_models = ", ".join(display_name_list) + model_group_description = ( + f"Model grouping creating using {combined_models} models." + ) + + model_custom_metadata.add( + key=ModelCustomMetadataFields.DEPLOYMENT_CONTAINER, + value=default_deployment_container, + description=f"Inference container mapping for {model_group_display_name}", + category="Other", + ) + model_custom_metadata.add( + key=ModelCustomMetadataFields.MULTIMODEL_COUNT, + value=str(len(model_info)), + description="Count of models grouped to create the multimodel catalog entry.", + category="Other", + ) + + tags = { + Tags.AQUA_TAG: "active", + Tags.MULTIMODEL_TYPE_TAG: "true", } - custom_model = ( + tags = {**tags, **(freeform_tags or {})} + + multi_model_group = ( DataScienceModel() - .with_compartment_id(target_compartment) - .with_project_id(target_project) - .with_model_file_description(json_dict=service_model.model_file_description) - .with_display_name(service_model.display_name) - .with_description(service_model.description) - .with_freeform_tags(**combined_freeform_tags) - .with_defined_tags(**combined_defined_tags) - .with_custom_metadata_list(service_model.custom_metadata_list) - .with_defined_metadata_list(service_model.defined_metadata_list) - .with_provenance_metadata(service_model.provenance_metadata) - # TODO: decide what kwargs will be needed. - .create(model_by_reference=True, **kwargs) + .with_compartment_id(compartment_id) + .with_project_id(project_id) + .with_display_name(model_group_display_name) + .with_description(model_group_description) + .with_freeform_tags(**tags) + .with_defined_tags(**(defined_tags or {})) + .with_custom_metadata_list(model_custom_metadata) ) + + for artifact in artifact_list: + multi_model_group.add_artifact(uri=artifact) + + multi_model_group.create(model_by_reference=True) + logger.info( - f"Aqua Model {custom_model.id} created with the service model {model_id}." + f"Aqua Model {multi_model_group.id} created with the verified models {','.join(display_name_list)}." 
) # tracks unique models that were created in the user compartment self.telemetry.record_event_async( - category="aqua/service/model", + category="aqua/multimodel", action="create", - detail=service_model.display_name, + detail=combined_models, ) - - return custom_model + return multi_model_group @telemetry(entry_point="plugin=model&action=get", name="aqua") def get(self, model_id: str, load_model_card: Optional[bool] = True) -> "AquaModel": @@ -1592,6 +1734,8 @@ def _rqs(self, compartment_id: str, model_type="FT", **kwargs): filter_tag = Tags.AQUA_FINE_TUNED_MODEL_TAG elif model_type == ModelType.BASE: filter_tag = Tags.BASE_MODEL_CUSTOM + elif model_type == ModelType.MULTIMODEL: + filter_tag = Tags.MULTIMODEL_TYPE_TAG else: raise AquaValueError( f"Model of type {model_type} is unknown. The values should be in {ModelType.values()}" diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 0245110ff..a1b8e93cb 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -10,7 +10,7 @@ ) from pydantic import Field, model_validator -from ads.aqua.common.entities import ShapeInfo +from ads.aqua.common.entities import ModelInfo, ShapeInfo from ads.aqua.common.enums import Tags from ads.aqua.common.errors import AquaValueError from ads.aqua.config.utils.serializer import Serializable @@ -144,17 +144,6 @@ class Config: extra = "ignore" -class ModelInfo(Serializable): - """Class for maintaining details of model to be deployed, usually for multi-model deployment.""" - - model_id: str - gpu_count: Optional[int] = None - env_var: Optional[dict] = None - - class Config: - extra = "ignore" - - class CreateModelDeploymentDetails(Serializable): """Class for creating aqua model deployment. From 05e329b09f5ad7e7b1fdedcaa0001acc9db4117c Mon Sep 17 00:00:00 2001 From: Vipul Date: Fri, 7 Feb 2025 22:58:47 +0530 Subject: [PATCH 019/124] add custom metdata to multimodel --- ads/aqua/model/constants.py | 2 +- ads/aqua/model/model.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ads/aqua/model/constants.py b/ads/aqua/model/constants.py index 4836d518c..9a63207c5 100644 --- a/ads/aqua/model/constants.py +++ b/ads/aqua/model/constants.py @@ -18,7 +18,7 @@ class ModelCustomMetadataFields(str, metaclass=ExtendedEnumMeta): EVALUATION_CONTAINER = "evaluation-container" FINETUNE_CONTAINER = "finetune-container" DEPLOYMENT_CONTAINER_URI = "deployment-container-uri" - MULTIMODEL_COUNT = "model-group-count" + MULTIMODEL_GROUP_COUNT = "model_group_count" class ModelTask(str, metaclass=ExtendedEnumMeta): diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 05a52759a..34c7a607b 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -296,6 +296,12 @@ def _create_multimodel_group( description=f"Model id for {source_model.display_name} model in the multimodel group.", category="Other", ) + model_custom_metadata.add( + key=f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{model_idx}", + value=model_artifact_path, + description=f"Model path for {source_model.display_name} model in the multimodel group.", + category="Other", + ) model_group_display_name = f"model_group_{datetime.now().strftime('%Y%m%d')}" combined_models = ", ".join(display_name_list) @@ -310,7 +316,7 @@ def _create_multimodel_group( category="Other", ) model_custom_metadata.add( - key=ModelCustomMetadataFields.MULTIMODEL_COUNT, + key=ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT, value=str(len(model_info)), description="Count of 
models grouped to create the multimodel catalog entry.", category="Other", From a49afa25fe7dfe739e212d1eb22339123f080eb3 Mon Sep 17 00:00:00 2001 From: Vipul Date: Sat, 8 Feb 2025 00:54:13 +0530 Subject: [PATCH 020/124] add metadata and tests --- ads/aqua/model/model.py | 8 ++- ads/aqua/modeldeployment/deployment.py | 9 ++- tests/unitary/with_extras/aqua/test_model.py | 60 ++++++++++++++++++++ 3 files changed, 75 insertions(+), 2 deletions(-) diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 34c7a607b..164c97ac7 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -136,7 +136,7 @@ class AquaModelApp(AquaApp): def create( self, model_id: Union[str, List[ModelInfo]], - project_id: str, + project_id: str = None, compartment_id: str = None, freeform_tags: Optional[dict] = None, defined_tags: Optional[dict] = None, @@ -296,6 +296,12 @@ def _create_multimodel_group( description=f"Model id for {source_model.display_name} model in the multimodel group.", category="Other", ) + model_custom_metadata.add( + key=f"model-name-{model_idx}", + value=source_model.display_name, + description=f"Model name for {source_model.display_name} model in the multimodel group.", + category="Other", + ) model_custom_metadata.add( key=f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{model_idx}", value=model_artifact_path, diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index f657d30cd..588e7fc4f 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -122,13 +122,20 @@ def create( # Create a model catalog entry in the user compartment aqua_model = AquaModelApp().create( - model_id=create_deployment_details.model_id, + model_id=create_deployment_details.model_id + or create_deployment_details.model_info, compartment_id=create_deployment_details.compartment_id or COMPARTMENT_OCID, project_id=create_deployment_details.project_id or PROJECT_OCID, freeform_tags=create_deployment_details.freeform_tags, defined_tags=create_deployment_details.defined_tags, ) + # todo: remove this once deployment support is added + if create_deployment_details.model_info: + raise AquaValueError( + "Deployment support for multimodel info is in progress." 
+ ) + tags = {} for tag in [ Tags.AQUA_SERVICE_MODEL_TAG, diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index 31ce24c03..2d6251c72 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -42,6 +42,7 @@ AquaFileNotFoundError, AquaValueError, ) +from ads.aqua.common.entities import ModelInfo as MultiModelInfo from ads.model.service.oci_datascience_model import OCIDataScienceModel @@ -353,6 +354,65 @@ def test_create_model(self, mock_from_id, mock_validate, mock_create): ) assert model.provenance_metadata.training_id == "test_training_id" + @patch.object(DataScienceModel, "add_artifact") + @patch.object(DataScienceModel, "create") + @patch("ads.model.datascience_model.validate") + @patch.object(DataScienceModel, "from_id") + def test_create_multimodel( + self, mock_from_id, mock_validate, mock_create, mock_add_artifact + ): + mock_model = MagicMock() + mock_model.model_file_description = {"test_key": "test_value"} + mock_model.display_name = "test_display_name" + mock_model.description = "test_description" + mock_model.freeform_tags = { + "OCI_AQUA": "ACTIVE", + } + mock_model.id = "mock_model_id" + mock_model.artifact = "mock_artifact_path" + custom_metadata_list = ModelCustomMetadata() + custom_metadata_list.add( + **{"key": "deployment-container", "value": "odsc-vllm-serving"} + ) + + mock_model.custom_metadata_list = custom_metadata_list + mock_from_id.return_value = mock_model + + model_info_1 = MultiModelInfo( + model_id="test_model_id_1", + gpu_count=2, + env_var={"params": "--trust-remote-code --max-model-len 60000"}, + ) + + model_info_2 = MultiModelInfo( + model_id="test_model_id_2", + gpu_count=2, + env_var={"params": "--trust-remote-code --max-model-len 32000"}, + ) + + # will create a multi-model group + model = self.app.create( + model_id=[model_info_1, model_info_2], + project_id="test_project_id", + compartment_id="test_compartment_id", + ) + + mock_add_artifact.assert_called() + mock_from_id.assert_called() + mock_validate.assert_not_called() + mock_create.assert_called_with(model_by_reference=True) + + mock_model.compartment_id = TestDataset.SERVICE_COMPARTMENT_ID + mock_from_id.return_value = mock_model + mock_create.return_value = mock_model + + assert model.freeform_tags == {"OCI_AQUA": "active", "multimodel": "true"} + assert model.custom_metadata_list.get("model_group_count").value == "2" + assert ( + model.custom_metadata_list.get("deployment-container").value + == "odsc-vllm-serving" + ) + @pytest.mark.parametrize( "foundation_model_type", [ From 1f797625da64248806adb5e91a20bfeebd9d808d Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 7 Feb 2025 16:22:55 -0800 Subject: [PATCH 021/124] Enhance aqua deployment handler --- ads/aqua/extension/deployment_handler.py | 50 ++++++++++++++---------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index f16d0bf98..f374b3ab5 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -43,21 +43,15 @@ def get(self, id: Union[str, List[str]] = None): url_parse = urlparse(self.request.path) paths = url_parse.path.strip("/") if paths.startswith("aqua/deployments/config"): - if not id: + if not id or not isinstance(id, (list, str)): raise HTTPError( - 400, f"The request {self.request.path} requires model id." 
+ 400, + f"The request to {self.request.path} must include either a single model ID or a list of model IDs.", ) - return self.get_deployment_config(id) - elif paths.startswith("aqua/deployments/modelconfig"): if isinstance(id, list): return self.get_multimodel_deployment_config(id) - elif isinstance(id, str): - return self.get_deployment_config(id) else: - raise HTTPError( - 400, - f"The request {self.request.path} requires either a model id or a list of model ids.", - ) + return self.get_deployment_config(id) elif paths.startswith("aqua/deployments"): if not id: return self.list() @@ -128,18 +122,33 @@ def list(self): ) ) - def get_deployment_config(self, model_id): - """Gets the deployment config for Aqua model.""" - return self.finish(AquaDeploymentApp().get_deployment_config(model_id=model_id)) + def get_deployment_config(self, model_id: Union[str, List[str]]): + """ + Retrieves the deployment configuration for one or more Aqua models. - def get_multimodel_deployment_config(self, model_ids: List[str]): - """Gets the multi model deployment config and optimal GPU allocations for Aqua models.""" - primary_model_id = self.get_argument("primary_model_id", default=None) - return self.finish( - AquaDeploymentApp().get_multimodel_deployment_config( - model_ids=model_ids, primary_model_id=primary_model_id + Parameters + ---------- + model_id : Union[str, List[str]] + A single model ID (str) or a list of model IDs (List[str]). + + Returns + ------- + None + The function sends the deployment configuration as a response. + """ + app = AquaDeploymentApp() + + if isinstance(model_id, list): + # Handle multiple model deployment + primary_model_id = self.get_argument("primary_model_id", default=None) + deployment_config = app.get_multimodel_deployment_config( + model_ids=model_id, primary_model_id=primary_model_id ) - ) + else: + # Handle single model deployment + deployment_config = app.get_deployment_config(model_id=model_id) + + return self.finish(deployment_config) class AquaDeploymentInferenceHandler(AquaAPIhandler): @@ -257,7 +266,6 @@ def post(self, *args, **kwargs): # noqa: ARG002 __handlers__ = [ ("deployments/?([^/]*)/params", AquaDeploymentParamsHandler), ("deployments/config/?([^/]*)", AquaDeploymentHandler), - ("deployments/modelconfig/?([^/]*)", AquaDeploymentHandler), ("deployments/?([^/]*)", AquaDeploymentHandler), ("deployments/?([^/]*)/activate", AquaDeploymentHandler), ("deployments/?([^/]*)/deactivate", AquaDeploymentHandler), From ee3d65d3db69db4a99cd3a70eba33d1b6cbcafeb Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Sun, 9 Feb 2025 22:24:16 -0800 Subject: [PATCH 022/124] Enhances the functionality for creating multi-model entities. --- ads/aqua/common/entities.py | 19 +- ads/aqua/model/model.py | 256 ++++++++++--------- ads/aqua/modeldeployment/deployment.py | 82 ++++-- ads/aqua/modeldeployment/entities.py | 187 +++++++------- tests/unitary/with_extras/aqua/test_model.py | 40 +-- 5 files changed, 311 insertions(+), 273 deletions(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index 083835356..69bea481d 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -31,10 +31,25 @@ class Config: extra = "ignore" -class ModelInfo(Serializable): - """Class for maintaining details of model to be deployed, usually for multi-model deployment.""" +class AquaMultiModelRef(Serializable): + """ + Lightweight model descriptor used for multi-model deployment. 
+ + This class only contains essential details + required to fetch complete model metadata and deploy models. + + Attributes + ---------- + model_id : str + The unique identifier of the model. + gpu_count : Optional[int] + Number of GPUs required for deployment. + env_var : Optional[Dict[str, Any]] + Optional environment variables to override during deployment. + """ model_id: str + model_name: Optional[str] = None gpu_count: Optional[int] = None env_var: Optional[dict] = None diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 164c97ac7..d552b7afd 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -1,7 +1,6 @@ #!/usr/bin/env python # Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -import builtins import os import pathlib from datetime import datetime, timedelta @@ -15,7 +14,7 @@ from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger from ads.aqua.app import AquaApp -from ads.aqua.common.entities import ModelInfo +from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import ( CustomInferenceContainerTypeFamily, FineTuningContainerTypeFamily, @@ -135,186 +134,185 @@ class AquaModelApp(AquaApp): @telemetry(entry_point="plugin=model&action=create", name="aqua") def create( self, - model_id: Union[str, List[ModelInfo]], - project_id: str = None, - compartment_id: str = None, - freeform_tags: Optional[dict] = None, - defined_tags: Optional[dict] = None, + model_id: Union[str, AquaMultiModelRef], + project_id: Optional[str] = None, + compartment_id: Optional[str] = None, + freeform_tags: Optional[Dict] = None, + defined_tags: Optional[Dict] = None, **kwargs, ) -> DataScienceModel: - """Creates custom aqua model from service model. + """ + Creates a custom Aqua model from a service model. Parameters ---------- - model_id: Union[str, List[ModelInfo]] - The model id to be deployed, or a list of model ids for multi-model deployment - project_id: str - The project id for custom model. - compartment_id: str - The compartment id for custom model. Defaults to None. - If not provided, compartment id will be fetched from environment variables. - freeform_tags: dict - Freeform tags for the model - defined_tags: dict - Defined tags for the model + model : Union[str, AquaMultiModelRef] + The model ID as a string or a AquaMultiModelRef instance to be deployed. + project_id : Optional[str] + The project ID for the custom model. + compartment_id : Optional[str] + The compartment ID for the custom model. Defaults to None. + If not provided, the compartment ID will be fetched from environment variables. + freeform_tags : Optional[Dict] + Freeform tags for the model. + defined_tags : Optional[Dict] + Defined tags for the model. + Returns ------- - DataScienceModel: + DataScienceModel The instance of DataScienceModel. 
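Both forms below resolve to the same service model copy; when an AquaMultiModelRef is passed, only its model_id is used here (a sketch with placeholder OCIDs):

    from ads.aqua.common.entities import AquaMultiModelRef
    from ads.aqua.model import AquaModelApp

    app = AquaModelApp()
    app.create(model_id="ocid1.datasciencemodel.oc1..<service_model>")
    app.create(
        model_id=AquaMultiModelRef(model_id="ocid1.datasciencemodel.oc1..<service_model>")
    )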
""" - + model_id = ( + model_id.model_id if isinstance(model_id, AquaMultiModelRef) else model_id + ) + service_model = DataScienceModel.from_id(model_id) target_project = project_id or PROJECT_OCID target_compartment = compartment_id or COMPARTMENT_OCID - if isinstance(model_id, builtins.list): - return self._create_multimodel_group( - model_info=model_id, - project_id=target_project, - compartment_id=target_compartment, - freeform_tags=freeform_tags, - defined_tags=defined_tags, + if service_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID: + logger.info( + f"Aqua Model {model_id} already exists in the user's compartment." + "Skipped copying." ) - else: - service_model = DataScienceModel.from_id(model_id) + return service_model - if service_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID: - logger.info( - f"Aqua Model {model_id} already exists in user's compartment." - "Skipped copying." - ) - return service_model - - # combine tags - combined_freeform_tags = { - **(service_model.freeform_tags or {}), - **(freeform_tags or {}), - } - combined_defined_tags = { - **(service_model.defined_tags or {}), - **(defined_tags or {}), - } + # combine tags + combined_freeform_tags = { + **(service_model.freeform_tags or {}), + **(freeform_tags or {}), + } + combined_defined_tags = { + **(service_model.defined_tags or {}), + **(defined_tags or {}), + } - custom_model = ( - DataScienceModel() - .with_compartment_id(target_compartment) - .with_project_id(target_project) - .with_model_file_description( - json_dict=service_model.model_file_description - ) - .with_display_name(service_model.display_name) - .with_description(service_model.description) - .with_freeform_tags(**combined_freeform_tags) - .with_defined_tags(**combined_defined_tags) - .with_custom_metadata_list(service_model.custom_metadata_list) - .with_defined_metadata_list(service_model.defined_metadata_list) - .with_provenance_metadata(service_model.provenance_metadata) - # TODO: decide what kwargs will be needed. - .create(model_by_reference=True, **kwargs) - ) - logger.info( - f"Aqua Model {custom_model.id} created with the service model {model_id}." - ) + custom_model = ( + DataScienceModel() + .with_compartment_id(target_compartment) + .with_project_id(target_project) + .with_model_file_description(json_dict=service_model.model_file_description) + .with_display_name(service_model.display_name) + .with_description(service_model.description) + .with_freeform_tags(**combined_freeform_tags) + .with_defined_tags(**combined_defined_tags) + .with_custom_metadata_list(service_model.custom_metadata_list) + .with_defined_metadata_list(service_model.defined_metadata_list) + .with_provenance_metadata(service_model.provenance_metadata) + .create(model_by_reference=True, **kwargs) + ) + logger.info( + f"Aqua Model {custom_model.id} created with the service model {model_id}." 
+ ) - # tracks unique models that were created in the user compartment - self.telemetry.record_event_async( - category="aqua/service/model", - action="create", - detail=service_model.display_name, - ) + # Track unique models that were created in the user's compartment + self.telemetry.record_event_async( + category="aqua/service/model", + action="create", + detail=service_model.display_name, + ) - return custom_model + return custom_model - def _create_multimodel_group( + @telemetry(entry_point="plugin=model&action=create", name="aqua") + def create_multi( self, - model_info: List[ModelInfo], - project_id: str, - compartment_id: str = None, - freeform_tags: Optional[dict] = None, - defined_tags: Optional[dict] = None, + models: List[AquaMultiModelRef], + project_id: Optional[str] = None, + compartment_id: Optional[str] = None, + freeform_tags: Optional[Dict] = None, + defined_tags: Optional[Dict] = None, + **kwargs, # noqa: ARG002 ) -> DataScienceModel: """ - Create a multimodel grouping using the model list. + Creates a multi-model grouping using the provided model list. + Parameters ---------- - model_info: List[ModelInfo] - List of model ids for creating a multimodel group. - project_id: str - The project id for multimodel group. - compartment_id: str - The compartment id for multimodel group. - freeform_tags: dict - Freeform tags for the model - defined_tags: dict - Defined tags for the model + models : List[AquaMultiModelRef] + List of AquaMultiModelRef instances for creating a multi-model group. + project_id : Optional[str] + The project ID for the multi-model group. + compartment_id : Optional[str] + The compartment ID for the multi-model group. + freeform_tags : Optional[Dict] + Freeform tags for the model. + defined_tags : Optional[Dict] + Defined tags for the model. Returns ------- DataScienceModel - Instance of DataScienceModel object - + Instance of DataScienceModel object. """ + + if not models: + raise AquaValueError("Model list cannot be empty.") + artifact_list = [] display_name_list = [] - default_deployment_container = None - model_info_dict = {"models": []} model_custom_metadata = ModelCustomMetadata() + default_deployment_container = None - # gather individual model details for grouping - for model_idx, model in enumerate(model_info): + # Process each model + for idx, model in enumerate(models): source_model = DataScienceModel.from_id(model.model_id) - display_name_list.append(source_model.display_name) + display_name = source_model.display_name + display_name_list.append(display_name) + # Retrieve model artifact model_artifact_path = source_model.artifact if not model_artifact_path: raise AquaValueError( - f"The selected model {source_model.display_name} does not have " - f"any artifacts associated with it. Please register the model first before " - f"continuing to group the model." + f"Model '{display_name}' (ID: {model.model_id}) has no artifacts. " + "Please register the model first." 
) + artifact_list.append(model_artifact_path) - model_info_dict["models"].append(model.to_json()) + # Validate deployment container consistency deployment_container = source_model.custom_metadata_list.get( ModelCustomMetadataFields.DEPLOYMENT_CONTAINER, ModelCustomMetadataItem( key=ModelCustomMetadataFields.DEPLOYMENT_CONTAINER ), ).value - if model_idx == 0: + + if idx == 0: default_deployment_container = deployment_container elif deployment_container != default_deployment_container: - # todo: replace this error with a logger statement once we verify that the models with default - # as TGI works with the vLLM container. raise AquaValueError( - "Unable to proceed with deployment. " - "Deployment container for the models selected should be the same." + "Deployment container mismatch detected. " + "All selected models must use the same deployment container." ) + # Add model-specific metadata model_custom_metadata.add( - key=f"model-id-{model_idx}", + key=f"model-id-{idx}", value=source_model.id, - description=f"Model id for {source_model.display_name} model in the multimodel group.", + description=f"ID of '{display_name}' in the multimodel group.", category="Other", ) model_custom_metadata.add( - key=f"model-name-{model_idx}", - value=source_model.display_name, - description=f"Model name for {source_model.display_name} model in the multimodel group.", + key=f"model-name-{idx}", + value=display_name, + description=f"Name of '{display_name}' in the multimodel group.", category="Other", ) model_custom_metadata.add( - key=f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{model_idx}", + key=f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{idx}", value=model_artifact_path, - description=f"Model path for {source_model.display_name} model in the multimodel group.", + description=f"Artifact path for '{display_name}' in the multimodel group.", category="Other", ) - model_group_display_name = f"model_group_{datetime.now().strftime('%Y%m%d')}" + # Generate model group details + timestamp = datetime.now().strftime("%Y%m%d") + model_group_display_name = f"model_group_{timestamp}" combined_models = ", ".join(display_name_list) - model_group_description = ( - f"Model grouping creating using {combined_models} models." - ) + model_group_description = f"Multi-model grouping using {combined_models}." 
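    # Illustration (not part of this patch): a minimal create_multi call with
    # placeholder OCIDs. All referenced models must share the same deployment
    # container, as validated above.
    from ads.aqua.common.entities import AquaMultiModelRef
    from ads.aqua.model import AquaModelApp

    group = AquaModelApp().create_multi(
        models=[
            AquaMultiModelRef(model_id="ocid1.datasciencemodel.oc1..<model_a>", gpu_count=2),
            AquaMultiModelRef(model_id="ocid1.datasciencemodel.oc1..<model_b>", gpu_count=2),
        ],
        project_id="<project_ocid>",
        compartment_id="<compartment_ocid>",
    )
    # The resulting DataScienceModel carries the multimodel tag and the
    # model_group_count custom metadata populated below.
    assert group.custom_metadata_list.get("model_group_count").value == "2"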
+ # Add global metadata model_custom_metadata.add( key=ModelCustomMetadataFields.DEPLOYMENT_CONTAINER, value=default_deployment_container, @@ -323,19 +321,20 @@ def _create_multimodel_group( ) model_custom_metadata.add( key=ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT, - value=str(len(model_info)), - description="Count of models grouped to create the multimodel catalog entry.", + value=str(len(models)), + description="Number of models in the group.", category="Other", ) + # Combine tags tags = { Tags.AQUA_TAG: "active", Tags.MULTIMODEL_TYPE_TAG: "true", + **(freeform_tags or {}), } - tags = {**tags, **(freeform_tags or {})} - - multi_model_group = ( + # Create multi-model group + custom_model = ( DataScienceModel() .with_compartment_id(compartment_id) .with_project_id(project_id) @@ -346,22 +345,25 @@ def _create_multimodel_group( .with_custom_metadata_list(model_custom_metadata) ) + # Attach artifacts for artifact in artifact_list: - multi_model_group.add_artifact(uri=artifact) + custom_model.add_artifact(uri=artifact) - multi_model_group.create(model_by_reference=True) + # Finalize creation + custom_model.create(model_by_reference=True) logger.info( - f"Aqua Model {multi_model_group.id} created with the verified models {','.join(display_name_list)}." + f"Aqua Model '{custom_model.id}' created with models: {', '.join(display_name_list)}." ) - # tracks unique models that were created in the user compartment + # Track telemetry event self.telemetry.record_event_async( category="aqua/multimodel", action="create", detail=combined_models, ) - return multi_model_group + + return custom_model @telemetry(entry_point="plugin=model&action=get", name="aqua") def get(self, model_id: str, load_model_card: Optional[bool] = True) -> "AquaModel": diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 588e7fc4f..56cf32db0 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -3,16 +3,13 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import shlex -from typing import Dict, List, Union +from typing import Dict, List, Optional, Union from pydantic import ValidationError from ads.aqua.app import AquaApp, logger from ads.aqua.common.entities import ContainerSpec -from ads.aqua.common.enums import ( - InferenceContainerTypeFamily, - Tags, -) +from ads.aqua.common.enums import InferenceContainerTypeFamily, Tags from ads.aqua.common.errors import AquaRuntimeError, AquaValueError from ads.aqua.common.utils import ( build_pydantic_error_message, @@ -91,27 +88,29 @@ class AquaDeploymentApp(AquaApp): @telemetry(entry_point="plugin=deployment&action=create", name="aqua") def create( - self, create_deployment_details: CreateModelDeploymentDetails = None, **kwargs + self, + create_deployment_details: Optional[CreateModelDeploymentDetails] = None, + **kwargs, ) -> "AquaDeployment": """ - Creates a new Aqua model deployment + Creates a new Aqua model deployment. Parameters ---------- - create_deployment_details: CreateModelDeploymentDetails - The CreateModelDeploymentDetails data class which contains all - required and optional fields to create a model deployment via Aqua. - kwargs: - The kwargs for creating CreateModelDeploymentDetails instance if - no create_deployment_details is provided. 
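A sketch of the keyword-argument path described here (placeholder OCID; a production call would normally also supply logging, instance count, and related settings):

    from ads.aqua.modeldeployment import AquaDeploymentApp

    deployment = AquaDeploymentApp().create(
        model_id="ocid1.datasciencemodel.oc1..<model>",
        instance_shape="BM.GPU.H100.8",
        display_name="my-aqua-deployment",
    )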
+ create_deployment_details : CreateModelDeploymentDetails, optional + An instance of CreateModelDeploymentDetails containing all required and optional + fields for creating a model deployment via Aqua. + **kwargs: + Keyword arguments used to construct a CreateModelDeploymentDetails instance if one + is not provided. Returns ------- AquaDeployment - An Aqua deployment instance - + An Aqua deployment instance. """ - if not create_deployment_details: + # Build deployment details from kwargs if not explicitly provided. + if create_deployment_details is None: try: create_deployment_details = CreateModelDeploymentDetails(**kwargs) except ValidationError as ex: @@ -120,18 +119,49 @@ def create( f"Invalid parameters for creating a model deployment. Error details: {custom_errors}." ) from ex - # Create a model catalog entry in the user compartment - aqua_model = AquaModelApp().create( - model_id=create_deployment_details.model_id - or create_deployment_details.model_info, - compartment_id=create_deployment_details.compartment_id or COMPARTMENT_OCID, - project_id=create_deployment_details.project_id or PROJECT_OCID, - freeform_tags=create_deployment_details.freeform_tags, - defined_tags=create_deployment_details.defined_tags, - ) + # Extract model_id from the provided deployment details. + model_id = create_deployment_details.model_id + + # If a single model is provided, delegate to `create` method + if ( + not model_id + and create_deployment_details.models + and len(create_deployment_details.models) == 1 + ): + single_model = create_deployment_details.models[0] + logger.info( + f"Single model ({single_model.model_id}) provided. " + "Delegating to single model creation method." + ) + model_id = single_model.model_id + + # Set defaults for compartment and project if not provided. + compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID + project_id = create_deployment_details.project_id or PROJECT_OCID + freeform_tags = create_deployment_details.freeform_tags + defined_tags = create_deployment_details.defined_tags + + # Create an AquaModelApp instance once to perform the deployment creation. + model_app = AquaModelApp() + if model_id: + aqua_model = model_app.create( + model_id=model_id, + compartment_id=compartment_id, + project_id=project_id, + freeform_tags=freeform_tags, + defined_tags=defined_tags, + ) + else: + aqua_model = model_app.create_multi( + models=create_deployment_details.models, + compartment_id=compartment_id, + project_id=project_id, + freeform_tags=freeform_tags, + defined_tags=defined_tags, + ) # todo: remove this once deployment support is added - if create_deployment_details.model_info: + if create_deployment_details.models: raise AquaValueError( "Deployment support for multimodel info is in progress." ) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index a1b8e93cb..1c7ca50ac 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -2,17 +2,13 @@ # Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -from typing import List, Optional, Union +from typing import Any, Dict, List, Optional, Union -from oci.data_science.models import ( - ModelDeployment, - ModelDeploymentSummary, -) -from pydantic import Field, model_validator +from oci.data_science.models import ModelDeployment, ModelDeploymentSummary +from pydantic import BaseModel, Field, model_validator -from ads.aqua.common.entities import ModelInfo, ShapeInfo +from ads.aqua.common.entities import AquaMultiModelRef, ShapeInfo from ads.aqua.common.enums import Tags -from ads.aqua.common.errors import AquaValueError from ads.aqua.config.utils.serializer import Serializable from ads.aqua.constants import UNKNOWN, UNKNOWN_DICT from ads.aqua.data import AquaResourceIdentifier @@ -144,99 +140,94 @@ class Config: extra = "ignore" -class CreateModelDeploymentDetails(Serializable): - """Class for creating aqua model deployment. - - Properties - ---------- - compartment_id: str - The compartment OCID - project_id: str - Target project to list deployments from. - display_name: str - The name of model deployment. - description: str - The description of the deployment. - model_id: (str, optional) - The model OCID to deploy. Either model_id or model_info should be set. - model_info: (List[ModelInfo], optional) - The model info to deploy, used for multimodel deployment. Either model_id or model_info should be set. - instance_count: (int, optional). Defaults to 1. - The number of instance used for deployment. - instance_shape: (str). - The shape of the instance used for deployment. - log_group_id: (str) - The oci logging group id. The access log and predict log share the same log group. - access_log_id: (str). - The access log OCID for the access logs. https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm - predict_log_id: (str). - The predict log OCID for the predict logs. https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm - bandwidth_mbps: (int). Defaults to 10. - The bandwidth limit on the load balancer in Mbps. - web_concurrency: str - The number of worker processes/threads to handle incoming requests - with_bucket_uri(bucket_uri) - Sets the bucket uri when uploading large size model. - server_port: (int). - The server port for docker container image. - health_check_port: (int). - The health check port for docker container image. - env_var : dict, optional - Environment variable for the deployment, by default None. - container_family: str - The image family of model deployment container runtime. - memory_in_gbs: float - The memory in gbs for the shape selected. - ocpus: float - The ocpu count for the shape selected. - model_file: str - The file used for model deployment. - private_endpoint_id: str - The private endpoint id of model deployment. - container_image_uri: str - The image of model deployment container runtime, ignored for service managed containers. - Required parameter for BYOC based deployments if this parameter was not set during model registration. - cmd_var: List[str] - The cmd of model deployment container runtime. 
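The pydantic rewrite below enforces that exactly one of model_id or models is supplied. A minimal sketch of that behaviour (the shape name and display names are placeholders):

    from pydantic import ValidationError
    from ads.aqua.modeldeployment.entities import CreateModelDeploymentDetails

    # Valid: exactly one of `model_id` / `models` is set.
    CreateModelDeploymentDetails(
        instance_shape="BM.GPU.H100.8",
        display_name="single-model",
        model_id="ocid1.datasciencemodel.oc1..<model>",
    )

    # Invalid: providing neither (or both) raises a validation error.
    try:
        CreateModelDeploymentDetails(
            instance_shape="BM.GPU.H100.8", display_name="no-model"
        )
    except ValidationError as err:
        print(err)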
- freeform_tags: dict - Freeform tags for the model deployment - defined_tags: dict - Defined tags for the model deployment - """ - - instance_shape: str - display_name: str - model_id: Optional[str] = None - model_info: Optional[List[ModelInfo]] = None - instance_count: Optional[int] = None - log_group_id: Optional[str] = None - access_log_id: Optional[str] = None - predict_log_id: Optional[str] = None - compartment_id: Optional[str] = None - project_id: Optional[str] = None - description: Optional[str] = None - bandwidth_mbps: Optional[int] = None - web_concurrency: Optional[int] = None - server_port: Optional[int] = None - health_check_port: Optional[int] = None - env_var: Optional[dict] = None - container_family: Optional[str] = None - memory_in_gbs: Optional[float] = None - ocpus: Optional[float] = None - model_file: Optional[str] = None - private_endpoint_id: Optional[str] = None - container_image_uri: Optional[None] = None - cmd_var: Optional[List[str]] = None - freeform_tags: Optional[dict] = None - defined_tags: Optional[dict] = None +class CreateModelDeploymentDetails(BaseModel): + """Class for creating Aqua model deployments.""" + + instance_shape: str = Field( + ..., description="The instance shape used for deployment." + ) + display_name: str = Field(..., description="The name of the model deployment.") + compartment_id: Optional[str] = Field(None, description="The compartment OCID.") + project_id: Optional[str] = Field(None, description="The project OCID.") + description: Optional[str] = Field( + None, description="The description of the deployment." + ) + model_id: Optional[str] = Field(None, description="The model OCID to deploy.") + models: Optional[List[AquaMultiModelRef]] = Field( + None, description="List of models for multimodel deployment." + ) + instance_count: int = Field( + None, description="Number of instances used for deployment." + ) + log_group_id: Optional[str] = Field( + None, description="OCI logging group ID for logs." + ) + access_log_id: Optional[str] = Field( + None, + description="OCID for access logs. " + "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm", + ) + predict_log_id: Optional[str] = Field( + None, + description="OCID for prediction logs." + "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm", + ) + bandwidth_mbps: Optional[int] = Field( + None, description="Bandwidth limit on the load balancer in Mbps." + ) + web_concurrency: Optional[int] = Field( + None, description="Number of worker processes/threads for handling requests." + ) + server_port: Optional[int] = Field( + None, description="Server port for the Docker container image." + ) + health_check_port: Optional[int] = Field( + None, description="Health check port for the Docker container image." + ) + env_var: Optional[Dict[str, str]] = Field( + default_factory=dict, description="Environment variables for deployment." + ) + container_family: Optional[str] = Field( + None, description="Image family of the model deployment container runtime." + ) + memory_in_gbs: Optional[float] = Field( + None, description="Memory (in GB) for the selected shape." + ) + ocpus: Optional[float] = Field( + None, description="OCPU count for the selected shape." + ) + model_file: Optional[str] = Field( + None, description="File used for model deployment." + ) + private_endpoint_id: Optional[str] = Field( + None, description="Private endpoint ID for model deployment." 
+ ) + container_image_uri: Optional[str] = Field( + None, + description="Image URI for model deployment container runtime " + "(ignored for service-managed containers). " + "Required parameter for BYOC based deployments if this parameter was not set during " + "model registration.", + ) + cmd_var: Optional[List[str]] = Field( + default_factory=list, description="Command variables for the container runtime." + ) + freeform_tags: Optional[Dict[str, str]] = Field( + default_factory=dict, description="Freeform tags for model deployment." + ) + defined_tags: Optional[Dict[str, Dict[str, str]]] = Field( + default_factory=dict, description="Defined tags for model deployment." + ) @model_validator(mode="before") @classmethod - def validate_model_fields(cls, values): - model_id, model_info = values.get("model_id"), values.get("model_info") - if bool(model_id) == bool(model_info): # either both are set or unset - raise AquaValueError( - "Exactly one of `model_id` or `model_info` must be set to create a model deployment" + def validate(cls, values: Any) -> Any: + """Ensures exactly one of `model_id` or `models` is provided.""" + model_id = values.get("model_id") + models = values.get("models") + if bool(model_id) == bool(models): # Both set or both unset + raise ValueError( + "Exactly one of `model_id` or `models` must be provided to create a model deployment." ) return values diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index 2d6251c72..338237c84 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -3,33 +3,39 @@ # Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import json import os import shlex import tempfile -import json from dataclasses import asdict from importlib import reload from unittest.mock import MagicMock, patch import oci -from ads.aqua.constants import HF_METADATA_FOLDER import pytest -from ads.aqua.ui import ModelFormat -from parameterized import parameterized from huggingface_hub.hf_api import HfApi, ModelInfo +from parameterized import parameterized import ads.aqua.model +import ads.common +import ads.common.oci_client +import ads.config +from ads.aqua.common.entities import AquaMultiModelRef +from ads.aqua.common.errors import ( + AquaFileNotFoundError, + AquaRuntimeError, + AquaValueError, +) +from ads.aqua.common.utils import get_hf_model_info +from ads.aqua.constants import HF_METADATA_FOLDER +from ads.aqua.model import AquaModelApp from ads.aqua.model.entities import ( + AquaModel, AquaModelSummary, ImportModelDetails, - AquaModel, ModelValidationResult, ) -from ads.aqua.common.utils import get_hf_model_info -import ads.common -import ads.common.oci_client -import ads.config -from ads.aqua.model import AquaModelApp +from ads.aqua.ui import ModelFormat from ads.common.object_storage_details import ObjectStorageDetails from ads.model.datascience_model import DataScienceModel from ads.model.model_metadata import ( @@ -37,12 +43,6 @@ ModelProvenanceMetadata, ModelTaxonomyMetadata, ) -from ads.aqua.common.errors import ( - AquaRuntimeError, - AquaFileNotFoundError, - AquaValueError, -) -from ads.aqua.common.entities import ModelInfo as MultiModelInfo from ads.model.service.oci_datascience_model import OCIDataScienceModel @@ -378,21 +378,21 @@ def test_create_multimodel( mock_model.custom_metadata_list = custom_metadata_list mock_from_id.return_value = 
mock_model - model_info_1 = MultiModelInfo( + model_info_1 = AquaMultiModelRef( model_id="test_model_id_1", gpu_count=2, env_var={"params": "--trust-remote-code --max-model-len 60000"}, ) - model_info_2 = MultiModelInfo( + model_info_2 = AquaMultiModelRef( model_id="test_model_id_2", gpu_count=2, env_var={"params": "--trust-remote-code --max-model-len 32000"}, ) # will create a multi-model group - model = self.app.create( - model_id=[model_info_1, model_info_2], + model = self.app.create_multi( + models=[model_info_1, model_info_2], project_id="test_project_id", compartment_id="test_compartment_id", ) From b9c22a3bd5e5e717c6b43afef7b758f07d4cf553 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 10 Feb 2025 13:52:17 -0500 Subject: [PATCH 023/124] Modify get deployment config to pydantic class. --- ads/aqua/modeldeployment/deployment.py | 67 +++++----- ads/aqua/modeldeployment/entities.py | 117 ++++++++++++------ .../with_extras/aqua/test_deployment.py | 63 ++++++++-- 3 files changed, 167 insertions(+), 80 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 883a68c39..45f47dca9 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -44,6 +44,7 @@ AquaDeployment, AquaDeploymentConfig, AquaDeploymentDetail, + ConfigurationItem, CreateModelDeploymentDetails, GPUModelAllocation, GPUShapeAllocation, @@ -85,7 +86,7 @@ class AquaDeploymentApp(AquaApp): Retrieves details of an Aqua model deployment by its unique identifier. list(**kwargs) -> List[AquaModelSummary]: Lists all Aqua deployments within a specified compartment and/or project. - get_deployment_config(self, model_id: str) -> Dict: + get_deployment_config(self, model_id: str) -> AquaDeploymentConfig: Gets the deployment config of given Aqua model. Note: @@ -294,12 +295,9 @@ def create( deployment_config = self.get_deployment_config(config_source_id) - config_params = ( - deployment_config.get("configuration", UNKNOWN_DICT) - .get(create_deployment_details.instance_shape, UNKNOWN_DICT) - .get("parameters", UNKNOWN_DICT) - .get(get_container_params_type(container_type_key), UNKNOWN) - ) + config_params = deployment_config.configuration.get( + create_deployment_details.instance_shape, ConfigurationItem() + ).parameters.get(get_container_params_type(container_type_key), UNKNOWN) # validate user provided params user_params = env_var.get("PARAMS", UNKNOWN) @@ -607,7 +605,7 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": @telemetry( entry_point="plugin=deployment&action=get_deployment_config", name="aqua" ) - def get_deployment_config(self, model_id: str) -> Dict: + def get_deployment_config(self, model_id: str) -> AquaDeploymentConfig: """Gets the deployment config of given Aqua model. Parameters @@ -617,15 +615,15 @@ def get_deployment_config(self, model_id: str) -> Dict: Returns ------- - Dict: - A dict of allowed deployment configs. + AquaDeploymentConfig: + An instance of AquaDeploymentConfig. """ config = self.get_config(model_id, AQUA_MODEL_DEPLOYMENT_CONFIG) if not config: logger.debug( f"Deployment config for custom model: {model_id} is not available. Use defaults." 
) - return config + return AquaDeploymentConfig(**(config or UNKNOWN_DICT)) @telemetry( entry_point="plugin=deployment&action=get_multimodel_compatible_shapes", @@ -662,15 +660,13 @@ def get_multimodel_compatible_shapes( deployment = {} model_shape_gpu = {} for model_id in model_ids: - deployment_config = AquaDeploymentConfig( - **self.get_deployment_config(model_id=model_id) - ) + deployment_config = self.get_deployment_config(model_id=model_id) model_shape_gpu[model_id] = { shape: [ item.gpu_count - for item in deployment_config.configuration[ - shape - ].multi_model_deployment + for item in deployment_config.configuration.get( + shape, ConfigurationItem() + ).multi_model_deployment ] for shape in deployment_config.shape } @@ -680,7 +676,9 @@ def get_multimodel_compatible_shapes( model_id: { "shape": deployment_config.shape, "configuration": { - shape: deployment_config.configuration[shape] + shape: deployment_config.configuration.get( + shape, ConfigurationItem() + ) for shape in deployment_config.shape }, } @@ -688,7 +686,11 @@ def get_multimodel_compatible_shapes( ) common_shapes = [] - for shape_gpu in model_shape_gpu.values(): + for model, shape_gpu in model_shape_gpu.items(): + if not shape_gpu: + raise AquaValueError( + f"There are no available shapes for model {model}, please select different model to deploy." + ) if not common_shapes: common_shapes = list(shape_gpu.keys()) else: @@ -706,7 +708,12 @@ def get_multimodel_compatible_shapes( model_gpu = { model: shape_gpu[common_shape] for model, shape_gpu in model_shape_gpu.items() + if shape_gpu[common_shape] } + + if len(model_gpu) != len(model_ids): + continue + is_compatible, maximum_gpu_count, combination = self._verify_compatibility( model_gpu, primary_model_id ) @@ -849,26 +856,24 @@ def get_deployment_default_params( ): deployment_config = self.get_deployment_config(model_id) - instance_shape_config = deployment_config.get( - "configuration", UNKNOWN_DICT - ).get(instance_shape, UNKNOWN_DICT) + instance_shape_config = deployment_config.configuration.get( + instance_shape, ConfigurationItem() + ) - if "multi_model_deployment" in instance_shape_config and gpu_count: - gpu_params = instance_shape_config.get( - "multi_model_deployment", UNKNOWN_DICT - ) + if instance_shape_config.multi_model_deployment and gpu_count: + gpu_params = instance_shape_config.multi_model_deployment for gpu_config in gpu_params: - if gpu_config["gpu_count"] == gpu_count: - config_params = gpu_config.get("parameters", UNKNOWN_DICT).get( + if gpu_config.gpu_count == gpu_count: + config_params = gpu_config.parameters.get( get_container_params_type(container_type_key), UNKNOWN ) break else: - config_params = instance_shape_config.get( - "parameters", UNKNOWN_DICT - ).get(get_container_params_type(container_type_key), UNKNOWN) + config_params = instance_shape_config.parameters.get( + get_container_params_type(container_type_key), UNKNOWN + ) if config_params: params_list = get_params_list(config_params) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index e8f170e33..2921c68af 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -255,16 +255,57 @@ class Config: extra = "ignore" +class ShapeInfoConfig(Serializable): + """Describes how many memory and cpu to this model for specific shape. + + Attributes: + memory_in_gbs (int, optional): The number of memory in gbs to this model of the shape. + ocpu (int, optional): The number of ocpus to this model of the shape. 
+ """ + + memory_in_gbs: Optional[int] = Field( + default_factory=int, + description="The number of memory in gbs to this model of the shape.", + ) + ocpu: Optional[int] = Field( + default_factory=int, + description="The number of ocpus to this model of the shape.", + ) + + class Config: + extra = "allow" + + +class ShapeInfo(Serializable): + """Describes the shape information to this model for specific shape. + + Attributes: + configs (List[ShapeInfoConfig], optional): A list of memory and cpu number details to this model of the shape. + type (str, optional): The type of the shape. + """ + + configs: Optional[List[ShapeInfoConfig]] = Field( + default_factory=list, + description="A list of memory and cpu number details to this model of the shape.", + ) + type: Optional[str] = Field( + default_factory=str, description="The type of the shape." + ) + + class Config: + extra = "allow" + + class MultiModelConfig(Serializable): """Describes how many GPUs and the parameters of specific shape for multi model deployment. Attributes: - gpu_count (int): Number of GPUs count to this model of this shape. + gpu_count (int, optional): Number of GPUs count to this model of this shape. parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to configure the behavior of a particular GPU shape. """ - gpu_count: int = Field( + gpu_count: Optional[int] = Field( default_factory=int, description="The number of GPUs allocated to the model." ) parameters: Optional[Dict[str, str]] = Field( @@ -273,115 +314,109 @@ class MultiModelConfig(Serializable): ) class Config: - extra = "ignore" + extra = "allow" class ConfigurationItem(Serializable): - """Holds key-value parameter pairs for a specific GPU shape. + """Holds key-value parameter pairs for a specific GPU or CPU shape. Attributes: parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to configure the behavior of a particular GPU shape. multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details. + shape_info (ShapeInfo, optional): The shape information to this model for specific CPU shape. """ parameters: Optional[Dict[str, str]] = Field( default_factory=dict, - description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).", + description="Key-value pairs for shape parameters.", ) multi_model_deployment: Optional[List[MultiModelConfig]] = Field( default_factory=list, description="A list of multi model configuration details." ) + shape_info: Optional[ShapeInfo] = Field( + default_factory=ShapeInfo, + description="The shape information to this model for specific shape", + ) class Config: - extra = "ignore" + extra = "allow" -class ModelDeploymentConfig(Serializable): - """Represents one model's shape list and detailed configuration. +class AquaDeploymentConfig(Serializable): + """Represents multi model's shape list and detailed configuration. Attributes: - shape (List[str]): A list of shape names (e.g., BM.GPU.A10.4). - configuration (Dict[str, ConfigurationItem]): Maps each shape to its configuration details. + shape (List[str], optional): A list of shape names (e.g., BM.GPU.A10.4). + configuration (Dict[str, ConfigurationItem], optional): Maps each shape to its configuration details. """ - shape: List[str] = Field( + shape: Optional[List[str]] = Field( default_factory=list, description="List of supported shapes for the model." 
) - configuration: Dict[str, ConfigurationItem] = Field( + configuration: Optional[Dict[str, ConfigurationItem]] = Field( default_factory=dict, description="Configuration details keyed by shape." ) class Config: - extra = "ignore" - - -class AquaDeploymentConfig(ModelDeploymentConfig): - """Represents multi model's shape list and detailed configuration. - - Attributes: - shape (List[str]): A list of shape names (e.g., BM.GPU.A10.4). - configuration (Dict[str, ConfigurationItem]): Maps each shape to its configuration details. - """ - - configuration: Dict[str, ConfigurationItem] = Field( - default_factory=dict, description="Configuration details keyed by shape." - ) + extra = "allow" class GPUModelAllocation(Serializable): """Describes how many GPUs are allocated to a particular model. Attributes: - ocid (str): The unique identifier of the model. - gpu_count (int): Number of GPUs allocated to this model. + ocid (str, optional): The unique identifier of the model. + gpu_count (int, optional): Number of GPUs allocated to this model. """ - ocid: str = Field(default_factory=str, description="The unique model OCID.") - gpu_count: int = Field( + ocid: Optional[str] = Field( + default_factory=str, description="The unique model OCID." + ) + gpu_count: Optional[int] = Field( default_factory=int, description="The number of GPUs allocated to the model." ) class Config: - extra = "ignore" + extra = "allow" class GPUShapeAllocation(Serializable): """Allocation details for a specific GPU shape. Attributes: - models (List[GPUModelAllocation]): List of model GPU allocations for this shape. - total_gpus_available (int): The total number of GPUs available for this shape. + models (List[GPUModelAllocation], optional): List of model GPU allocations for this shape. + total_gpus_available (int, optional): The total number of GPUs available for this shape. """ - models: List[GPUModelAllocation] = Field( + models: Optional[List[GPUModelAllocation]] = Field( default_factory=list, description="List of model allocations for this shape." ) - total_gpus_available: int = Field( + total_gpus_available: Optional[int] = Field( default_factory=int, description="Total GPUs available for this shape." ) class Config: - extra = "ignore" + extra = "allow" class ModelDeploymentConfigSummary(Serializable): """Top-level configuration model for OCI-based deployments. Attributes: - deployment_config (Dict[str, ModelDeploymentConfig]): Deployment configurations + deployment_config (Dict[str, AquaDeploymentConfig], optional): Deployment configurations keyed by model OCID. - gpu_allocation (Dict[str, GPUShapeAllocation]): GPU allocations keyed by GPU shape. + gpu_allocation (Dict[str, GPUShapeAllocation], optional): GPU allocations keyed by GPU shape. """ - deployment_config: Dict[str, ModelDeploymentConfig] = Field( + deployment_config: Optional[Dict[str, AquaDeploymentConfig]] = Field( default_factory=dict, description=( "Deployment configuration details for each model, including supported shapes " "and shape-specific parameters." 
), ) - gpu_allocation: Dict[str, GPUShapeAllocation] = Field( + gpu_allocation: Optional[Dict[str, GPUShapeAllocation]] = Field( default_factory=dict, description=( "Details on how GPUs are allocated per shape, including the total " @@ -390,4 +425,4 @@ class ModelDeploymentConfigSummary(Serializable): ) class Config: - extra = "ignore" + extra = "allow" diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index b99ee2050..86fdd92de 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -20,6 +20,7 @@ from ads.aqua.modeldeployment import AquaDeploymentApp, MDInferenceResponse from ads.aqua.modeldeployment.entities import ( AquaDeployment, + AquaDeploymentConfig, AquaDeploymentDetail, ModelParams, ) @@ -350,6 +351,7 @@ class TestDataset: }, } ], + "shape_info": {"configs": [], "type": ""}, }, "VM.GPU.A10.4": { "parameters": { @@ -364,6 +366,7 @@ class TestDataset: }, {"gpu_count": 4, "parameters": {}}, ], + "shape_info": {"configs": [], "type": ""}, }, "BM.GPU.A100-v2.8": { "parameters": { @@ -389,6 +392,7 @@ class TestDataset: }, }, ], + "shape_info": {"configs": [], "type": ""}, }, "BM.GPU.H100.8": { "parameters": { @@ -399,6 +403,7 @@ class TestDataset: {"gpu_count": 2, "parameters": {}}, {"gpu_count": 8, "parameters": {}}, ], + "shape_info": {"configs": [], "type": ""}, }, }, } @@ -545,11 +550,13 @@ def test_get_deployment_config(self): self.app.get_config = MagicMock(return_value=config) result = self.app.get_deployment_config(TestDataset.MODEL_ID) - assert result == config + expected_config = AquaDeploymentConfig(**config) + assert result == expected_config self.app.get_config = MagicMock(return_value=None) result = self.app.get_deployment_config(TestDataset.MODEL_ID) - assert result == None + expected_config = AquaDeploymentConfig(**{}) + assert result == expected_config def test_get_multimodel_compatible_shapes(self): config_json = os.path.join( @@ -559,7 +566,9 @@ def test_get_multimodel_compatible_shapes(self): with open(config_json, "r") as _file: config = json.load(_file) - self.app.get_deployment_config = MagicMock(return_value=config) + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) result = self.app.get_multimodel_compatible_shapes(["model_a"]) assert ( @@ -567,6 +576,34 @@ def test_get_multimodel_compatible_shapes(self): == TestDataset.aqua_deployment_multi_model_config_summary ) + @parameterized.expand( + [ + [ + "shape", + "There are no available shapes for model model_a, please select different model to deploy.", + ], + [ + "configuration", + "There are no available gpu allocations for models selected at this moment, please select different model to deploy.", + ], + ] + ) + def test_get_multimodel_compatible_shapes_invalid_config(self, missing_key, error): + config_json = os.path.join( + self.curr_dir, + "test_data/deployment/aqua_multi_model_deployment_config.json", + ) + with open(config_json, "r") as _file: + config = json.load(_file) + + config.pop(missing_key) + + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) + with pytest.raises(AquaValueError, match=error): + self.app.get_multimodel_compatible_shapes(["model_a"]) + def test_verify_compatibility(self): result = self.app._verify_compatibility(TestDataset.model_gpu_dict) @@ -615,7 +652,9 @@ def test_create_deployment_for_foundation_model( with open(config_json, "r") as _file: config = 
json.load(_file) - self.app.get_deployment_config = MagicMock(return_value=config) + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) freeform_tags = {"ftag1": "fvalue1", "ftag2": "fvalue2"} defined_tags = {"dtag1": "dvalue1", "dtag2": "dvalue2"} @@ -696,7 +735,9 @@ def test_create_deployment_for_fine_tuned_model( with open(config_json, "r") as _file: config = json.load(_file) - self.app.get_deployment_config = MagicMock(return_value=config) + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) container_index_json = os.path.join( self.curr_dir, "test_data/ui/container_index.json" @@ -768,7 +809,9 @@ def test_create_deployment_for_gguf_model( with open(config_json, "r") as _file: config = json.load(_file) - self.app.get_deployment_config = MagicMock(return_value=config) + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) container_index_json = os.path.join( self.curr_dir, "test_data/ui/container_index.json" @@ -847,7 +890,9 @@ def test_create_deployment_for_tei_byoc_embedding_model( with open(config_json, "r") as _file: config = json.load(_file) - self.app.get_deployment_config = MagicMock(return_value=config) + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) container_index_json = os.path.join( self.curr_dir, "test_data/ui/container_index.json" @@ -979,7 +1024,9 @@ def test_get_deployment_default_params( mock_model.custom_metadata_list = custom_metadata_list mock_from_id.return_value = mock_model - self.app.get_deployment_config = MagicMock(return_value=config) + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) result = self.app.get_deployment_default_params( TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, gpu_count From d0b0704841db8e9d355d057dfd6850a5d57c79d5 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 10 Feb 2025 15:18:30 -0500 Subject: [PATCH 024/124] Updated pr. --- ads/aqua/modeldeployment/utils.py | 19 ++++++++++-- .../with_extras/aqua/test_deployment.py | 30 ++++++++++++------- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 738308188..099915e1f 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -12,6 +12,7 @@ from ads.aqua.common.errors import AquaValueError from ads.aqua.modeldeployment.entities import ( AquaDeploymentConfig, + ConfigurationItem, GPUModelAllocation, GPUShapeAllocation, ModelDeploymentConfigSummary, @@ -65,6 +66,12 @@ def load( deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) + for model, shape_gpu in model_shape_gpu.items(): + if not shape_gpu: + raise AquaValueError( + f"There are no available shapes for model {model}, please select different model to deploy." 
+ ) + common_shapes = self._get_common_shapes(model_shape_gpu) if not common_shapes: raise AquaValueError( @@ -111,14 +118,17 @@ def _extract_model_shape_gpu( model_shape_gpu[model_id] = { shape: [ item.gpu_count - for item in config.configuration[shape].multi_model_deployment + for item in config.configuration.get( + shape, ConfigurationItem() + ).multi_model_deployment ] for shape in config.shape } deployment[model_id] = { "shape": config.shape, "configuration": { - shape: config.configuration[shape] for shape in config.shape + shape: config.configuration.get(shape, ConfigurationItem()) + for shape in config.shape }, } @@ -147,7 +157,12 @@ def _compute_gpu_allocation( model_gpu = { model: shape_gpu[common_shape] for model, shape_gpu in model_shape_gpu.items() + if shape_gpu[common_shape] } + + if len(model_gpu) != len(model_shape_gpu): + continue + is_compatible, max_gpu_count, combination = self._verify_compatibility( model_gpu, primary_model_id ) diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 755916f59..007df5f2a 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -559,7 +559,12 @@ def test_get_deployment_config(self): expected_config = AquaDeploymentConfig(**{}) assert result == expected_config - def test_get_multimodel_deployment_config(self): + @patch( + "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" + ) + def test_get_multimodel_deployment_config( + self, mock_fetch_deployment_configs_concurrently + ): config_json = os.path.join( self.curr_dir, "test_data/deployment/aqua_multi_model_deployment_config.json", @@ -567,9 +572,9 @@ def test_get_multimodel_deployment_config(self): with open(config_json, "r") as _file: config = json.load(_file) - self.app.get_deployment_config = MagicMock( - return_value=AquaDeploymentConfig(**config) - ) + mock_fetch_deployment_configs_concurrently.return_value = { + "model_a": AquaDeploymentConfig(**config) + } result = self.app.get_multimodel_deployment_config(["model_a"]) assert ( @@ -585,11 +590,16 @@ def test_get_multimodel_deployment_config(self): ], [ "configuration", - "There are no available gpu allocations for models selected at this moment, please select different model to deploy.", + "No available GPU allocations. 
Choose a different model.", ], ] ) - def test_get_multimodel_compatible_shapes_invalid_config(self, missing_key, error): + @patch( + "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" + ) + def test_get_multimodel_compatible_shapes_invalid_config( + self, missing_key, error, mock_fetch_deployment_configs_concurrently + ): config_json = os.path.join( self.curr_dir, "test_data/deployment/aqua_multi_model_deployment_config.json", @@ -599,11 +609,11 @@ def test_get_multimodel_compatible_shapes_invalid_config(self, missing_key, erro config.pop(missing_key) - self.app.get_deployment_config = MagicMock( - return_value=AquaDeploymentConfig(**config) - ) + mock_fetch_deployment_configs_concurrently.return_value = { + "model_a": AquaDeploymentConfig(**config) + } with pytest.raises(AquaValueError, match=error): - self.app.get_multimodel_compatible_shapes(["model_a"]) + self.app.get_multimodel_deployment_config(["model_a"]) def test_verify_compatibility(self): result = self.app._verify_compatibility(TestDataset.model_gpu_dict) From bddfc41e4bc6ff5d1ae8e3dcc5fbd87faba76971 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 10 Feb 2025 16:04:03 -0500 Subject: [PATCH 025/124] Fixed unit tests. --- ads/aqua/extension/deployment_handler.py | 5 +---- tests/unitary/with_extras/aqua/test_deployment.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index f374b3ab5..75f00adfd 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -48,10 +48,7 @@ def get(self, id: Union[str, List[str]] = None): 400, f"The request to {self.request.path} must include either a single model ID or a list of model IDs.", ) - if isinstance(id, list): - return self.get_multimodel_deployment_config(id) - else: - return self.get_deployment_config(id) + return self.get_deployment_config(id) elif paths.startswith("aqua/deployments"): if not id: return self.list() diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 007df5f2a..94eb0b44d 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -13,6 +13,7 @@ import oci import pytest +from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader from parameterized import parameterized import ads.aqua.modeldeployment.deployment @@ -616,13 +617,15 @@ def test_get_multimodel_compatible_shapes_invalid_config( self.app.get_multimodel_deployment_config(["model_a"]) def test_verify_compatibility(self): - result = self.app._verify_compatibility(TestDataset.model_gpu_dict) + result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( + TestDataset.model_gpu_dict + ) assert result[0] == True assert result[1] == 8 assert len(result[2]) == 3 - result = self.app._verify_compatibility( + result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( model_gpu_dict=TestDataset.model_gpu_dict, primary_model_id="model_b" ) @@ -635,7 +638,9 @@ def test_verify_compatibility(self): # model_b gets the maximum gpu count assert item.gpu_count == 4 - result = self.app._verify_compatibility(TestDataset.incompatible_model_gpu_dict) + result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( + TestDataset.incompatible_model_gpu_dict + ) assert result[0] == False assert result[1] == 0 From 
63667f44761528cc0288f04d7255397b571a5e90 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 10 Feb 2025 16:30:25 -0500 Subject: [PATCH 026/124] Fixed unit test. --- tests/unitary/with_extras/aqua/test_deployment_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unitary/with_extras/aqua/test_deployment_handler.py b/tests/unitary/with_extras/aqua/test_deployment_handler.py index 8ffedacca..b6c60fb49 100644 --- a/tests/unitary/with_extras/aqua/test_deployment_handler.py +++ b/tests/unitary/with_extras/aqua/test_deployment_handler.py @@ -96,7 +96,7 @@ def test_get_multimodel_deployment_config( self, mock_get_multimodel_deployment_config ): """Test get method to return multi model deployment config""" - self.deployment_handler.request.path = "aqua/deployments/modelconfig" + self.deployment_handler.request.path = "aqua/deployments/config" self.deployment_handler.get(id=["mock-model-id-one", "mock-model-id-two"]) mock_get_multimodel_deployment_config.assert_called_with( model_ids=["mock-model-id-one", "mock-model-id-two"], primary_model_id=None From bc3978c99c835b024ac1a118dabb14954a6b95b8 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 10 Feb 2025 17:07:05 -0500 Subject: [PATCH 027/124] Updated name. --- ads/aqua/modeldeployment/entities.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 2d4f9a6e7..8766829f2 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -258,7 +258,7 @@ class Config: extra = "allow" -class ShapeInfo(Serializable): +class DeploymentShapeInfo(Serializable): """Describes the shape information to this model for specific shape. Attributes: @@ -306,7 +306,7 @@ class ConfigurationItem(Serializable): parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to configure the behavior of a particular GPU shape. multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details. - shape_info (ShapeInfo, optional): The shape information to this model for specific CPU shape. + shape_info (DeploymentShapeInfo, optional): The shape information to this model for specific CPU shape. """ parameters: Optional[Dict[str, str]] = Field( @@ -316,8 +316,8 @@ class ConfigurationItem(Serializable): multi_model_deployment: Optional[List[MultiModelConfig]] = Field( default_factory=list, description="A list of multi model configuration details." 
) - shape_info: Optional[ShapeInfo] = Field( - default_factory=ShapeInfo, + shape_info: Optional[DeploymentShapeInfo] = Field( + default_factory=DeploymentShapeInfo, description="The shape information to this model for specific shape", ) From 04a4636973e7f1c72148f08a01a685f64ffd5c32 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Mon, 10 Feb 2025 16:46:23 -0800 Subject: [PATCH 028/124] added validation method --- ads/aqua/model/model.py | 3 + ads/aqua/modeldeployment/deployment.py | 7 + ads/aqua/modeldeployment/entities.py | 251 ++++++++++-------- .../unitary/with_extras/aqua/test_multi.ipynb | 0 4 files changed, 155 insertions(+), 106 deletions(-) create mode 100644 tests/unitary/with_extras/aqua/test_multi.ipynb diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 7276de085..d7754568e 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -77,6 +77,9 @@ ModelFormat, ModelValidationResult, ) + +from ads.aqua.modeldeployment.deployment import AquaDeploymentApp + from ads.aqua.ui import AquaContainerConfig, AquaContainerConfigItem from ads.common.auth import default_signer from ads.common.oci_resource import SEARCH_TYPE, OCIResource diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 0291f98c4..cc0ba14ad 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -154,6 +154,13 @@ def create( defined_tags=defined_tags, ) else: + model_ids = [model.model_id for model in create_deployment_details.models] + + try: + CreateModelDeploymentDetails.validate_config(self.get_multimodel_deployment_config(model_ids = model_ids)) + except ValidationError as e: + print(e) + aqua_model = model_app.create_multi( models=create_deployment_details.models, compartment_id=compartment_id, diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 67fd360e3..7a42dac95 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -14,7 +14,6 @@ from ads.common.serializer import DataClassSerializable from ads.common.utils import get_console_link - class ModelParams(Serializable): max_tokens: Optional[int] = None temperature: Optional[float] = None @@ -131,111 +130,6 @@ class Config: extra = "ignore" -class AquaDeploymentDetail(AquaDeployment, DataClassSerializable): - """Represents a details of Aqua deployment.""" - - log_group: AquaResourceIdentifier = Field(default_factory=AquaResourceIdentifier) - log: AquaResourceIdentifier = Field(default_factory=AquaResourceIdentifier) - - class Config: - extra = "ignore" - - -class CreateModelDeploymentDetails(BaseModel): - """Class for creating Aqua model deployments.""" - - instance_shape: str = Field( - ..., description="The instance shape used for deployment." - ) - display_name: str = Field(..., description="The name of the model deployment.") - compartment_id: Optional[str] = Field(None, description="The compartment OCID.") - project_id: Optional[str] = Field(None, description="The project OCID.") - description: Optional[str] = Field( - None, description="The description of the deployment." - ) - model_id: Optional[str] = Field(None, description="The model OCID to deploy.") - models: Optional[List[AquaMultiModelRef]] = Field( - None, description="List of models for multimodel deployment." - ) - instance_count: int = Field( - None, description="Number of instances used for deployment." - ) - log_group_id: Optional[str] = Field( - None, description="OCI logging group ID for logs." 
- ) - access_log_id: Optional[str] = Field( - None, - description="OCID for access logs. " - "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm", - ) - predict_log_id: Optional[str] = Field( - None, - description="OCID for prediction logs." - "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm", - ) - bandwidth_mbps: Optional[int] = Field( - None, description="Bandwidth limit on the load balancer in Mbps." - ) - web_concurrency: Optional[int] = Field( - None, description="Number of worker processes/threads for handling requests." - ) - server_port: Optional[int] = Field( - None, description="Server port for the Docker container image." - ) - health_check_port: Optional[int] = Field( - None, description="Health check port for the Docker container image." - ) - env_var: Optional[Dict[str, str]] = Field( - default_factory=dict, description="Environment variables for deployment." - ) - container_family: Optional[str] = Field( - None, description="Image family of the model deployment container runtime." - ) - memory_in_gbs: Optional[float] = Field( - None, description="Memory (in GB) for the selected shape." - ) - ocpus: Optional[float] = Field( - None, description="OCPU count for the selected shape." - ) - model_file: Optional[str] = Field( - None, description="File used for model deployment." - ) - private_endpoint_id: Optional[str] = Field( - None, description="Private endpoint ID for model deployment." - ) - container_image_uri: Optional[str] = Field( - None, - description="Image URI for model deployment container runtime " - "(ignored for service-managed containers). " - "Required parameter for BYOC based deployments if this parameter was not set during " - "model registration.", - ) - cmd_var: Optional[List[str]] = Field( - default_factory=list, description="Command variables for the container runtime." - ) - freeform_tags: Optional[Dict[str, str]] = Field( - default_factory=dict, description="Freeform tags for model deployment." - ) - defined_tags: Optional[Dict[str, Dict[str, str]]] = Field( - default_factory=dict, description="Defined tags for model deployment." - ) - - @model_validator(mode="before") - @classmethod - def validate(cls, values: Any) -> Any: - """Ensures exactly one of `model_id` or `models` is provided.""" - model_id = values.get("model_id") - models = values.get("models") - if bool(model_id) == bool(models): # Both set or both unset - raise ValueError( - "Exactly one of `model_id` or `models` must be provided to create a model deployment." - ) - return values - - class Config: - extra = "ignore" - - class MultiModelConfig(Serializable): """Describes how many GPUs and the parameters of specific shape for multi model deployment. @@ -372,3 +266,148 @@ class ModelDeploymentConfigSummary(Serializable): class Config: extra = "ignore" + +class AquaDeploymentDetail(AquaDeployment, DataClassSerializable): + """Represents a details of Aqua deployment.""" + + log_group: AquaResourceIdentifier = Field(default_factory=AquaResourceIdentifier) + log: AquaResourceIdentifier = Field(default_factory=AquaResourceIdentifier) + + class Config: + extra = "ignore" + + +class CreateModelDeploymentDetails(BaseModel): + """Class for creating Aqua model deployments.""" + + instance_shape: str = Field( + ..., description="The instance shape used for deployment." 
+ ) + display_name: str = Field(..., description="The name of the model deployment.") + compartment_id: Optional[str] = Field(None, description="The compartment OCID.") + project_id: Optional[str] = Field(None, description="The project OCID.") + description: Optional[str] = Field( + None, description="The description of the deployment." + ) + model_id: Optional[str] = Field(None, description="The model OCID to deploy.") + models: Optional[List[AquaMultiModelRef]] = Field( + None, description="List of models for multimodel deployment." + ) + instance_count: int = Field( + None, description="Number of instances used for deployment." + ) + log_group_id: Optional[str] = Field( + None, description="OCI logging group ID for logs." + ) + access_log_id: Optional[str] = Field( + None, + description="OCID for access logs. " + "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm", + ) + predict_log_id: Optional[str] = Field( + None, + description="OCID for prediction logs." + "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm", + ) + bandwidth_mbps: Optional[int] = Field( + None, description="Bandwidth limit on the load balancer in Mbps." + ) + web_concurrency: Optional[int] = Field( + None, description="Number of worker processes/threads for handling requests." + ) + server_port: Optional[int] = Field( + None, description="Server port for the Docker container image." + ) + health_check_port: Optional[int] = Field( + None, description="Health check port for the Docker container image." + ) + env_var: Optional[Dict[str, str]] = Field( + default_factory=dict, description="Environment variables for deployment." + ) + container_family: Optional[str] = Field( + None, description="Image family of the model deployment container runtime." + ) + memory_in_gbs: Optional[float] = Field( + None, description="Memory (in GB) for the selected shape." + ) + ocpus: Optional[float] = Field( + None, description="OCPU count for the selected shape." + ) + model_file: Optional[str] = Field( + None, description="File used for model deployment." + ) + private_endpoint_id: Optional[str] = Field( + None, description="Private endpoint ID for model deployment." + ) + container_image_uri: Optional[str] = Field( + None, + description="Image URI for model deployment container runtime " + "(ignored for service-managed containers). " + "Required parameter for BYOC based deployments if this parameter was not set during " + "model registration.", + ) + cmd_var: Optional[List[str]] = Field( + default_factory=list, description="Command variables for the container runtime." + ) + freeform_tags: Optional[Dict[str, str]] = Field( + default_factory=dict, description="Freeform tags for model deployment." + ) + defined_tags: Optional[Dict[str, Dict[str, str]]] = Field( + default_factory=dict, description="Defined tags for model deployment." + ) + + @model_validator(mode="before") + @classmethod + def validate(cls, values: Any) -> Any: + """Ensures exactly one of `model_id` or `models` is provided.""" + model_id = values.get("model_id") + models = values.get("models") + if bool(model_id) == bool(models): # Both set or both unset + raise ValueError( + "Exactly one of `model_id` or `models` must be provided to create a model deployment." 
+ ) + return values + + class Config: + extra = "ignore" + + def validate_config(self, + models_config_summary: ModelDeploymentConfigSummary): + """In a Multi-Model Deployment, validates the following: + - checks if deployment is a multi-model deployment + - assigned GPU allocations per model are within the number of GPUs available in the shape, instance_shape + - validate if all models in model group can be deployed on the selected shape, instance_shape""" + if self.freeform_tags.get(Tags.MULTIMODEL_TYPE_TAG) == "true": + selected_shape = self.instance_shape + total_available_gpus = getattr(models_config_summary.gpu_allocation.get(selected_shape), "total_gpus_available", None) + models_allocated_gpus = getattr(models_config_summary.gpu_allocation.get(selected_shape), "models", None) + + if not isinstance(total_available_gpus, int): + raise ValueError(f"Missing total GPU allocation for the selected shape {selected_shape}") + + if not isinstance(models_allocated_gpus, List[GPUModelAllocation]): + raise ValueError("Missing GPU allocations by GPU shape") + + sum_model_gpus = 0 + + for model in models_allocated_gpus: + sum_model_gpus += model.gpu_count + + # check if total_gpus_available should be = to the sum (yes) + if sum_model_gpus > total_available_gpus: + raise ValueError( + f"""selected shape {selected_shape} has {total_available_gpus} GPUs while model group has {sum_model_gpus} GPUs. + Select a shape with a higher number of GPUs or use less GPUs within model group""" + ) + model_deployment_config = models_config_summary.deployment_config + + for ocid, model_config in model_deployment_config.items(): + if selected_shape not in model_config.shape: + raise ValueError( + f"""selected shape {selected_shape} is not supported by model with OCID {ocid}""" + ) + + else: + raise ValueError( + "Model group is not a multi model deployment" + ) diff --git a/tests/unitary/with_extras/aqua/test_multi.ipynb b/tests/unitary/with_extras/aqua/test_multi.ipynb new file mode 100644 index 000000000..e69de29bb From 8082934ee4b16d03102fbdcd58b125b52ad42a4c Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Mon, 10 Feb 2025 17:29:27 -0800 Subject: [PATCH 029/124] Enhances error messages for loading multi-model configurations. --- ads/aqua/modeldeployment/utils.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 099915e1f..a92c02164 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -66,16 +66,18 @@ def load( deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) - for model, shape_gpu in model_shape_gpu.items(): + for _, shape_gpu in model_shape_gpu.items(): if not shape_gpu: raise AquaValueError( - f"There are no available shapes for model {model}, please select different model to deploy." + "Unable to determine a valid GPU allocation for the selected models based on " + "their current configurations. Please try to select a different set of models." ) common_shapes = self._get_common_shapes(model_shape_gpu) if not common_shapes: raise AquaValueError( - "No available shapes for selected models. Choose a different model." + "The selected models do not share any common deployment shapes. " + "Please ensure that all chosen models are compatible for multi-model deployment." 
) gpu_allocation = self._compute_gpu_allocation( @@ -83,7 +85,8 @@ def load( ) if not gpu_allocation: raise AquaValueError( - "No available GPU allocations. Choose a different model." + "Unable to determine a valid GPU allocation for the selected models based on " + "their current configurations. Please select a different set of models." ) return ModelDeploymentConfigSummary( From 0edb0ae425ce59a7eae4e6694e739334d176b345 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Tue, 11 Feb 2025 17:30:55 -0800 Subject: [PATCH 030/124] Enhances the multi-model config retriever. --- ads/aqua/modeldeployment/entities.py | 17 +++--- ads/aqua/modeldeployment/utils.py | 52 ++++++++++++------- .../with_extras/aqua/test_deployment.py | 50 +++++++++++++++--- 3 files changed, 87 insertions(+), 32 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 8766829f2..471dba138 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -212,13 +212,13 @@ class CreateModelDeploymentDetails(BaseModel): "model registration.", ) cmd_var: Optional[List[str]] = Field( - default_factory=list, description="Command variables for the container runtime." + None, description="Command variables for the container runtime." ) - freeform_tags: Optional[Dict[str, str]] = Field( - default_factory=dict, description="Freeform tags for model deployment." + freeform_tags: Optional[Dict] = Field( + None, description="Freeform tags for model deployment." ) - defined_tags: Optional[Dict[str, Dict[str, str]]] = Field( - default_factory=dict, description="Defined tags for model deployment." + defined_tags: Optional[Dict] = Field( + None, description="Defined tags for model deployment." ) @model_validator(mode="before") @@ -364,7 +364,8 @@ class Config: class GPUShapeAllocation(Serializable): - """Allocation details for a specific GPU shape. + """ + Allocation details for a specific GPU shape. Attributes: models (List[GPUModelAllocation], optional): List of model GPU allocations for this shape. @@ -389,6 +390,7 @@ class ModelDeploymentConfigSummary(Serializable): deployment_config (Dict[str, AquaDeploymentConfig], optional): Deployment configurations keyed by model OCID. gpu_allocation (Dict[str, GPUShapeAllocation], optional): GPU allocations keyed by GPU shape. + error_message (str, optional): Error message if GPU allocation is not possible. """ deployment_config: Optional[Dict[str, AquaDeploymentConfig]] = Field( @@ -405,6 +407,9 @@ class ModelDeploymentConfigSummary(Serializable): "GPUs available for each shape." ), ) + error_message: Optional[str] = Field( + default=None, description="Error message if GPU allocation is not possible." + ) class Config: extra = "allow" diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index a92c02164..67d2d9ed8 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -5,11 +5,11 @@ import copy import itertools +import logging from concurrent.futures import ThreadPoolExecutor from typing import Dict, List, Optional from ads.aqua.app import AquaApp -from ads.aqua.common.errors import AquaValueError from ads.aqua.modeldeployment.entities import ( AquaDeploymentConfig, ConfigurationItem, @@ -19,6 +19,8 @@ ) from ads.config import AQUA_MODEL_DEPLOYMENT_CONFIG +logger = logging.getLogger("ads.aqua") + class MultiModelDeploymentConfigLoader: """ @@ -26,7 +28,7 @@ class MultiModelDeploymentConfigLoader: and calculate optimal GPU allocations. 
""" - MAX_WORKERS = 10 + MAX_WORKERS = 10 # Number of workers for asynchronous models detail loading def __init__(self, deployment_app: AquaApp): """ @@ -56,42 +58,54 @@ def load( Returns ------- ModelDeploymentConfigSummary - A summary of the deployment configurations and GPU allocations. - - Raises - ------ - AquaValueError - If no compatible shapes or GPU allocations are available. + A summary of the deployment configurations and GPU allocations. If GPU allocation + cannot be determined, an appropriate error message is included in the summary. """ + # Fetch deployment configurations concurrently. deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) - for _, shape_gpu in model_shape_gpu.items(): + # Initialize the summary result with the deployment configurations. + summary = ModelDeploymentConfigSummary(deployment_config=deployment) + + # Ensure every model has at least one valid GPU configuration. + for model, shape_gpu in model_shape_gpu.items(): if not shape_gpu: - raise AquaValueError( - "Unable to determine a valid GPU allocation for the selected models based on " - "their current configurations. Please try to select a different set of models." + summary.error_message = ( + "Unable to determine a valid GPU allocation for the selected models based on their current configurations. " + "Please try selecting a different set of models." ) + logger.debug(f"No valid GPU configuration found for model `{model}`") + return summary + # Identify common deployment shapes among all models. common_shapes = self._get_common_shapes(model_shape_gpu) if not common_shapes: - raise AquaValueError( + summary.error_message = ( "The selected models do not share any common deployment shapes. " "Please ensure that all chosen models are compatible for multi-model deployment." ) + logger.debug( + f"No common deployment shapes found among selected models: {model_ids}" + ) + return summary + # Compute GPU allocations based on the common shapes and optionally prioritize a primary model. gpu_allocation = self._compute_gpu_allocation( common_shapes, model_shape_gpu, primary_model_id ) if not gpu_allocation: - raise AquaValueError( - "Unable to determine a valid GPU allocation for the selected models based on " - "their current configurations. Please select a different set of models." + summary.error_message = ( + "Unable to determine a valid GPU allocation for the selected models based on their current configurations. " + "Please select a different set of models." 
+ ) + logger.debug( + f"GPU allocation computation failed for selected models: {model_ids}" ) + return summary - return ModelDeploymentConfigSummary( - deployment_config=deployment, gpu_allocation=gpu_allocation - ) + summary.gpu_allocation = gpu_allocation + return summary def _fetch_deployment_configs_concurrently( self, model_ids: List[str] diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 94eb0b44d..4af3f6748 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -13,11 +13,11 @@ import oci import pytest -from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader from parameterized import parameterized import ads.aqua.modeldeployment.deployment import ads.config +from ads.aqua.common.errors import AquaRuntimeError, AquaValueError from ads.aqua.modeldeployment import AquaDeploymentApp, MDInferenceResponse from ads.aqua.modeldeployment.entities import ( AquaDeployment, @@ -25,7 +25,7 @@ AquaDeploymentDetail, ModelParams, ) -from ads.aqua.common.errors import AquaRuntimeError, AquaValueError +from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader from ads.model.datascience_model import DataScienceModel from ads.model.deployment.model_deployment import ModelDeployment from ads.model.model_metadata import ModelCustomMetadata @@ -33,6 +33,39 @@ null = None +@pytest.fixture(scope="module", autouse=True) +def set_env(): + os.environ["SERVICE_COMPARTMENT_ID"] = "ocid1.compartment.oc1.." + os.environ["USER_COMPARTMENT_ID"] = "ocid1.compartment.oc1.." + os.environ["USER_PROJECT_ID"] = "ocid1.project.oc1.." + os.environ["COMPARTMENT_ID"] = "ocid1.compartment.oc1.." + + os.environ["PROJECT_COMPARTMENT_OCID"] = ( + "ocid1.compartment.oc1.." + ) + os.environ["NB_SESSION_COMPARTMENT_OCID"] = ( + "ocid1.compartment.oc1.." + ) + os.environ["ODSC_MODEL_COMPARTMENT_OCID"] = ( + "ocid1.compartment.oc1.." + ) + + os.environ["MODEL_DEPLOYMENT_ID"] = ( + "ocid1.datasciencemodeldeployment.oc1.." + ) + os.environ["MODEL_DEPLOYMENT_URL"] = ( + "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." + ) + os.environ["MODEL_ID"] = ( + "ocid1.datasciencemodeldeployment.oc1.." + ) + os.environ["DEPLOYMENT_IMAGE_NAME"] = "dsmc://image-name:1.0.0.0" + os.environ["DEPLOYMENT_SHAPE_NAME"] = "BM.GPU.A10.4" + os.environ["DEPLOYMENT_GPU_COUNT"] = "1" + os.environ["DEPLOYMENT_GPU_COUNT_B"] = "2" + os.environ["DEPLOYMENT_SHAPE_NAME_CPU"] = "VM.Standard.A1.Flex" + + class TestDataset: SERVICE_COMPARTMENT_ID = "ocid1.compartment.oc1.." USER_COMPARTMENT_ID = "ocid1.compartment.oc1.." @@ -257,7 +290,7 @@ class TestDataset: "created_on": "2024-01-01T00:00:00.000000+00:00", "created_by": "ocid1.user.oc1..", "endpoint": MODEL_DEPLOYMENT_URL, - "private_endpoint_id": null, + "private_endpoint_id": "", "model_id": "ocid1.datasciencemodel.oc1..", "environment_variables": { "BASE_MODEL": "service_models/model-name/artifact", @@ -428,6 +461,7 @@ class TestDataset: "total_gpus_available": 8, }, }, + "error_message": None, } model_gpu_dict = {"model_a": [2, 4], "model_b": [1, 2, 4], "model_c": [1, 2, 8]} @@ -587,11 +621,11 @@ def test_get_multimodel_deployment_config( [ [ "shape", - "There are no available shapes for model model_a, please select different model to deploy.", + "Unable to determine a valid GPU allocation for the selected models based on their current configurations. 
Please try selecting a different set of models.", ], [ "configuration", - "No available GPU allocations. Choose a different model.", + "Unable to determine a valid GPU allocation for the selected models based on their current configurations. Please select a different set of models.", ], ] ) @@ -613,8 +647,10 @@ def test_get_multimodel_compatible_shapes_invalid_config( mock_fetch_deployment_configs_concurrently.return_value = { "model_a": AquaDeploymentConfig(**config) } - with pytest.raises(AquaValueError, match=error): - self.app.get_multimodel_deployment_config(["model_a"]) + + test_config = self.app.get_multimodel_deployment_config(["model_a"]) + + assert test_config.error_message == error def test_verify_compatibility(self): result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( From df0b119c1119c9c5c5ac49406bd0c2c084fb2beb Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Wed, 12 Feb 2025 09:20:03 -0800 Subject: [PATCH 031/124] removed circular import --- ads/aqua/model/model.py | 3 --- ads/aqua/modeldeployment/deployment.py | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index d7754568e..7276de085 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -77,9 +77,6 @@ ModelFormat, ModelValidationResult, ) - -from ads.aqua.modeldeployment.deployment import AquaDeploymentApp - from ads.aqua.ui import AquaContainerConfig, AquaContainerConfigItem from ads.common.auth import default_signer from ads.common.oci_resource import SEARCH_TYPE, OCIResource diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 7c70d8c5a..86472c16f 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -159,7 +159,9 @@ def create( model_ids = [model.model_id for model in create_deployment_details.models] try: - CreateModelDeploymentDetails.validate_config(self.get_multimodel_deployment_config(model_ids = model_ids)) + model_deployment_details = CreateModelDeploymentDetails() + model_deployment_details.validate_config(models_config_summary= + self.get_multimodel_deployment_config(model_ids = model_ids)) except ValidationError as e: print(e) From 57b3ffae6e1bb0383f3059b9b0290ce4aa76eeae Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Wed, 12 Feb 2025 14:09:38 -0800 Subject: [PATCH 032/124] fixed validate_config call in deployment.py --- ads/aqua/modeldeployment/deployment.py | 5 ++- .../with_extras/aqua/test_deployment.py | 33 +++++++++++++++++++ .../unitary/with_extras/aqua/test_multi.ipynb | 0 3 files changed, 35 insertions(+), 3 deletions(-) delete mode 100644 tests/unitary/with_extras/aqua/test_multi.ipynb diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 86472c16f..a2a421edd 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -159,9 +159,8 @@ def create( model_ids = [model.model_id for model in create_deployment_details.models] try: - model_deployment_details = CreateModelDeploymentDetails() - model_deployment_details.validate_config(models_config_summary= - self.get_multimodel_deployment_config(model_ids = model_ids)) + model_config_summary = self.get_multimodel_deployment_config(model_ids = model_ids) + create_deployment_details.validate_config(models_config_summary=model_config_summary) except ValidationError as e: print(e) diff --git a/tests/unitary/with_extras/aqua/test_deployment.py 
b/tests/unitary/with_extras/aqua/test_deployment.py index 94eb0b44d..96bd237ab 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -646,6 +646,39 @@ def test_verify_compatibility(self): assert result[1] == 0 assert result[2] == [] + @parameterized.expand( + [ + [ + "shape", + "There are no available shapes for model model_a, please select different model to deploy.", + ], + [ + "configuration", + "No available GPU allocations. Choose a different model.", + ], + ] + ) + @patch( + "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" + ) + def test_multi_model_validate_config( + self, missing_key, error, mock_fetch_deployment_configs_concurrently + ): + config_json = os.path.join( + self.curr_dir, + "test_data/deployment/aqua_multi_model_deployment_config.json", + ) + with open(config_json, "r") as _file: + config = json.load(_file) + + config.pop(missing_key) + + mock_fetch_deployment_configs_concurrently.return_value = { + "model_a": AquaDeploymentConfig(**config) + } + with pytest.raises(AquaValueError, match=error): + self.app.get_multimodel_deployment_config(["model_a"]) + @patch("ads.aqua.modeldeployment.deployment.get_container_config") @patch("ads.aqua.model.AquaModelApp.create") @patch("ads.aqua.modeldeployment.deployment.get_container_image") diff --git a/tests/unitary/with_extras/aqua/test_multi.ipynb b/tests/unitary/with_extras/aqua/test_multi.ipynb deleted file mode 100644 index e69de29bb..000000000 From c15fb250e827bcd9ead40e12079032e95eca9d7a Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Wed, 12 Feb 2025 15:06:42 -0800 Subject: [PATCH 033/124] fixed type validation in validate_config --- ads/aqua/modeldeployment/entities.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 1704ed787..81390a67f 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -418,8 +418,9 @@ def validate_config(self, if not isinstance(total_available_gpus, int): raise ValueError(f"Missing total GPU allocation for the selected shape {selected_shape}") - if not isinstance(models_allocated_gpus, List[GPUModelAllocation]): - raise ValueError("Missing GPU allocations by GPU shape") + if not all(isinstance(item, GPUModelAllocation) for item in models_allocated_gpus): + raise ValueError("GPU allocations must be instances of GPUModelAllocation") + sum_model_gpus = 0 From fc246d3c3435d3209f4f143ec424163b5f1c2f07 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Thu, 13 Feb 2025 16:02:49 -0500 Subject: [PATCH 034/124] Added support to create multi model deployment. 
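Note for reviewers: the core of this patch is the new _create_multi path, where every model in the group contributes a {"params", "model_path"} entry and the aggregated list is handed to the serving container through the MULTI_MODEL_CONFIG environment variable. A minimal, self-contained sketch of that assembly follows; it is illustrative only, and the helper name build_multi_model_env plus the group_models input are hypothetical, not part of this change (the real code derives params and artifact paths from each model's deployment config and custom metadata):

    def build_multi_model_env(group_models):
        # group_models: hypothetical list of (params, artifact_path) tuples
        model_config = [
            {"params": params, "model_path": artifact_path.rstrip("/")}
            for params, artifact_path in group_models
        ]
        return {"MULTI_MODEL_CONFIG": {"models": model_config}}

    # Example (hypothetical values):
    # build_multi_model_env([("--max-model-len 2096", "models/model_one/artifact/")])
    # -> {"MULTI_MODEL_CONFIG": {"models": [{"params": "--max-model-len 2096",
    #                                        "model_path": "models/model_one/artifact"}]}}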
--- ads/aqua/common/enums.py | 1 + ads/aqua/constants.py | 4 +- ads/aqua/modeldeployment/deployment.py | 204 +++++++++++++++- .../aqua_create_multi_deployment.yaml | 42 ++++ .../deployment/aqua_multi_model.yaml | 105 ++++++++ .../with_extras/aqua/test_deployment.py | 224 ++++++++++++++++++ 6 files changed, 566 insertions(+), 14 deletions(-) create mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml create mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml diff --git a/ads/aqua/common/enums.py b/ads/aqua/common/enums.py index 101a81952..5d672b036 100644 --- a/ads/aqua/common/enums.py +++ b/ads/aqua/common/enums.py @@ -31,6 +31,7 @@ class Tags(ExtendedEnum): AQUA_TAG = "OCI_AQUA" AQUA_SERVICE_MODEL_TAG = "aqua_service_model" AQUA_FINE_TUNED_MODEL_TAG = "aqua_fine_tuned_model" + AQUA_MODEL_ID_TAG = "aqua_model_id" AQUA_MODEL_NAME_TAG = "aqua_model_name" AQUA_EVALUATION = "aqua_evaluation" AQUA_FINE_TUNING = "aqua_finetuning" diff --git a/ads/aqua/constants.py b/ads/aqua/constants.py index 8e0d5ca76..5a909503c 100644 --- a/ads/aqua/constants.py +++ b/ads/aqua/constants.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """This module defines constants used in ads.aqua module.""" @@ -30,8 +30,10 @@ READY_TO_FINE_TUNE_STATUS = "TRUE" PRIVATE_ENDPOINT_TYPE = "MODEL_DEPLOYMENT" AQUA_GA_LIST = ["id19sfcrra6z"] +AQUA_MULTI_MODEL_CONFIG = "MULTI_MODEL_CONFIG" AQUA_MODEL_TYPE_SERVICE = "service" AQUA_MODEL_TYPE_CUSTOM = "custom" +AQUA_MODEL_TYPE_MULTI = "multi_model" AQUA_MODEL_ARTIFACT_CONFIG = "config.json" AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME = "_name_or_path" AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE = "model_type" diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 486751d0f..c393537c5 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -28,7 +28,9 @@ from ads.aqua.constants import ( AQUA_MODEL_ARTIFACT_FILE, AQUA_MODEL_TYPE_CUSTOM, + AQUA_MODEL_TYPE_MULTI, AQUA_MODEL_TYPE_SERVICE, + AQUA_MULTI_MODEL_CONFIG, MODEL_BY_REFERENCE_OSS_PATH_KEY, UNKNOWN, UNKNOWN_DICT, @@ -36,6 +38,7 @@ from ads.aqua.data import AquaResourceIdentifier from ads.aqua.finetuning.finetuning import FineTuneCustomMetadata from ads.aqua.model import AquaModelApp +from ads.aqua.model.constants import ModelCustomMetadataFields from ads.aqua.modeldeployment.entities import ( AquaDeployment, AquaDeploymentConfig, @@ -163,11 +166,9 @@ def create( freeform_tags=freeform_tags, defined_tags=defined_tags, ) - - # todo: remove this once deployment support is added - if create_deployment_details.models: - raise AquaValueError( - "Deployment support for multimodel info is in progress." 
+ return self._create_multi( + aqua_model=aqua_model, + create_deployment_details=create_deployment_details, ) tags = {} @@ -365,12 +366,193 @@ def create( for env in container_spec.get(ContainerSpec.ENV_VARS, []): if isinstance(env, dict): - for key, _items in env.items(): + for key, _ in env.items(): if key not in env_var: env_var.update(env) logger.info(f"Env vars used for deploying {aqua_model.id} :{env_var}") + tags = {**tags, **(create_deployment_details.freeform_tags or {})} + model_type = ( + AQUA_MODEL_TYPE_CUSTOM if is_fine_tuned_model else AQUA_MODEL_TYPE_SERVICE + ) + + return self._create_deployment( + create_deployment_details=create_deployment_details, + aqua_model_id=aqua_model.id, + model_name=model_name, + model_type=model_type, + container_image_uri=container_image_uri, + server_port=server_port, + health_check_port=health_check_port, + env_var=env_var, + tags=tags, + cmd_var=cmd_var, + ) + + def _create_multi( + self, + aqua_model: DataScienceModel, + create_deployment_details: CreateModelDeploymentDetails, + ) -> Dict: + """Builds the environment variables required by multi deployment container and creates the deployment. + + Parameters + ---------- + aqua_model : DataScienceModel + An instance of Aqua data science model. + create_deployment_details : CreateModelDeploymentDetails + An instance of CreateModelDeploymentDetails containing all required and optional + fields for creating a model deployment via Aqua. + + Returns + ------- + AquaDeployment + An Aqua deployment instance. + """ + model_config = [] + model_name_list = [] + env_var = {**(create_deployment_details.env_var or UNKNOWN_DICT)} + + container_type_key = self._get_container_type_key( + model=aqua_model, + container_family=create_deployment_details.container_family, + ) + container_config = get_container_config() + container_spec = container_config.get( + ContainerSpec.CONTAINER_SPEC, UNKNOWN_DICT + ).get(container_type_key, UNKNOWN_DICT) + + container_params = container_spec.get(ContainerSpec.CLI_PARM, UNKNOWN) + + for idx, model in enumerate(create_deployment_details.models): + user_params = ( + "".join(f"{name} {value}" for name, value in model.env_var.items()) + if model.env_var + else UNKNOWN + ) + if user_params: + restricted_params = self._find_restricted_params( + container_params, user_params, container_type_key + ) + if restricted_params: + raise AquaValueError( + f"Parameters {restricted_params} are set by Aqua " + f"and cannot be overridden or are invalid." + f"Select other parameters for model {model.model_id}." 
+ ) + + params = "" + deployment_config = self.get_deployment_config(model.model_id) + multi_model_deployment = deployment_config.configuration.get( + create_deployment_details.instance_shape, ConfigurationItem() + ).multi_model_deployment + for item in multi_model_deployment: + if ( + model.gpu_count + and item.gpu_count + and item.gpu_count == model.gpu_count + ): + config_parameters = item.parameters.get(container_type_key, UNKNOWN) + if config_parameters: + params = f"{container_params} {get_combined_params(config_parameters, user_params)}" + break + + artifact_location_key = ( + f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{idx}" + ) + artifact_path_prefix = aqua_model.custom_metadata_list.get( + artifact_location_key + ).value.rstrip("/") + if ObjectStorageDetails.is_oci_path(artifact_path_prefix): + os_path = ObjectStorageDetails.from_path(artifact_path_prefix) + artifact_path_prefix = os_path.filepath.rstrip("/") + + model_config.append({"params": params, "model_path": artifact_path_prefix}) + + model_name_key = f"model-name-{idx}" + model_name_list.append( + aqua_model.custom_metadata_list.get(model_name_key).value + ) + + env_var.update({AQUA_MULTI_MODEL_CONFIG: {"models": model_config}}) + logger.info(f"Env vars used for deploying {aqua_model.id} : {env_var}.") + + container_image_uri = ( + create_deployment_details.container_image_uri + or get_container_image(container_type=container_type_key) + ) + server_port = create_deployment_details.server_port or container_spec.get( + ContainerSpec.SERVER_PORT + ) + health_check_port = ( + create_deployment_details.health_check_port + or container_spec.get(ContainerSpec.HEALTH_CHECK_PORT) + ) + tags = { + Tags.AQUA_MODEL_ID_TAG: aqua_model.id, + Tags.MULTIMODEL_TYPE_TAG: "true", + **(create_deployment_details.freeform_tags or UNKNOWN_DICT), + } + + model_name = ", ".join(model_name_list) + + return self._create_deployment( + create_deployment_details=create_deployment_details, + aqua_model_id=aqua_model.id, + model_name=model_name, + model_type=AQUA_MODEL_TYPE_MULTI, + container_image_uri=container_image_uri, + server_port=server_port, + health_check_port=health_check_port, + env_var=env_var, + tags=tags, + ) + + def _create_deployment( + self, + create_deployment_details: CreateModelDeploymentDetails, + aqua_model_id: str, + model_name: str, + model_type: str, + container_image_uri: str, + server_port: str, + health_check_port: str, + env_var: dict, + tags: dict, + cmd_var: Optional[dict] = None, + ): + """Creates data science model deployment. + + Parameters + ---------- + create_deployment_details : CreateModelDeploymentDetails + An instance of CreateModelDeploymentDetails containing all required and optional + fields for creating a model deployment via Aqua. + aqua_model_id: str + The id of the aqua model to be deployed. + model_name: str + The name of the aqua model to be deployed. If it's multi model deployment, it is a list of model names. + model_type: str + The type of aqua model to be deployed. Allowed values are: `custom`, `service` and `multi_model`. + container_image_uri: str + The container image uri to deploy the model. + server_port: str + The service port of the container image. + health_check_port: str + The health check port of the container image. + env_var: dict + The environment variables input for the deployment. + tags: dict + The tags input for the deployment. + cmd_var: dict, optional + The cmd arguments input for the deployment. + + Returns + ------- + AquaDeployment + An Aqua deployment instance. 
+ """ # Start model deployment # configure model deployment infrastructure infrastructure = ( @@ -410,7 +592,7 @@ def create( .with_health_check_port(health_check_port) .with_env(env_var) .with_deployment_mode(ModelDeploymentMode.HTTPS) - .with_model_uri(aqua_model.id) + .with_model_uri(aqua_model_id) .with_region(self.region) .with_overwrite_existing_artifact(True) .with_remove_existing_artifact(True) @@ -418,7 +600,6 @@ def create( if cmd_var: container_runtime.with_cmd(cmd_var) - tags = {**tags, **(create_deployment_details.freeform_tags or {})} # configure model deployment and deploy model on container runtime deployment = ( ModelDeployment() @@ -430,12 +611,9 @@ def create( .with_runtime(container_runtime) ).deploy(wait_for_completion=False) - deployment_id = deployment.dsc_model_deployment.id + deployment_id = deployment.id logger.info( - f"Aqua model deployment {deployment_id} created for model {aqua_model.id}." - ) - model_type = ( - AQUA_MODEL_TYPE_CUSTOM if is_fine_tuned_model else AQUA_MODEL_TYPE_SERVICE + f"Aqua model deployment {deployment_id} created for model {aqua_model_id}." ) # we arbitrarily choose last 8 characters of OCID to identify MD in telemetry diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml new file mode 100644 index 000000000..e83d8cbef --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml @@ -0,0 +1,42 @@ +kind: deployment +spec: + createdBy: ocid1.user.oc1.. + displayName: multi-model-deployment-name + freeformTags: + OCI_AQUA: active + multimodel: "true" + aqua_model_id: model-id + id: "ocid1.datasciencemodeldeployment.oc1.." + infrastructure: + kind: infrastructure + spec: + bandwidthMbps: 10 + compartmentId: ocid1.compartment.oc1.. + deploymentType: SINGLE_MODEL + policyType: FIXED_SIZE + projectId: ocid1.datascienceproject.oc1.iad. + replica: 1 + shapeName: "BM.GPU.A10.4" + type: datascienceModelDeployment + lifecycleState: CREATING + modelDeploymentUrl: "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." + runtime: + kind: runtime + spec: + env: + MODEL_DEPLOY_PREDICT_ENDPOINT: /v1/completions + MULTI_MODEL_CONFIG: + models: + - params: --served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096 + model_path: models/model_one/5be6479/artifact/ + - params: --served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096 + model_path: models/model_two/83e9aa1/artifact/ + - params: --served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096 + model_path: models/model_three/83e9aa1/artifact/ + healthCheckPort: 8080 + image: "dsmc://image-name:1.0.0.0" + modelUri: "ocid1.datasciencemodeldeployment.oc1.." + serverPort: 8080 + type: container + timeCreated: 2024-01-01T00:00:00.000000+00:00 +type: modelDeployment diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml new file mode 100644 index 000000000..09c72a3ed --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml @@ -0,0 +1,105 @@ +kind: datascienceModel +spec: + artifact: oci://models@namespace/models/model-name/artifact + compartmentId: ocid1.compartment.oc1.. + customMetadataList: + data: + - category: Other + description: ID of model_one in the multimodel group. 
+ key: model-id-0 + value: ocid1.compartment.oc1.. + - category: Other + description: Name of model_one in the multimodel group. + key: model-name-0 + value: model_one + - category: Other + description: Artifact path for model_one in the multimodel group. + key: artifact_location-0 + value: model_one_path + - category: Other + description: ID of model_two in the multimodel group. + key: model-id-1 + value: ocid1.compartment.oc1.. + - category: Other + description: Name of model_two in the multimodel group. + key: model-name-1 + value: model_two + - category: Other + description: Artifact path for model_two in the multimodel group. + key: artifact_location-1 + value: model_two_path + - category: Other + description: ID of model_three in the multimodel group. + key: model-id-2 + value: ocid1.compartment.oc1.. + - category: Other + description: Name of model_three in the multimodel group. + key: model-name-2 + value: model_three + - category: Other + description: Artifact path for model_three in the multimodel group. + key: artifact_location-2 + value: model_three_path + - category: Other + description: Inference container mapping for multi_model + key: deployment-container + value: odsc-vllm-serving + - category: Other + description: Number of models in the group. + key: model_group_count + value: 3 + definedTags: {} + description: Multi-model grouping using model_one, model_two, model_three. + displayName: multi_model + freeformTags: + OCI_AQUA: active + multimodel: "true" + license: Apache 2.0 + organization: Organization + ready_to_fine_tune: false + id: ocid1.datasciencemodel.oc1.iad. + lifecycleState: ACTIVE + modelDescription: + models: + - bucketName: models + namespace: namespace + objects: + - name: models/model_one/artifact/README.md + sizeInBytes: 10317 + version: 450a8124-f5ca-4ee6-b4cf-c1dc05b13d46 + - name: models/model_one/artifact/config.json + sizeInBytes: 950 + version: 3ace781b-4a48-4e89-88b6-61f0db6d51ad + prefix: models/model_one/artifact + - bucketName: models + namespace: namespace + objects: + - name: models/model_two/artifact/README.md + sizeInBytes: 10317 + version: 450a8124-f5ca-4ee6-b4cf-c1dc05b13d46 + - name: models/model_two/artifact/config.json + sizeInBytes: 950 + version: 3ace781b-4a48-4e89-88b6-61f0db6d51ad + prefix: models/model_two/artifact + - bucketName: models + namespace: namespace + objects: + - name: models/model_three/artifact/README.md + sizeInBytes: 10317 + version: 450a8124-f5ca-4ee6-b4cf-c1dc05b13d46 + - name: models/model_three/artifact/config.json + sizeInBytes: 950 + version: 3ace781b-4a48-4e89-88b6-61f0db6d51ad + prefix: models/model_three/artifact + type: modelOSSReferenceDescription + version: '1.0' + projectId: ocid1.datascienceproject.oc1.iad. 
+ provenanceMetadata: + artifact_dir: null + git_branch: null + git_commit: 123456 + repository_url: https://model-name-url.com + training_id: null + training_script_path: null + timeCreated: 2024-01-01T00:00:00.000000+00:00 +type: dataScienceModel diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 4af3f6748..966baf667 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -15,6 +15,7 @@ import pytest from parameterized import parameterized +from ads.aqua.common.entities import AquaMultiModelRef import ads.aqua.modeldeployment.deployment import ads.config from ads.aqua.common.errors import AquaRuntimeError, AquaValueError @@ -149,6 +150,91 @@ class TestDataset: } ] + multi_model_deployment_object = { + "category_log_details": oci.data_science.models.CategoryLogDetails( + **{ + "access": oci.data_science.models.LogDetails( + **{ + "log_group_id": "ocid1.loggroup.oc1..", + "log_id": "ocid1.log.oc1..", + } + ), + "predict": oci.data_science.models.LogDetails( + **{ + "log_group_id": "ocid1.loggroup.oc1..", + "log_id": "ocid1.log.oc1..", + } + ), + } + ), + "compartment_id": "ocid1.compartment.oc1..", + "created_by": "ocid1.user.oc1..", + "defined_tags": {}, + "description": "Mock description", + "display_name": "multi-model-deployment-name", + "freeform_tags": { + "OCI_AQUA": "active", + "aqua_model_id": "model-id", + "multimodel": "true", + }, + "id": "ocid1.datasciencemodeldeployment.oc1..", + "lifecycle_state": "ACTIVE", + "model_deployment_configuration_details": oci.data_science.models.SingleModelDeploymentConfigurationDetails( + **{ + "deployment_type": "SINGLE_MODEL", + "environment_configuration_details": oci.data_science.models.OcirModelDeploymentEnvironmentConfigurationDetails( + **{ + "cmd": [], + "entrypoint": [], + "environment_configuration_type": "OCIR_CONTAINER", + "environment_variables": { + "MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions", + "MULTI_MODEL_CONFIG": { + "models": [ + { + "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", + "model_path": "models/model_one/5be6479/artifact/", + }, + { + "params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", + "model_path": "models/model_two/83e9aa1/artifact/", + }, + { + "params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", + "model_path": "models/model_three/83e9aa1/artifact/", + }, + ] + }, + }, + "health_check_port": 8080, + "image": "dsmc://image-name:1.0.0.0", + "image_digest": "sha256:mock22373c16f2015f6f33c5c8553923cf8520217da0bd9504471c5e53cbc9d", + "server_port": 8080, + } + ), + "model_configuration_details": oci.data_science.models.ModelConfigurationDetails( + **{ + "bandwidth_mbps": 10, + "instance_configuration": oci.data_science.models.InstanceConfiguration( + **{ + "instance_shape_name": DEPLOYMENT_SHAPE_NAME, + "model_deployment_instance_shape_config_details": null, + } + ), + "model_id": "ocid1.datasciencemodel.oc1..", + "scaling_policy": oci.data_science.models.FixedSizeScalingPolicy( + **{"instance_count": 1, "policy_type": "FIXED_SIZE"} + ), + "maximum_bandwidth_mbps": 10, + } + ), + } + ), + "model_deployment_url": MODEL_DEPLOYMENT_URL, + "project_id": USER_PROJECT_ID, + "time_created": "2024-01-01T00:00:00.000000+00:00", + } + model_deployment_object_gguf = [ { "compartment_id": "ocid1.compartment.oc1..", @@ -310,6 +396,53 @@ class TestDataset: "tags": 
{"OCI_AQUA": "active", "aqua_model_name": "model-name"}, } + aqua_multi_deployment_object = { + "id": "ocid1.datasciencemodeldeployment.oc1..", + "display_name": "multi-model-deployment-name", + "aqua_service_model": False, + "aqua_model_name": "", + "state": "ACTIVE", + "description": "Mock description", + "created_on": "2024-01-01T00:00:00.000000+00:00", + "created_by": "ocid1.user.oc1..", + "endpoint": MODEL_DEPLOYMENT_URL, + "private_endpoint_id": "", + "model_id": "ocid1.datasciencemodel.oc1..", + "environment_variables": { + "MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions", + "MULTI_MODEL_CONFIG": { + "models": [ + { + "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", + "model_path": "models/model_one/5be6479/artifact/", + }, + { + "params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", + "model_path": "models/model_two/83e9aa1/artifact/", + }, + { + "params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", + "model_path": "models/model_three/83e9aa1/artifact/", + }, + ] + }, + }, + "cmd": [], + "console_link": "https://cloud.oracle.com/data-science/model-deployments/ocid1.datasciencemodeldeployment.oc1..?region=region-name", + "lifecycle_details": "", + "shape_info": { + "instance_shape": DEPLOYMENT_SHAPE_NAME, + "instance_count": 1, + "ocpus": null, + "memory_in_gbs": null, + }, + "tags": { + "OCI_AQUA": "active", + "aqua_model_id": "model-id", + "multimodel": "true", + }, + } + aqua_deployment_gguf_env_vars = { "BASE_MODEL": "service_models/model-name/artifact", "BASE_MODEL_FILE": "model-name.gguf", @@ -1002,6 +1135,97 @@ def test_create_deployment_for_tei_byoc_embedding_model( ) assert actual_attributes == expected_result + @patch("ads.aqua.modeldeployment.deployment.get_container_config") + @patch("ads.aqua.model.AquaModelApp.create_multi") + @patch("ads.aqua.modeldeployment.deployment.get_container_image") + @patch("ads.model.deployment.model_deployment.ModelDeployment.deploy") + def test_create_deployment_for_multi_model( + self, + mock_deploy, + mock_get_container_image, + mock_create_multi, + mock_get_container_config, + ): + """Test to create a deployment for multi models.""" + aqua_multi_model = os.path.join( + self.curr_dir, "test_data/deployment/aqua_multi_model.yaml" + ) + mock_create_multi.return_value = DataScienceModel.from_yaml( + uri=aqua_multi_model + ) + config_json = os.path.join( + self.curr_dir, + "test_data/deployment/aqua_multi_model_deployment_config.json", + ) + with open(config_json, "r") as _file: + config = json.load(_file) + + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) + + container_index_json = os.path.join( + self.curr_dir, "test_data/ui/container_index.json" + ) + with open(container_index_json, "r") as _file: + container_index_config = json.load(_file) + mock_get_container_config.return_value = container_index_config + + mock_get_container_image.return_value = TestDataset.DEPLOYMENT_IMAGE_NAME + aqua_deployment = os.path.join( + self.curr_dir, "test_data/deployment/aqua_create_multi_deployment.yaml" + ) + model_deployment_obj = ModelDeployment.from_yaml(uri=aqua_deployment) + model_deployment_dsc_obj = copy.deepcopy( + TestDataset.multi_model_deployment_object + ) + model_deployment_dsc_obj["lifecycle_state"] = "CREATING" + model_deployment_obj.dsc_model_deployment = ( + oci.data_science.models.ModelDeploymentSummary(**model_deployment_dsc_obj) + ) + mock_deploy.return_value = 
model_deployment_obj + + model_info_1 = AquaMultiModelRef( + model_id="test_model_id_1", + gpu_count=2, + ) + + model_info_2 = AquaMultiModelRef( + model_id="test_model_id_2", + gpu_count=2, + ) + + model_info_3 = AquaMultiModelRef( + model_id="test_model_id_3", + gpu_count=2, + ) + + result = self.app.create( + models=[model_info_1, model_info_2, model_info_3], + instance_shape=TestDataset.DEPLOYMENT_SHAPE_NAME, + display_name="multi-model-deployment-name", + log_group_id="ocid1.loggroup.oc1..", + access_log_id="ocid1.log.oc1..", + predict_log_id="ocid1.log.oc1..", + ) + + mock_create_multi.assert_called_with( + models=[model_info_1, model_info_2, model_info_3], + compartment_id=TestDataset.USER_COMPARTMENT_ID, + project_id=TestDataset.USER_PROJECT_ID, + freeform_tags=None, + defined_tags=None, + ) + mock_get_container_image.assert_called() + mock_deploy.assert_called() + + expected_attributes = set(AquaDeployment.__annotations__.keys()) + actual_attributes = result.to_dict() + assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" + expected_result = copy.deepcopy(TestDataset.aqua_multi_deployment_object) + expected_result["state"] = "CREATING" + assert actual_attributes == expected_result + @parameterized.expand( [ ( From 48a1befdefc785ebcb38bcd762878d46df9a6dd3 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Thu, 13 Feb 2025 16:21:45 -0500 Subject: [PATCH 035/124] Updated pr. --- ads/aqua/modeldeployment/deployment.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index c393537c5..2a6e8c3b4 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -442,7 +442,7 @@ def _create_multi( f"Select other parameters for model {model.model_id}." ) - params = "" + params = container_params deployment_config = self.get_deployment_config(model.model_id) multi_model_deployment = deployment_config.configuration.get( create_deployment_details.instance_shape, ConfigurationItem() @@ -453,10 +453,10 @@ def _create_multi( and item.gpu_count and item.gpu_count == model.gpu_count ): - config_parameters = item.parameters.get(container_type_key, UNKNOWN) - if config_parameters: - params = f"{container_params} {get_combined_params(config_parameters, user_params)}" - break + config_parameters = item.parameters.get( + get_container_params_type(container_type_key), UNKNOWN + ) + params = f"{params} {get_combined_params(config_parameters, user_params)}" artifact_location_key = ( f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{idx}" From 9352ca5bcf1e0883e9961fffa9cb7d5a0a229e85 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Thu, 13 Feb 2025 18:08:04 -0500 Subject: [PATCH 036/124] Updated pr. 
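Context for the one-line change below: when resolving per-model parameters, the lookup over multi_model_deployment entries should stop at the first entry whose gpu_count matches the GPU count requested for that model, instead of letting later entries keep appending to params. A hedged sketch of that first-match behaviour, with illustrative names only (pick_params and the plain-dict entries are not the actual implementation):

    def pick_params(multi_model_deployment, requested_gpu_count, base_params):
        # Return base_params extended with the first matching GPU-count config.
        for item in multi_model_deployment:
            if requested_gpu_count and item.get("gpu_count") == requested_gpu_count:
                return f"{base_params} {item.get('parameters', '')}".strip()
        return base_params  # no match: fall back to the container defaults

    # pick_params([{"gpu_count": 2, "parameters": "--max-model-len 32000"}], 2, "--port 8080")
    # -> '--port 8080 --max-model-len 32000'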
--- ads/aqua/modeldeployment/deployment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 2a6e8c3b4..956b237d9 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -457,6 +457,7 @@ def _create_multi( get_container_params_type(container_type_key), UNKNOWN ) params = f"{params} {get_combined_params(config_parameters, user_params)}" + break artifact_location_key = ( f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{idx}" From 2068c3d487f5d0cc071750d616a133f99ad5b11e Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Fri, 14 Feb 2025 11:58:19 -0800 Subject: [PATCH 037/124] finished unit tests for validate method --- README-development.md | 2 +- ads/aqua/modeldeployment/deployment.py | 16 +- ads/aqua/modeldeployment/entities.py | 84 +++-- .../deployment/aqua_summary_multi_model.json | 308 ++++++++++++++++++ .../with_extras/aqua/test_deployment.py | 133 ++++++++ 5 files changed, 514 insertions(+), 29 deletions(-) create mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json diff --git a/README-development.md b/README-development.md index 9163b629a..fdecd7596 100644 --- a/README-development.md +++ b/README-development.md @@ -248,7 +248,7 @@ All the unit tests can be found [here](https://github.com/oracle/accelerated-dat The following commands detail how the unit tests can be run. ``` # Run all tests in AQUA project -python -m pytest -q tests/unitary/with_extras/aqua/test_deployment.py +python -m pytest -q tests/unitary/with_extras/aqua/* # Run all tests specific to a module within in AQUA project (ex. test_deployment.py, test_model.py, etc.) python -m pytest -q tests/unitary/with_extras/aqua/test_deployment.py diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index a2a421edd..3a1f5a1a0 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -159,10 +159,18 @@ def create( model_ids = [model.model_id for model in create_deployment_details.models] try: - model_config_summary = self.get_multimodel_deployment_config(model_ids = model_ids) - create_deployment_details.validate_config(models_config_summary=model_config_summary) - except ValidationError as e: - print(e) + model_config_summary = self.get_multimodel_deployment_config( + model_ids=model_ids + ) + + if not model_config_summary.gpu_allocation: + raise AquaValueError(model_config_summary.error_message) + + create_deployment_details.validate_config( + models_config_summary=model_config_summary + ) + except ValueError as err: + raise AquaValueError(f"Error: {err}") from err aqua_model = model_app.create_multi( models=create_deployment_details.models, diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index a60241c30..681f31c37 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -7,6 +7,7 @@ from oci.data_science.models import ModelDeployment, ModelDeploymentSummary from pydantic import BaseModel, Field, model_validator +from ads.aqua import logger from ads.aqua.common.entities import AquaMultiModelRef, ShapeInfo from ads.aqua.common.enums import Tags from ads.aqua.config.utils.serializer import Serializable @@ -15,6 +16,7 @@ from ads.common.serializer import DataClassSerializable from ads.common.utils import get_console_link + class ModelParams(Serializable): max_tokens: Optional[int] = None temperature: 
Optional[float] = None @@ -140,6 +142,7 @@ class AquaDeploymentDetail(AquaDeployment, DataClassSerializable): class Config: extra = "ignore" + class ShapeInfoConfig(Serializable): """Describes how many memory and cpu to this model for specific shape. @@ -409,47 +412,80 @@ def validate(cls, values: Any) -> Any: ) return values - def validate_config(self, - models_config_summary: ModelDeploymentConfigSummary): - """In a Multi-Model Deployment, validates the following: - - checks if deployment is a multi-model deployment - - assigned GPU allocations per model are within the number of GPUs available in the shape, instance_shape - - validate if all models in model group can be deployed on the selected shape, instance_shape""" + def validate_config(self, models_config_summary: ModelDeploymentConfigSummary): + """ + Validates the model configuration for multi model deployments. + + Parameters + ---------- + models_config_summary : ModelDeploymentConfigSummary, optional + An instance of ModelDeploymentConfigSummary containing all required + fields (GPU Allocation, Deployment Configuration) for creating a multi model deployment via Aqua. + + Raises + ------- + ValueError: + When the deployment is NOT a multi model deployment + When assigned GPU Allocations per model are NOT within the number of GPUs available in the instance shape + When all models in model group can NOT be deployed on the instance shape with the selected GPU count + """ if self.freeform_tags.get(Tags.MULTIMODEL_TYPE_TAG) == "true": selected_shape = self.instance_shape - total_available_gpus = getattr(models_config_summary.gpu_allocation.get(selected_shape), "total_gpus_available", None) - models_allocated_gpus = getattr(models_config_summary.gpu_allocation.get(selected_shape), "models", None) + total_available_gpus = getattr( + models_config_summary.gpu_allocation.get(selected_shape), + "total_gpus_available", + None, + ) + models_allocated_gpus = getattr( + models_config_summary.gpu_allocation.get(selected_shape), "models", None + ) if not isinstance(total_available_gpus, int): - raise ValueError(f"Missing total GPU allocation for the selected shape {selected_shape}") + raise ValueError( + f"Missing total GPU allocation for the selected shape {selected_shape}" + ) - if not all(isinstance(item, GPUModelAllocation) for item in models_allocated_gpus): - raise ValueError("GPU allocations must be instances of GPUModelAllocation") + if not all( + isinstance(item, GPUModelAllocation) for item in models_allocated_gpus + ): + raise ValueError( + "GPU allocations must be instances of GPUModelAllocation" + ) + model_deployment_config = models_config_summary.deployment_config sum_model_gpus = 0 for model in models_allocated_gpus: sum_model_gpus += model.gpu_count - # check if total_gpus_available should be = to the sum (yes) - if sum_model_gpus > total_available_gpus: - raise ValueError( - f"""selected shape {selected_shape} has {total_available_gpus} GPUs while model group has {sum_model_gpus} GPUs. 
- Select a shape with a higher number of GPUs or use less GPUs within model group""" - ) - model_deployment_config = models_config_summary.deployment_config + aqua_deployment_config = model_deployment_config[model.ocid] - for ocid, model_config in model_deployment_config.items(): - if selected_shape not in model_config.shape: + if selected_shape not in aqua_deployment_config.shape: + logger.error(f"Selected shape {selected_shape} is not supported by model with OCID {model.ocid}") raise ValueError( - f"""selected shape {selected_shape} is not supported by model with OCID {ocid}""" + f"Selected shape {selected_shape} is not supported by all models in model group." ) + multi_model_configs = aqua_deployment_config.configuration.get( + selected_shape + ).multi_model_deployment + + if not any( + gpu_shape_config.gpu_count == model.gpu_count + for gpu_shape_config in multi_model_configs + ): + logger.error(f"MultiModelConfig with user assigned gpu_count={model.gpu_count} was not found for {model.ocid}") + raise ValueError(f"The GPU allocation is not valid for all models in the selected shape {selected_shape}.") + + if sum_model_gpus > total_available_gpus: + logger.error(f"Selected shape {selected_shape} has {total_available_gpus} GPUs while model group has {sum_model_gpus} GPUs.") + raise ValueError( + "Select an instance shape with a higher number of GPUs or use less GPUs within model group." + ) + else: - raise ValueError( - "Model group is not a multi model deployment" - ) + raise ValueError("Model group is not a multi model deployment") class Config: extra = "ignore" diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json new file mode 100644 index 000000000..d8f80ea59 --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json @@ -0,0 +1,308 @@ +{ + "deployment_config": { + "model_a": { + "shape": [ + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ], + "configuration": { + "VM.GPU.A10.2": { + "parameters": {}, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": {} + } + ], + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.A100-v2.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.H100.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": {} + }, + { + "gpu_count": 2, + "parameters": {} + }, + { + "gpu_count": 4, + "parameters": {} + }, + { + "gpu_count": 8, + "parameters": {} + } + ], + 
"shape_info": { + "configs": [], + "type": "" + } + } + } + }, + "model_b": { + "shape": [ + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ], + "configuration": { + "VM.GPU.A10.2": { + "parameters": {}, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": {} + } + ], + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.A100-v2.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.H100.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": {} + }, + { + "gpu_count": 2, + "parameters": {} + }, + { + "gpu_count": 8, + "parameters": {} + } + ], + "shape_info": { + "configs": [], + "type": "" + } + } + } + }, + "model_c": { + "shape": [ + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ], + "configuration": { + "VM.GPU.A10.2": { + "parameters": {}, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": {} + } + ], + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.A100-v2.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.H100.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": {} + }, + { + "gpu_count": 2, + "parameters": {} + }, + { + "gpu_count": 4, + "parameters": {} + }, + { + "gpu_count": 8, + "parameters": {} + } + ], + "shape_info": { + "configs": [], + "type": "" + } + } + } + } + }, + "gpu_allocation": {}, + "error_message": "None" +} \ No newline at end of file diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 718823928..7a43f4c0e 100644 --- 
a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -17,12 +17,16 @@ import ads.aqua.modeldeployment.deployment import ads.config +from ads.aqua.common.entities import AquaMultiModelRef +from ads.aqua.common.enums import Tags from ads.aqua.common.errors import AquaRuntimeError, AquaValueError from ads.aqua.modeldeployment import AquaDeploymentApp, MDInferenceResponse from ads.aqua.modeldeployment.entities import ( AquaDeployment, AquaDeploymentConfig, AquaDeploymentDetail, + CreateModelDeploymentDetails, + ModelDeploymentConfigSummary, ModelParams, ) from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader @@ -1265,3 +1269,132 @@ def test_get_model_deployment_response(self, mock_post): result = self.app.get_model_deployment_response(endpoint) assert result["choices"][0]["text"] == " The answer is 2" + + +class TestCreateModelDeploymentDetails: + curr_dir = os.path.dirname(__file__) # Define curr_dir + + def validate_config_helper(self, models, instance_shape, display_name, total_gpus, multi_model="true"): + config_json = os.path.join(self.curr_dir, "test_data/deployment/aqua_summary_multi_model.json") + + with open(config_json, "r") as _file: + config = json.load(_file) + + config['gpu_allocation'] = { + instance_shape: { + "models": models, + "total_gpus_available": total_gpus + } + } + + aqua_models = [AquaMultiModelRef(model_id=x["ocid"], gpu_count=x["gpu_count"]) for x in models] + + mock_create_deployment_details = CreateModelDeploymentDetails( + models=aqua_models, + instance_shape=instance_shape, + display_name=display_name, + freeform_tags={Tags.MULTIMODEL_TYPE_TAG: multi_model} + ) + + mock_models_config_summary = ModelDeploymentConfigSummary( + **(config) + ) + + mock_create_deployment_details.validate_config( + models_config_summary=mock_models_config_summary + ) + + @pytest.mark.parametrize( + "models, instance_shape, display_name, total_gpus", + [ + ( + [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 2} + ], + "BM.GPU.H100.8", + "test_a", + 8 + ), + ( + [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + ], + "BM.GPU.A10.4", + "test_a", + 4 + ) + + ] + ) + def test_validate_config_positive(self, models, instance_shape, display_name, total_gpus): + self.validate_config_helper(models, instance_shape, display_name, total_gpus) + + @pytest.mark.parametrize( + "models, instance_shape, display_name, total_gpus, multi_model, value_error", + [ ( + [ + {"ocid": "model_a", "gpu_count" : 2}, + {"ocid": "model_b", "gpu_count" : 2}, + {"ocid": "model_c", "gpu_count" : 4}], + "BM.GPU.H100.8", + 'test_a', + None, + 'true', + "Missing total GPU allocation for the selected shape BM.GPU.H100.8" + + ), + ( + [ + {"ocid": "model_a", "gpu_count" : 2}, + {"ocid": "model_b", "gpu_count" : 2}, + {"ocid": "model_c", "gpu_count" : 4}], + "invalid_shape", + 'test_a', + 8, + 'true', + "Selected shape invalid_shape is not supported by all models in model group." + + ), + ( + [ + {"ocid": "model_a", "gpu_count" : 2}, + {"ocid": "model_b", "gpu_count" : 4}, # model_b lacks this entry in loaded config + {"ocid": "model_c", "gpu_count" : 2}], + "BM.GPU.H100.8", + 'test_a', + 8, + 'true', + "The GPU allocation is not valid for all models in the selected shape BM.GPU.H100.8." 
+ + ), + ( + [ + {"ocid": "model_a", "gpu_count" : 4}, + {"ocid": "model_b", "gpu_count" : 2}, + {"ocid": "model_c", "gpu_count" : 4}], + "BM.GPU.H100.8", + 'test_a', + 8, + 'true', + "Select an instance shape with a higher number of GPUs or use less GPUs within model group." + + ), + ( + [ + {"ocid": "model_a", "gpu_count" : 2}, + {"ocid": "model_b", "gpu_count" : 2}, + {"ocid": "model_c", "gpu_count" : 4}], + "BM.GPU.H100.8", + 'test_a', + 8, + 'false', + "Model group is not a multi model deployment" + ) + ] + ) + def test_validate_config_negative(self, models, instance_shape, display_name, total_gpus, multi_model, value_error): + with pytest.raises(ValueError, match=value_error): + self.validate_config_helper(models, instance_shape, display_name, total_gpus, multi_model) From eb0c93e78f47370e00386c3902ee03d50f0b73f9 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Fri, 14 Feb 2025 12:12:29 -0800 Subject: [PATCH 038/124] removed test_multi notebook --- .../model_deployment_config_summary.json | 0 .../with_extras/aqua/test_deployment.py | 88 +++++++++++++++---- 2 files changed, 69 insertions(+), 19 deletions(-) create mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/model_deployment_config_summary.json diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/model_deployment_config_summary.json b/tests/unitary/with_extras/aqua/test_data/deployment/model_deployment_config_summary.json new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 7a43f4c0e..828c970eb 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -28,8 +28,21 @@ CreateModelDeploymentDetails, ModelDeploymentConfigSummary, ModelParams, + CreateModelDeploymentDetails, + ModelDeploymentConfigSummary, + AquaMultiModelRef, + MultiModelConfig, + ConfigurationItem, + DeploymentShapeInfo, ) +<<<<<<< HEAD from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader +======= + + +from ads.aqua.common.enums import Tags +from ads.aqua.common.errors import AquaRuntimeError, AquaValueError +>>>>>>> c26cd283 (wip- fixing gpu count validation) from ads.model.datascience_model import DataScienceModel from ads.model.deployment.model_deployment import ModelDeployment from ads.model.model_metadata import ModelCustomMetadata @@ -520,9 +533,9 @@ def test_list_deployments(self): expected_attributes = AquaDeployment.__annotations__.keys() for r in results: actual_attributes = r.to_dict() - assert set(actual_attributes) == set( - expected_attributes - ), "Attributes mismatch" + assert set(actual_attributes) == set(expected_attributes), ( + "Attributes mismatch" + ) @patch("ads.aqua.modeldeployment.deployment.get_resource_name") def test_get_deployment(self, mock_get_resource_name): @@ -689,7 +702,8 @@ def test_verify_compatibility(self): @parameterized.expand( [ [ - "shape", + [2, 2, 2], + "BM.GPU.H100.8", "There are no available shapes for model model_a, please select different model to deploy.", ], [ @@ -698,26 +712,62 @@ def test_verify_compatibility(self): ], ] ) - @patch( - "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" - ) + @patch("ads.aqua.modeldeployment.deployment.AquaDeploymentApp.create") def test_multi_model_validate_config( - self, missing_key, error, mock_fetch_deployment_configs_concurrently + self, + gpu_counts, + user_instance_shape, + config_instance_shape, 
+ total_gpus_available, + expected_validation ): - config_json = os.path.join( - self.curr_dir, - "test_data/deployment/aqua_multi_model_deployment_config.json", - ) - with open(config_json, "r") as _file: - config = json.load(_file) - config.pop(missing_key) + mock_multi_model_ref = [] + mock_deployment_config = {} + mock_gpu_allocation = {} - mock_fetch_deployment_configs_concurrently.return_value = { - "model_a": AquaDeploymentConfig(**config) + for index, model_gpu in enumerate(gpu_counts): + mock_model = MagicMock(spec=AquaMultiModelRef) + mock_model.gpu_count = model_gpu + mock_multi_model_ref.append(mock_model) + + curr_key = f"model_id_{index}" + mock_deployment_config[curr_key] = MagicMock(spec=AquaDeploymentConfig) + + # change instance shape + gpu_allocation = { + config_instance_shape: { + "total_gpus_available": total_gpus_available, + "models": [{"gpu_count": x} for x in gpu_counts], + } } - with pytest.raises(AquaValueError, match=error): - self.app.get_multimodel_deployment_config(["model_a"]) + + mock_aqua_deployment_config = MagicMock(spec=ModelDeploymentConfigSummary) + mock_aqua_deployment_config.configure_mock( + **{ + "deployment_config": mock_deployment_config, + "gpu_allocation": gpu_allocation, + } + ) + + mock_models_config_summary = AquaDeploymentApp().get_multimodel_deployment_config = MagicMock( + return_value=ModelDeploymentConfigSummary( + deployment_config=mock_deployment_config, + gpu_allocation=mock_gpu_allocation, + ) + ) + + model_deployment_config = CreateModelDeploymentDetails( + models=mock_multi_model_ref, + instance_shape=user_instance_shape, + freeform_tags={Tags.MULTIMODEL_TYPE_TAG: "true"}, + ) + + obtained_validation = model_deployment_config.validate_config( + models_config_summary=mock_models_config_summary + ) + + assert obtained_validation == expected_validation @patch("ads.aqua.modeldeployment.deployment.get_container_config") @patch("ads.aqua.model.AquaModelApp.create") From 9ffb89775dd71ddd44fc942bd8ace32fbb7d3b64 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Fri, 14 Feb 2025 12:14:27 -0800 Subject: [PATCH 039/124] fixed imports on unit test file --- tests/unitary/with_extras/aqua/test_deployment.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 828c970eb..516b06422 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -28,21 +28,8 @@ CreateModelDeploymentDetails, ModelDeploymentConfigSummary, ModelParams, - CreateModelDeploymentDetails, - ModelDeploymentConfigSummary, - AquaMultiModelRef, - MultiModelConfig, - ConfigurationItem, - DeploymentShapeInfo, ) -<<<<<<< HEAD from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader -======= - - -from ads.aqua.common.enums import Tags -from ads.aqua.common.errors import AquaRuntimeError, AquaValueError ->>>>>>> c26cd283 (wip- fixing gpu count validation) from ads.model.datascience_model import DataScienceModel from ads.model.deployment.model_deployment import ModelDeployment from ads.model.model_metadata import ModelCustomMetadata From c85302e4504132f9f093cab182d77b42481bf391 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Fri, 14 Feb 2025 12:25:22 -0800 Subject: [PATCH 040/124] removed old unit test method --- .../with_extras/aqua/test_deployment.py | 69 ------------------- 1 file changed, 69 deletions(-) diff --git a/tests/unitary/with_extras/aqua/test_deployment.py 
b/tests/unitary/with_extras/aqua/test_deployment.py index 516b06422..154a66940 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -686,75 +686,6 @@ def test_verify_compatibility(self): assert result[1] == 0 assert result[2] == [] - @parameterized.expand( - [ - [ - [2, 2, 2], - "BM.GPU.H100.8", - "There are no available shapes for model model_a, please select different model to deploy.", - ], - [ - "configuration", - "No available GPU allocations. Choose a different model.", - ], - ] - ) - @patch("ads.aqua.modeldeployment.deployment.AquaDeploymentApp.create") - def test_multi_model_validate_config( - self, - gpu_counts, - user_instance_shape, - config_instance_shape, - total_gpus_available, - expected_validation - ): - - mock_multi_model_ref = [] - mock_deployment_config = {} - mock_gpu_allocation = {} - - for index, model_gpu in enumerate(gpu_counts): - mock_model = MagicMock(spec=AquaMultiModelRef) - mock_model.gpu_count = model_gpu - mock_multi_model_ref.append(mock_model) - - curr_key = f"model_id_{index}" - mock_deployment_config[curr_key] = MagicMock(spec=AquaDeploymentConfig) - - # change instance shape - gpu_allocation = { - config_instance_shape: { - "total_gpus_available": total_gpus_available, - "models": [{"gpu_count": x} for x in gpu_counts], - } - } - - mock_aqua_deployment_config = MagicMock(spec=ModelDeploymentConfigSummary) - mock_aqua_deployment_config.configure_mock( - **{ - "deployment_config": mock_deployment_config, - "gpu_allocation": gpu_allocation, - } - ) - - mock_models_config_summary = AquaDeploymentApp().get_multimodel_deployment_config = MagicMock( - return_value=ModelDeploymentConfigSummary( - deployment_config=mock_deployment_config, - gpu_allocation=mock_gpu_allocation, - ) - ) - - model_deployment_config = CreateModelDeploymentDetails( - models=mock_multi_model_ref, - instance_shape=user_instance_shape, - freeform_tags={Tags.MULTIMODEL_TYPE_TAG: "true"}, - ) - - obtained_validation = model_deployment_config.validate_config( - models_config_summary=mock_models_config_summary - ) - - assert obtained_validation == expected_validation @patch("ads.aqua.modeldeployment.deployment.get_container_config") @patch("ads.aqua.model.AquaModelApp.create") From 49536156eb0c475ae9254e4b78072102904a6b41 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 14 Feb 2025 17:23:18 -0500 Subject: [PATCH 041/124] Updated pr. --- ads/aqua/common/entities.py | 2 + ads/aqua/modeldeployment/deployment.py | 7 ++-- .../aqua_create_multi_deployment.yaml | 9 +--- .../with_extras/aqua/test_deployment.py | 41 ++++--------------- 4 files changed, 16 insertions(+), 43 deletions(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index 69bea481d..2becc76bb 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -42,6 +42,8 @@ class AquaMultiModelRef(Serializable): ---------- model_id : str The unique identifier of the model. + model_name : Optional[str] + The name of the model. gpu_count : Optional[int] Number of GPUs required for deployment. env_var : Optional[Dict[str, Any]] diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 956b237d9..12cd5ecb1 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -2,6 +2,7 @@ # Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import json import shlex from typing import Dict, List, Optional, Union @@ -423,7 +424,7 @@ def _create_multi( ContainerSpec.CONTAINER_SPEC, UNKNOWN_DICT ).get(container_type_key, UNKNOWN_DICT) - container_params = container_spec.get(ContainerSpec.CLI_PARM, UNKNOWN) + container_params = container_spec.get(ContainerSpec.CLI_PARM, UNKNOWN).strip() for idx, model in enumerate(create_deployment_details.models): user_params = ( @@ -456,7 +457,7 @@ def _create_multi( config_parameters = item.parameters.get( get_container_params_type(container_type_key), UNKNOWN ) - params = f"{params} {get_combined_params(config_parameters, user_params)}" + params = f"{params} {get_combined_params(config_parameters, user_params)}".strip() break artifact_location_key = ( @@ -476,7 +477,7 @@ def _create_multi( aqua_model.custom_metadata_list.get(model_name_key).value ) - env_var.update({AQUA_MULTI_MODEL_CONFIG: {"models": model_config}}) + env_var.update({AQUA_MULTI_MODEL_CONFIG: json.dumps({"models": model_config})}) logger.info(f"Env vars used for deploying {aqua_model.id} : {env_var}.") container_image_uri = ( diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml index e83d8cbef..fdc9a6fa9 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml @@ -25,14 +25,7 @@ spec: spec: env: MODEL_DEPLOY_PREDICT_ENDPOINT: /v1/completions - MULTI_MODEL_CONFIG: - models: - - params: --served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096 - model_path: models/model_one/5be6479/artifact/ - - params: --served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096 - model_path: models/model_two/83e9aa1/artifact/ - - params: --served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096 - model_path: models/model_three/83e9aa1/artifact/ + MULTI_MODEL_CONFIG: '{"models": [{"params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/"}' healthCheckPort: 8080 image: "dsmc://image-name:1.0.0.0" modelUri: "ocid1.datasciencemodeldeployment.oc1.." 
diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 966baf667..a235ef9b5 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -189,22 +189,7 @@ class TestDataset: "environment_configuration_type": "OCIR_CONTAINER", "environment_variables": { "MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions", - "MULTI_MODEL_CONFIG": { - "models": [ - { - "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", - "model_path": "models/model_one/5be6479/artifact/", - }, - { - "params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", - "model_path": "models/model_two/83e9aa1/artifact/", - }, - { - "params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", - "model_path": "models/model_three/83e9aa1/artifact/", - }, - ] - }, + "MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/"}]}', }, "health_check_port": 8080, "image": "dsmc://image-name:1.0.0.0", @@ -410,22 +395,7 @@ class TestDataset: "model_id": "ocid1.datasciencemodel.oc1..", "environment_variables": { "MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions", - "MULTI_MODEL_CONFIG": { - "models": [ - { - "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", - "model_path": "models/model_one/5be6479/artifact/", - }, - { - "params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", - "model_path": "models/model_two/83e9aa1/artifact/", - }, - { - "params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", - "model_path": "models/model_three/83e9aa1/artifact/", - }, - ] - }, + "MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/"}]}', }, "cmd": [], "console_link": "https://cloud.oracle.com/data-science/model-deployments/ocid1.datasciencemodeldeployment.oc1..?region=region-name", @@ -1139,8 +1109,10 @@ def test_create_deployment_for_tei_byoc_embedding_model( @patch("ads.aqua.model.AquaModelApp.create_multi") @patch("ads.aqua.modeldeployment.deployment.get_container_image") @patch("ads.model.deployment.model_deployment.ModelDeployment.deploy") + @patch("ads.aqua.modeldeployment.AquaDeploymentApp.get_deployment_config") def test_create_deployment_for_multi_model( self, + mock_get_deployment_config, mock_deploy, mock_get_container_image, mock_create_multi, @@ -1171,6 +1143,11 @@ def test_create_deployment_for_multi_model( container_index_config = json.load(_file) mock_get_container_config.return_value = container_index_config + deployment_config_json = os.path.join( + self.curr_dir, "test_data/deployment/deployment_gpu_config.json" 
+ ) + mock_get_deployment_config.return_value = deployment_config_json + mock_get_container_image.return_value = TestDataset.DEPLOYMENT_IMAGE_NAME aqua_deployment = os.path.join( self.curr_dir, "test_data/deployment/aqua_create_multi_deployment.yaml" From 3566fe96b6f565ceac6b2de0074413797862f5c4 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Tue, 18 Feb 2025 17:31:04 -0500 Subject: [PATCH 042/124] Added validator for deployment container. --- ads/aqua/model/model.py | 13 +++++++------ tests/unitary/with_extras/aqua/test_model.py | 20 +++++++++++++++++++- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 7276de085..105a05063 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -254,7 +254,10 @@ def create_multi( artifact_list = [] display_name_list = [] model_custom_metadata = ModelCustomMetadata() - default_deployment_container = None + # TODO: update it when more deployment containers are supported + default_deployment_container = ( + InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY + ) # Process each model for idx, model in enumerate(models): @@ -280,12 +283,10 @@ def create_multi( ), ).value - if idx == 0: - default_deployment_container = deployment_container - elif deployment_container != default_deployment_container: + if default_deployment_container != deployment_container: raise AquaValueError( - "Deployment container mismatch detected. " - "All selected models must use the same deployment container." + f"Unsopported deployment container {deployment_container} detected for model {source_model.id}. " + f"Currently only {InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY} container is supported for multi model deployment." ) # Add model-specific metadata diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index 338237c84..2c149217d 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -372,7 +372,7 @@ def test_create_multimodel( mock_model.artifact = "mock_artifact_path" custom_metadata_list = ModelCustomMetadata() custom_metadata_list.add( - **{"key": "deployment-container", "value": "odsc-vllm-serving"} + **{"key": "deployment-container", "value": "odsc-tgi-serving"} ) mock_model.custom_metadata_list = custom_metadata_list @@ -390,6 +390,24 @@ def test_create_multimodel( env_var={"params": "--trust-remote-code --max-model-len 32000"}, ) + with pytest.raises( + AquaValueError, + match="Unsopported deployment container odsc-tgi-serving detected for model mock_model_id. Currently only odsc-vllm-serving container is supported for multi model deployment.", + ): + model = self.app.create_multi( + models=[model_info_1, model_info_2], + project_id="test_project_id", + compartment_id="test_compartment_id", + ) + + custom_metadata_list = ModelCustomMetadata() + custom_metadata_list.add( + **{"key": "deployment-container", "value": "odsc-vllm-serving"} + ) + + mock_model.custom_metadata_list = custom_metadata_list + mock_from_id.return_value = mock_model + # will create a multi-model group model = self.app.create_multi( models=[model_info_1, model_info_2], From dce825e815f679bd936c17d95ade2da34ec5941c Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 19 Feb 2025 14:54:10 -0500 Subject: [PATCH 043/124] Updated tag. 
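Editor's note on PATCH 042 above: the new validator drops the pairwise "all containers must match" check and instead requires every model's deployment-container metadata to equal the vLLM serving family. The sketch below restates that rule under simplified assumptions; check_deployment_containers and the dict shape are invented for illustration, only the container family string comes from the diff.

    # Stand-alone sketch of the container rule from PATCH 042; simplified types.
    from typing import Dict, List

    VLLM_CONTAINER_FAMILY = "odsc-vllm-serving"  # value taken from the diff above


    def check_deployment_containers(models: List[Dict[str, str]]) -> None:
        """Reject any model whose deployment container is not the vLLM family."""
        for model in models:
            container = model.get("deployment-container")
            if container != VLLM_CONTAINER_FAMILY:
                raise ValueError(
                    f"Unsupported deployment container '{container}' for model "
                    f"'{model.get('id')}'. Only '{VLLM_CONTAINER_FAMILY}' is "
                    "supported for multi-model deployments."
                )


    check_deployment_containers(
        [{"id": "m1", "deployment-container": VLLM_CONTAINER_FAMILY}]
    )
    try:
        check_deployment_containers(
            [{"id": "m2", "deployment-container": "odsc-tgi-serving"}]
        )
    except ValueError as err:
        print(err)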
--- ads/aqua/modeldeployment/deployment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 12cd5ecb1..25fb7b0cb 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -494,6 +494,7 @@ def _create_multi( tags = { Tags.AQUA_MODEL_ID_TAG: aqua_model.id, Tags.MULTIMODEL_TYPE_TAG: "true", + Tags.AQUA_TAG: "active", **(create_deployment_details.freeform_tags or UNKNOWN_DICT), } From c04445d38a94b48b846f04fa30e394269ab7c1ea Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 19 Feb 2025 16:18:52 -0500 Subject: [PATCH 044/124] Added models to response. --- ads/aqua/common/entities.py | 14 ++-- ads/aqua/modeldeployment/deployment.py | 4 +- ads/aqua/modeldeployment/entities.py | 67 ++++++++++++++----- .../with_extras/aqua/test_deployment.py | 21 ++++++ 4 files changed, 84 insertions(+), 22 deletions(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index 2becc76bb..294b82f59 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -4,6 +4,8 @@ from typing import Optional +from pydantic import Field + from ads.aqua.config.utils.serializer import Serializable @@ -50,10 +52,14 @@ class AquaMultiModelRef(Serializable): Optional environment variables to override during deployment. """ - model_id: str - model_name: Optional[str] = None - gpu_count: Optional[int] = None - env_var: Optional[dict] = None + model_id: str = Field(..., description="The model OCID to deploy.") + model_name: Optional[str] = Field(None, description="The name of model.") + gpu_count: Optional[int] = Field( + None, description="The gpu count allocation for the model." + ) + env_var: Optional[dict] = Field( + default_factory=dict, description="The environment variables of the model." 
+ ) class Config: extra = "ignore" diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 25fb7b0cb..62fa3b12c 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -500,7 +500,7 @@ def _create_multi( model_name = ", ".join(model_name_list) - return self._create_deployment( + aqua_deployment = self._create_deployment( create_deployment_details=create_deployment_details, aqua_model_id=aqua_model.id, model_name=model_name, @@ -511,6 +511,8 @@ def _create_multi( env_var=env_var, tags=tags, ) + aqua_deployment.models = create_deployment_details.models + return aqua_deployment def _create_deployment( self, diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 471dba138..e50baccc6 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -31,23 +31,56 @@ class Config: class AquaDeployment(Serializable): """Represents an Aqua Model Deployment""" - id: Optional[str] = None - display_name: Optional[str] = None - aqua_service_model: Optional[bool] = None - model_id: str = None - aqua_model_name: Optional[str] = None - state: Optional[str] = None - description: Optional[str] = None - created_on: Optional[str] = None - created_by: Optional[str] = None - endpoint: Optional[str] = None - private_endpoint_id: Optional[str] = None - console_link: Optional[str] = None - lifecycle_details: Optional[str] = None - shape_info: Optional[ShapeInfo] = None - tags: Optional[dict] = None - environment_variables: Optional[dict] = None - cmd: Optional[List[str]] = None + id: Optional[str] = Field(None, description="The model deployment OCID.") + display_name: Optional[str] = Field( + None, description="The name of the model deployment." + ) + aqua_service_model: Optional[bool] = Field( + False, description="The bool value to indicate if it's aqua service model." + ) + model_id: str = Field(..., description="The model OCID to deploy.") + models: Optional[List[AquaMultiModelRef]] = Field( + default_factory=list, description="List of models for multi model deployment." + ) + aqua_model_name: Optional[str] = Field( + None, description="The name of the aqua model." + ) + state: Optional[str] = Field(None, description="The state of the model deployment.") + description: Optional[str] = Field( + None, description="The description of the model deployment." + ) + created_on: Optional[str] = Field( + None, description="The creation time of the model deployment." + ) + created_by: Optional[str] = Field( + None, description="The OCID that creates the model deployment." + ) + endpoint: Optional[str] = Field( + None, description="The endpoint of the model deployment." + ) + private_endpoint_id: Optional[str] = Field( + None, description="The private endpoint id of the model deployment." + ) + console_link: Optional[str] = Field( + None, description="The console link of the model deployment." + ) + lifecycle_details: Optional[str] = Field( + None, description="The lifecycle details of the model deployment." + ) + shape_info: Optional[ShapeInfo] = Field( + default_factory=ShapeInfo, + description="The shape information of the model deployment.", + ) + tags: Optional[dict] = Field( + default_factory=dict, description="The tags of the model deployment." 
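Editor's note on the entity hunks around this point: the plain "= None" defaults are being replaced by pydantic Field(...) descriptors, with default_factory for the mutable dict and list fields. The toy model below shows why that matters; DeploymentInfo is invented for the example and is not an ADS type.

    # Illustrative sketch -- demonstrates Field(default_factory=...) as used above.
    from typing import Dict, List, Optional

    from pydantic import BaseModel, Field


    class DeploymentInfo(BaseModel):
        """Toy stand-in for the Field-based entities in this hunk."""

        model_id: str = Field(..., description="Required value.")
        display_name: Optional[str] = Field(None, description="Plain scalar default.")
        # default_factory builds a fresh dict/list per instance, so instances
        # never share a single mutable default object.
        tags: Dict[str, str] = Field(default_factory=dict)
        cmd: List[str] = Field(default_factory=list)


    a = DeploymentInfo(model_id="ocid1.datasciencemodel.oc1..aaa")
    b = DeploymentInfo(model_id="ocid1.datasciencemodel.oc1..bbb")
    a.tags["env"] = "dev"
    assert b.tags == {}  # b got its own empty dict
    print(a.tags, b.tags)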
+ ) + environment_variables: Optional[dict] = Field( + default_factory=dict, + description="The environment variables of the model deployment.", + ) + cmd: Optional[List[str]] = Field( + default_factory=list, description="The cmd of the model deployment." + ) @classmethod def from_oci_model_deployment( diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index a235ef9b5..9b2096034 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -362,6 +362,7 @@ class TestDataset: "created_by": "ocid1.user.oc1..", "endpoint": MODEL_DEPLOYMENT_URL, "private_endpoint_id": "", + "models": [], "model_id": "ocid1.datasciencemodel.oc1..", "environment_variables": { "BASE_MODEL": "service_models/model-name/artifact", @@ -392,6 +393,26 @@ class TestDataset: "created_by": "ocid1.user.oc1..", "endpoint": MODEL_DEPLOYMENT_URL, "private_endpoint_id": "", + "models": [ + { + "env_var": {}, + "gpu_count": 2, + "model_id": "test_model_id_1", + "model_name": None, + }, + { + "env_var": {}, + "gpu_count": 2, + "model_id": "test_model_id_2", + "model_name": None, + }, + { + "env_var": {}, + "gpu_count": 2, + "model_id": "test_model_id_3", + "model_name": None, + }, + ], "model_id": "ocid1.datasciencemodel.oc1..", "environment_variables": { "MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions", From 4524271f4abd75993622887260ccb36ebdb05305 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Wed, 19 Feb 2025 16:36:50 -0800 Subject: [PATCH 045/124] addressed code review comments --- ads/aqua/modeldeployment/deployment.py | 5 +- ads/aqua/modeldeployment/entities.py | 118 +++++++++++------- .../deployment/aqua_summary_multi_model.json | 49 +++----- .../with_extras/aqua/test_deployment.py | 100 +++++++++------ 4 files changed, 154 insertions(+), 118 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 3a1f5a1a0..863b3b39c 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -41,6 +41,7 @@ AquaDeploymentConfig, AquaDeploymentDetail, ConfigurationItem, + ConfigValidationError, CreateModelDeploymentDetails, ModelDeploymentConfigSummary, ) @@ -166,10 +167,10 @@ def create( if not model_config_summary.gpu_allocation: raise AquaValueError(model_config_summary.error_message) - create_deployment_details.validate_config( + create_deployment_details.validate_multimodel_deployment_feasibility( models_config_summary=model_config_summary ) - except ValueError as err: + except ConfigValidationError as err: raise AquaValueError(f"Error: {err}") from err aqua_model = model_app.create_multi( diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 681f31c37..8c5a4d522 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -288,6 +288,17 @@ class GPUShapeAllocation(Serializable): class Config: extra = "allow" +class ConfigValidationError(Exception): + """Exception raised for config validation.""" + + def __init__( + self, + message: str = """Validation failed: The provided model group configuration is not compatible with the selected instance shape. + Please check GPU count per model and multi-model deployment support for the selected instance shape.""", + ): + super().__init__( + message + ) class ModelDeploymentConfigSummary(Serializable): """Top-level configuration model for OCI-based deployments. 
@@ -412,9 +423,18 @@ def validate(cls, values: Any) -> Any: ) return values - def validate_config(self, models_config_summary: ModelDeploymentConfigSummary): + def validate_multimodel_deployment_feasibility(self, models_config_summary: ModelDeploymentConfigSummary): """ - Validates the model configuration for multi model deployments. + Validates whether the user input of a model group (List[AquaMultiModelRef], 2+ models with a specified gpu count per model) + is feasible for a multi model deployment on the user's selected shape (instance_shape) + + Validation Criteria: + - GPU Capacity: Ensures that the total number of GPUs requested by all models in the group does not exceed the GPU capacity of the selected instance shape. + - Verifies that all models in the group are compatible with the selected instance shape. + - Ensures that each model’s GPU allocation, as specified by the user, matches the requirements in the model's deployment configuration. + - Confirms that the selected instance shape supports multi-model deployment. + - Requires user input for the model group to be considered a valid multi-model deployment. + Parameters ---------- @@ -424,68 +444,76 @@ def validate_config(self, models_config_summary: ModelDeploymentConfigSummary): Raises ------- - ValueError: + ConfigValidationError: When the deployment is NOT a multi model deployment When assigned GPU Allocations per model are NOT within the number of GPUs available in the instance shape When all models in model group can NOT be deployed on the instance shape with the selected GPU count """ - if self.freeform_tags.get(Tags.MULTIMODEL_TYPE_TAG) == "true": - selected_shape = self.instance_shape - total_available_gpus = getattr( - models_config_summary.gpu_allocation.get(selected_shape), - "total_gpus_available", - None, - ) - models_allocated_gpus = getattr( - models_config_summary.gpu_allocation.get(selected_shape), "models", None + if not self.models: + logger.error( + "Validation Failed: User defined model group is None." ) + raise ConfigValidationError("""Validation Failed: At least two models are required for multi-model deployment, + but only none were provided. Add 2 or more models in the model group to proceed.""") - if not isinstance(total_available_gpus, int): - raise ValueError( - f"Missing total GPU allocation for the selected shape {selected_shape}" + selected_shape = self.instance_shape + + if selected_shape not in models_config_summary.gpu_allocation: + logger.error( + f"Validation Failed: The model group are not compatible with the selected instance shape {selected_shape}" ) + raise ConfigValidationError("Validation Failed: Select a different instance shape. 
The selected instance shape is not supported.") + + total_available_gpus = models_config_summary.gpu_allocation[selected_shape].total_gpus_available + + model_deployment_config = models_config_summary.deployment_config - if not all( - isinstance(item, GPUModelAllocation) for item in models_allocated_gpus - ): - raise ValueError( - "GPU allocations must be instances of GPUModelAllocation" + required_model_keys = [model.model_id for model in self.models] + missing_model_keys = required_model_keys - model_deployment_config.keys() + + if len(missing_model_keys) > 0: + logger.error( + f"Validation Failed: Missing the following model entry with key {missing_model_keys} in ModelDeploymentConfigSummary" ) + raise ConfigValidationError("Validation Failed: One or more selected models are missing from the configuration, preventing validation for deployment on the given shape.") - model_deployment_config = models_config_summary.deployment_config + sum_model_gpus = 0 - sum_model_gpus = 0 + for model in self.models: + sum_model_gpus += model.gpu_count - for model in models_allocated_gpus: - sum_model_gpus += model.gpu_count + aqua_deployment_config = model_deployment_config[model.model_id] - aqua_deployment_config = model_deployment_config[model.ocid] + if selected_shape not in aqua_deployment_config.shape: + logger.error( + f"Validation Failed: Model with OCID {model.model_id} in the model group is not compatible with the selected instance shape: {selected_shape}" + ) + raise ConfigValidationError( + "Validation Failed: Select a different instance shape. One or more models in the group are incompatible with the selected instance shape." + ) - if selected_shape not in aqua_deployment_config.shape: - logger.error(f"Selected shape {selected_shape} is not supported by model with OCID {model.ocid}") - raise ValueError( - f"Selected shape {selected_shape} is not supported by all models in model group." - ) - multi_model_configs = aqua_deployment_config.configuration.get( - selected_shape + multi_model_configs = aqua_deployment_config.configuration.get( + selected_shape, ConfigurationItem() ).multi_model_deployment - if not any( - gpu_shape_config.gpu_count == model.gpu_count - for gpu_shape_config in multi_model_configs - ): - logger.error(f"MultiModelConfig with user assigned gpu_count={model.gpu_count} was not found for {model.ocid}") - raise ValueError(f"The GPU allocation is not valid for all models in the selected shape {selected_shape}.") - - if sum_model_gpus > total_available_gpus: - logger.error(f"Selected shape {selected_shape} has {total_available_gpus} GPUs while model group has {sum_model_gpus} GPUs.") - raise ValueError( - "Select an instance shape with a higher number of GPUs or use less GPUs within model group." + valid_gpu_configurations = [gpu_shape_config.gpu_count for gpu_shape_config in multi_model_configs] + if model.gpu_count not in valid_gpu_configurations: + valid_gpu_str = ", ".join(map(str, valid_gpu_configurations)) + logger.error( + f"Validation Failed: Model {model.model_id} allocated {model.gpu_count} GPUs by user, but its deployment configuration requires either {valid_gpu_str} GPUs." + ) + raise ConfigValidationError( + "Validation Failed: Change the GPU count for one or more models in the model group. Adjust GPU allocations per model or choose a larger instance shape." 
) - else: - raise ValueError("Model group is not a multi model deployment") + if sum_model_gpus > total_available_gpus: + logger.error( + f"Validation Failed: Selected shape {selected_shape} has {total_available_gpus} GPUs while model group has {sum_model_gpus} GPUs." + ) + raise ConfigValidationError( + "Validation Failed: Total requested GPU count exceeds the available GPU capacity for the selected instance shape. Adjust GPU allocations per model or choose a larger instance shape." + ) class Config: extra = "ignore" diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json index d8f80ea59..272f67a8f 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json @@ -204,7 +204,6 @@ "shape": [ "VM.GPU.A10.2", "BM.GPU.A10.4", - "BM.GPU.A100-v2.8", "BM.GPU.H100.8" ], "configuration": { @@ -244,35 +243,6 @@ "type": "" } }, - "BM.GPU.A100-v2.8": { - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - }, - "multi_model_deployment": [ - { - "gpu_count": 1, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 8, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - } - ], - "shape_info": { - "configs": [], - "type": "" - } - }, "BM.GPU.H100.8": { "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" @@ -303,6 +273,23 @@ } } }, - "gpu_allocation": {}, + "gpu_allocation": { + "BM.GPU.H100.8" : { + "models": [ + {"ocid": "model_a", "gpu_count" : 2}, + {"ocid": "model_b", "gpu_count" : 2}, + {"ocid": "model_c", "gpu_count" : 2} + ], + "total_gpus_available": 8 + }, + "BM.GPU.A100-v2.8" : { + "models": [ + {"ocid": "model_a", "gpu_count" : 2}, + {"ocid": "model_b", "gpu_count" : 2}, + {"ocid": "model_c", "gpu_count" : 2} + ], + "total_gpus_available": 8 + } + }, "error_message": "None" } \ No newline at end of file diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 154a66940..d3a9d414d 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -25,6 +25,7 @@ AquaDeployment, AquaDeploymentConfig, AquaDeploymentDetail, + ConfigValidationError, CreateModelDeploymentDetails, ModelDeploymentConfigSummary, ModelParams, @@ -1242,20 +1243,24 @@ def test_get_model_deployment_response(self, mock_post): class TestCreateModelDeploymentDetails: curr_dir = os.path.dirname(__file__) # Define curr_dir - def validate_config_helper(self, models, instance_shape, display_name, total_gpus, multi_model="true"): + def validate_multimodel_deployment_feasibility_helper(self, models, instance_shape, display_name, total_gpus, multi_model="true"): config_json = os.path.join(self.curr_dir, "test_data/deployment/aqua_summary_multi_model.json") with open(config_json, "r") as _file: config = json.load(_file) - config['gpu_allocation'] = { - instance_shape: { - "models": models, - "total_gpus_available": total_gpus - } - } + # config['gpu_allocation'] = { + # instance_shape: { + # "models": [ + # ], + # "total_gpus_available": total_gpus + # } + # } - aqua_models = [AquaMultiModelRef(model_id=x["ocid"], gpu_count=x["gpu_count"]) 
for x in models] + if models: + aqua_models = [AquaMultiModelRef(model_id=x["ocid"], gpu_count=x["gpu_count"]) for x in models] + else: + aqua_models = None mock_create_deployment_details = CreateModelDeploymentDetails( models=aqua_models, @@ -1268,7 +1273,7 @@ def validate_config_helper(self, models, instance_shape, display_name, total_gpu **(config) ) - mock_create_deployment_details.validate_config( + mock_create_deployment_details.validate_multimodel_deployment_feasibility( models_config_summary=mock_models_config_summary ) @@ -1288,42 +1293,60 @@ def validate_config_helper(self, models, instance_shape, display_name, total_gpu ( [ {"ocid": "model_a", "gpu_count": 2}, - {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 4} ], - "BM.GPU.A10.4", + "BM.GPU.H100.8", "test_a", - 4 - ) + 8 + ), + ( + [ + {"ocid": "model_a", "gpu_count": 1}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 2} + ], + "BM.GPU.H100.8", + "test_a", + 8 + ), ] ) - def test_validate_config_positive(self, models, instance_shape, display_name, total_gpus): - self.validate_config_helper(models, instance_shape, display_name, total_gpus) + def test_validate_multimodel_deployment_feasibility_positive(self, models, instance_shape, display_name, total_gpus): + self.validate_multimodel_deployment_feasibility_helper(models, instance_shape, display_name, total_gpus) @pytest.mark.parametrize( - "models, instance_shape, display_name, total_gpus, multi_model, value_error", - [ ( + "models, instance_shape, display_name, total_gpus, value_error", + [ + # ( + # [{"ocid": "model_a", "gpu_count" : 2}], + # "BM.GPU.H100.8", + # 'test_a', + # 8, + # "Validation Failed: At least two models are required for multi-model deployment, but only none were provided. Add 2 or more models in the model group to proceed." + + # ), + ( [ {"ocid": "model_a", "gpu_count" : 2}, {"ocid": "model_b", "gpu_count" : 2}, {"ocid": "model_c", "gpu_count" : 4}], - "BM.GPU.H100.8", + "invalid_shape", 'test_a', - None, - 'true', - "Missing total GPU allocation for the selected shape BM.GPU.H100.8" - + 8, + "Validation Failed: Select a different instance shape. The selected instance shape is not supported." ), ( [ {"ocid": "model_a", "gpu_count" : 2}, {"ocid": "model_b", "gpu_count" : 2}, - {"ocid": "model_c", "gpu_count" : 4}], - "invalid_shape", + {"ocid": "model_c", "gpu_count" : 2}, + {"ocid": "model_d", "gpu_count" : 2}], + "BM.GPU.H100.8", 'test_a', 8, - 'true', - "Selected shape invalid_shape is not supported by all models in model group." + "Validation Failed: One or more selected models are missing from the configuration, preventing validation for deployment on the given shape." ), ( @@ -1334,35 +1357,32 @@ def test_validate_config_positive(self, models, instance_shape, display_name, to "BM.GPU.H100.8", 'test_a', 8, - 'true', - "The GPU allocation is not valid for all models in the selected shape BM.GPU.H100.8." + "Validation Failed: Change the GPU count for one or more models in the model group. Adjust GPU allocations per model or choose a larger instance shape." ), ( [ - {"ocid": "model_a", "gpu_count" : 4}, + {"ocid": "model_a", "gpu_count" : 2}, {"ocid": "model_b", "gpu_count" : 2}, - {"ocid": "model_c", "gpu_count" : 4}], - "BM.GPU.H100.8", + {"ocid": "model_c", "gpu_count" : 2}], # model c is lacks BM.GPU.A100-v2.8 + "BM.GPU.A100-v2.8", 'test_a', 8, - 'true', - "Select an instance shape with a higher number of GPUs or use less GPUs within model group." 
- + "Validation Failed: Select a different instance shape. One or more models in the group are incompatible with the selected instance shape." ), ( [ - {"ocid": "model_a", "gpu_count" : 2}, + {"ocid": "model_a", "gpu_count" : 4}, {"ocid": "model_b", "gpu_count" : 2}, {"ocid": "model_c", "gpu_count" : 4}], "BM.GPU.H100.8", 'test_a', 8, - 'false', - "Model group is not a multi model deployment" + "Validation Failed: Total requested GPU count exceeds the available GPU capacity for the selected instance shape. Adjust GPU allocations per model or choose a larger instance shape." + ) ] ) - def test_validate_config_negative(self, models, instance_shape, display_name, total_gpus, multi_model, value_error): - with pytest.raises(ValueError, match=value_error): - self.validate_config_helper(models, instance_shape, display_name, total_gpus, multi_model) + def test_validate_multimodel_deployment_feasibility_negative(self, models, instance_shape, display_name, total_gpus, value_error): + with pytest.raises(ConfigValidationError, match=value_error): + self.validate_multimodel_deployment_feasibility_helper(models, instance_shape, display_name, total_gpus) From 3a8a57d45f40c84477e30e1e43db683eb09e031b Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Thu, 20 Feb 2025 13:43:21 -0500 Subject: [PATCH 046/124] Added get multi deployment details. --- ads/aqua/common/entities.py | 4 +-- ads/aqua/model/model.py | 19 +++++++++++++ ads/aqua/modeldeployment/deployment.py | 37 +++++++++++++++++++++++--- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index 69bea481d..f0f600ec4 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -2,7 +2,7 @@ # Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
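Editor's note on PATCH 045 above: validate_multimodel_deployment_feasibility checks a requested model group against the per-shape deployment configs in four steps: the shape must appear in the GPU allocation map, every model must list that shape, each model's requested GPU count must be one of its allowed counts for that shape, and the summed request must fit the shape's total GPUs. The condensed sketch below restates that decision logic only; validate_feasibility and the plain-dict layout are stand-ins for the pydantic summary objects, and the sample numbers are borrowed loosely from the test fixture.

    # Illustrative restatement of the feasibility rules from PATCH 045 above;
    # plain dicts stand in for ModelDeploymentConfigSummary and friends.

    def validate_feasibility(models, shape, gpu_allocation, deployment_config):
        """models: list of (model_id, gpu_count); raises ValueError on the first failed rule."""
        if not models:
            raise ValueError("At least one model is required for multi-model deployment.")
        if shape not in gpu_allocation:
            raise ValueError(f"Shape '{shape}' is not supported for this model group.")

        total_available = gpu_allocation[shape]["total_gpus_available"]
        requested = 0
        for model_id, gpu_count in models:
            config = deployment_config.get(model_id)
            if config is None:
                raise ValueError(f"Model '{model_id}' is missing from the configuration.")
            if shape not in config["shape"]:
                raise ValueError(f"Model '{model_id}' is incompatible with shape '{shape}'.")
            if gpu_count not in config["gpu_counts"][shape]:
                raise ValueError(
                    f"Model '{model_id}' cannot use {gpu_count} GPU(s) on '{shape}'."
                )
            requested += gpu_count

        if requested > total_available:
            raise ValueError(
                f"Requested {requested} GPUs but '{shape}' only offers {total_available}."
            )


    # Example mirroring the fixture: three models, two GPUs each, on an 8-GPU shape.
    gpu_allocation = {"BM.GPU.H100.8": {"total_gpus_available": 8}}
    deployment_config = {
        m: {"shape": ["BM.GPU.H100.8"], "gpu_counts": {"BM.GPU.H100.8": [1, 2, 8]}}
        for m in ("model_a", "model_b", "model_c")
    }
    validate_feasibility(
        [("model_a", 2), ("model_b", 2), ("model_c", 2)],
        "BM.GPU.H100.8",
        gpu_allocation,
        deployment_config,
    )
    print("feasible")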
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -from typing import Optional +from typing import Optional, Union from ads.aqua.config.utils.serializer import Serializable @@ -51,7 +51,7 @@ class AquaMultiModelRef(Serializable): model_id: str model_name: Optional[str] = None gpu_count: Optional[int] = None - env_var: Optional[dict] = None + env_var: Optional[Union[dict, str]] = None class Config: extra = "ignore" diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 7276de085..03a596478 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -301,6 +301,25 @@ def create_multi( description=f"Name of '{display_name}' in the multimodel group.", category="Other", ) + model_custom_metadata.add( + key=f"model-gpu-count-{idx}", + value=model.gpu_count, + description=f"GPU count of '{display_name}' in the multimodel group.", + category="Other", + ) + user_params = ( + " ".join( + f"{name} {value}" for name, value in model.env_var.items() + ).strip() + if isinstance(model.env_var, dict) + else model.env_var + ) + model_custom_metadata.add( + key=f"model-user-params-{idx}", + value=user_params, + description=f"User params of '{display_name}' in the multimodel group.", + category="Other", + ) model_custom_metadata.add( key=f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{idx}", value=model_artifact_path, diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 486751d0f..e241b9abc 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -8,7 +8,7 @@ from pydantic import ValidationError from ads.aqua.app import AquaApp, logger -from ads.aqua.common.entities import ContainerSpec +from ads.aqua.common.entities import AquaMultiModelRef, ContainerSpec from ads.aqua.common.enums import InferenceContainerTypeFamily, Tags from ads.aqua.common.errors import AquaRuntimeError, AquaValueError from ads.aqua.common.utils import ( @@ -36,6 +36,7 @@ from ads.aqua.data import AquaResourceIdentifier from ads.aqua.finetuning.finetuning import FineTuneCustomMetadata from ads.aqua.model import AquaModelApp +from ads.aqua.model.constants import ModelCustomMetadataFields from ads.aqua.modeldeployment.entities import ( AquaDeployment, AquaDeploymentConfig, @@ -626,10 +627,38 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": source_id=model_deployment.id, ) + aqua_deployment = AquaDeployment.from_oci_model_deployment( + model_deployment, self.region + ) + + if Tags.MULTIMODEL_TYPE_TAG in model_deployment.freeform_tags: + aqua_model_id = model_deployment.freeform_tags.get( + Tags.AQUA_MODEL_ID_TAG, UNKNOWN + ) + if not aqua_model_id: + raise AquaRuntimeError( + f"Invalid multi model deployment {model_deployment_id}." + f"Make sure the {Tags.AQUA_MODEL_ID_TAG} tag is added to the deployment." 
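Editor's note on the multi-model branch above and the reconstruction that continues just below: PATCH 046 relies on an index-suffixed custom-metadata scheme (model-id-<idx>, model-name-<idx>, model-gpu-count-<idx>, model-user-params-<idx>, plus a group-count entry), with the per-model env vars flattened into one space-joined string. The toy round trip below uses ordinary dicts; pack_group, unpack_group, and the count key name are invented for illustration.

    # Toy illustration of the index-suffixed metadata scheme used above;
    # plain dicts replace ModelCustomMetadata, and the helpers are invented.

    def pack_group(models):
        """Flatten a model group into 'key-<idx>' metadata entries."""
        metadata = {"multi-model-group-count": len(models)}
        for idx, model in enumerate(models):
            metadata[f"model-id-{idx}"] = model["model_id"]
            metadata[f"model-name-{idx}"] = model["model_name"]
            metadata[f"model-gpu-count-{idx}"] = model["gpu_count"]
            # Env vars are stored as one space-joined string, e.g. "--key value".
            metadata[f"model-user-params-{idx}"] = " ".join(
                f"{k} {v}" for k, v in model["env_var"].items()
            ).strip()
        return metadata


    def unpack_group(metadata):
        """Rebuild the model group; note the range() over the stored count."""
        count = int(metadata["multi-model-group-count"])
        return [
            {
                "model_id": metadata[f"model-id-{idx}"],
                "model_name": metadata[f"model-name-{idx}"],
                "gpu_count": metadata[f"model-gpu-count-{idx}"],
                "env_var": metadata[f"model-user-params-{idx}"],
            }
            for idx in range(count)
        ]


    group = [
        {
            "model_id": "ocid1...one",
            "model_name": "model_one",
            "gpu_count": 1,
            "env_var": {"--max-model-len": "2096"},
        }
    ]
    assert unpack_group(pack_group(group))[0]["model_name"] == "model_one"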
+ ) + aqua_model = DataScienceModel.from_id(aqua_model_id) + custom_metadata_list = aqua_model.custom_metadata_list + model_group_count = int( + custom_metadata_list.get( + ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT + ).value + ) + aqua_deployment.models = [ + AquaMultiModelRef( + model_id=custom_metadata_list.get(f"model-id-{idx}").value, + model_name=custom_metadata_list.get(f"model-name-{idx}").value, + gpu_count=custom_metadata_list.get(f"model-gpu-count-{idx}").value, + env_var=custom_metadata_list.get(f"model-user-params-{idx}").value, + ) + for idx in model_group_count + ] + return AquaDeploymentDetail( - **vars( - AquaDeployment.from_oci_model_deployment(model_deployment, self.region) - ), + **vars(aqua_deployment), log_group=AquaResourceIdentifier( log_group_id, log_group_name, log_group_url ), From 9958da0a9e3dad1d1e5c90ad7c2f78168c8beea4 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Thu, 20 Feb 2025 10:54:48 -0800 Subject: [PATCH 047/124] fixed code review 2nd round of comments --- ads/aqua/modeldeployment/deployment.py | 2 +- ads/aqua/modeldeployment/entities.py | 29 ++++++------ .../with_extras/aqua/test_deployment.py | 46 +++++++++---------- 3 files changed, 37 insertions(+), 40 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 863b3b39c..14a2e7e76 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -171,7 +171,7 @@ def create( models_config_summary=model_config_summary ) except ConfigValidationError as err: - raise AquaValueError(f"Error: {err}") from err + raise AquaValueError(f"{err}") from err aqua_model = model_app.create_multi( models=create_deployment_details.models, diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 8c5a4d522..907320756 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -293,8 +293,8 @@ class ConfigValidationError(Exception): def __init__( self, - message: str = """Validation failed: The provided model group configuration is not compatible with the selected instance shape. - Please check GPU count per model and multi-model deployment support for the selected instance shape.""", + message: str = """Validation failed: The provided model group configuration is incompatible with the selected instance shape. + Please verify the GPU count per model and ensure multi-model deployment is supported for the chosen instance shape.""", ): super().__init__( message @@ -451,18 +451,17 @@ def validate_multimodel_deployment_feasibility(self, models_config_summary: Mode """ if not self.models: logger.error( - "Validation Failed: User defined model group is None." + "User defined model group (List[AquaMultiModelRef]) is None." ) - raise ConfigValidationError("""Validation Failed: At least two models are required for multi-model deployment, - but only none were provided. Add 2 or more models in the model group to proceed.""") + raise ConfigValidationError("Multi-model deployment requires at least one model, but none were provided. 
Please add one or more models to the model group to proceed.") selected_shape = self.instance_shape if selected_shape not in models_config_summary.gpu_allocation: logger.error( - f"Validation Failed: The model group are not compatible with the selected instance shape {selected_shape}" + f"The model group is not compatible with the selected instance shape {selected_shape}" ) - raise ConfigValidationError("Validation Failed: Select a different instance shape. The selected instance shape is not supported.") + raise ConfigValidationError(f"The model group is not compatible with the selected instance shape '{selected_shape}'. Select a different instance shape.") total_available_gpus = models_config_summary.gpu_allocation[selected_shape].total_gpus_available @@ -473,9 +472,9 @@ def validate_multimodel_deployment_feasibility(self, models_config_summary: Mode if len(missing_model_keys) > 0: logger.error( - f"Validation Failed: Missing the following model entry with key {missing_model_keys} in ModelDeploymentConfigSummary" + f"Missing the following model entry with key {missing_model_keys} in ModelDeploymentConfigSummary" ) - raise ConfigValidationError("Validation Failed: One or more selected models are missing from the configuration, preventing validation for deployment on the given shape.") + raise ConfigValidationError("One or more selected models are missing from the configuration, preventing validation for deployment on the given shape.") sum_model_gpus = 0 @@ -486,10 +485,10 @@ def validate_multimodel_deployment_feasibility(self, models_config_summary: Mode if selected_shape not in aqua_deployment_config.shape: logger.error( - f"Validation Failed: Model with OCID {model.model_id} in the model group is not compatible with the selected instance shape: {selected_shape}" + f"Model with OCID {model.model_id} in the model group is not compatible with the selected instance shape: {selected_shape}" ) raise ConfigValidationError( - "Validation Failed: Select a different instance shape. One or more models in the group are incompatible with the selected instance shape." + "Select a different instance shape. One or more models in the group are incompatible with the selected instance shape." ) @@ -501,18 +500,18 @@ def validate_multimodel_deployment_feasibility(self, models_config_summary: Mode if model.gpu_count not in valid_gpu_configurations: valid_gpu_str = ", ".join(map(str, valid_gpu_configurations)) logger.error( - f"Validation Failed: Model {model.model_id} allocated {model.gpu_count} GPUs by user, but its deployment configuration requires either {valid_gpu_str} GPUs." + f"Model {model.model_id} allocated {model.gpu_count} GPUs by user, but its deployment configuration requires either {valid_gpu_str} GPUs." ) raise ConfigValidationError( - "Validation Failed: Change the GPU count for one or more models in the model group. Adjust GPU allocations per model or choose a larger instance shape." + "Change the GPU count for one or more models in the model group. Adjust GPU allocations per model or choose a larger instance shape." ) if sum_model_gpus > total_available_gpus: logger.error( - f"Validation Failed: Selected shape {selected_shape} has {total_available_gpus} GPUs while model group has {sum_model_gpus} GPUs." + f"Selected shape {selected_shape} has {total_available_gpus} GPUs while model group has {sum_model_gpus} GPUs." ) raise ConfigValidationError( - "Validation Failed: Total requested GPU count exceeds the available GPU capacity for the selected instance shape. 
Adjust GPU allocations per model or choose a larger instance shape." + "Total requested GPU count exceeds the available GPU capacity for the selected instance shape. Adjust GPU allocations per model or choose a larger instance shape." ) class Config: diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index d3a9d414d..64b3857f8 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -1249,25 +1249,23 @@ def validate_multimodel_deployment_feasibility_helper(self, models, instance_sha with open(config_json, "r") as _file: config = json.load(_file) - # config['gpu_allocation'] = { - # instance_shape: { - # "models": [ - # ], - # "total_gpus_available": total_gpus - # } - # } - if models: aqua_models = [AquaMultiModelRef(model_id=x["ocid"], gpu_count=x["gpu_count"]) for x in models] - else: - aqua_models = None - mock_create_deployment_details = CreateModelDeploymentDetails( + mock_create_deployment_details = CreateModelDeploymentDetails( models=aqua_models, instance_shape=instance_shape, display_name=display_name, freeform_tags={Tags.MULTIMODEL_TYPE_TAG: multi_model} ) + else: + model_id = 'model_a' + mock_create_deployment_details = CreateModelDeploymentDetails( + model_id = model_id, + instance_shape=instance_shape, + display_name=display_name, + freeform_tags={Tags.MULTIMODEL_TYPE_TAG: multi_model} + ) mock_models_config_summary = ModelDeploymentConfigSummary( **(config) @@ -1319,14 +1317,14 @@ def test_validate_multimodel_deployment_feasibility_positive(self, models, insta @pytest.mark.parametrize( "models, instance_shape, display_name, total_gpus, value_error", [ - # ( - # [{"ocid": "model_a", "gpu_count" : 2}], - # "BM.GPU.H100.8", - # 'test_a', - # 8, - # "Validation Failed: At least two models are required for multi-model deployment, but only none were provided. Add 2 or more models in the model group to proceed." - - # ), + ( + None, + "BM.GPU.H100.8", + 'test_a', + 8, + "Multi-model deployment requires at least one model, but none were provided. Please add one or more models to the model group to proceed." + + ), ( [ {"ocid": "model_a", "gpu_count" : 2}, @@ -1335,7 +1333,7 @@ def test_validate_multimodel_deployment_feasibility_positive(self, models, insta "invalid_shape", 'test_a', 8, - "Validation Failed: Select a different instance shape. The selected instance shape is not supported." + "The model group is not compatible with the selected instance shape 'invalid_shape'. Select a different instance shape." ), ( [ @@ -1346,7 +1344,7 @@ def test_validate_multimodel_deployment_feasibility_positive(self, models, insta "BM.GPU.H100.8", 'test_a', 8, - "Validation Failed: One or more selected models are missing from the configuration, preventing validation for deployment on the given shape." + "One or more selected models are missing from the configuration, preventing validation for deployment on the given shape." ), ( @@ -1357,7 +1355,7 @@ def test_validate_multimodel_deployment_feasibility_positive(self, models, insta "BM.GPU.H100.8", 'test_a', 8, - "Validation Failed: Change the GPU count for one or more models in the model group. Adjust GPU allocations per model or choose a larger instance shape." + "Change the GPU count for one or more models in the model group. Adjust GPU allocations per model or choose a larger instance shape." 
), ( @@ -1368,7 +1366,7 @@ def test_validate_multimodel_deployment_feasibility_positive(self, models, insta "BM.GPU.A100-v2.8", 'test_a', 8, - "Validation Failed: Select a different instance shape. One or more models in the group are incompatible with the selected instance shape." + "Select a different instance shape. One or more models in the group are incompatible with the selected instance shape." ), ( [ @@ -1378,7 +1376,7 @@ def test_validate_multimodel_deployment_feasibility_positive(self, models, insta "BM.GPU.H100.8", 'test_a', 8, - "Validation Failed: Total requested GPU count exceeds the available GPU capacity for the selected instance shape. Adjust GPU allocations per model or choose a larger instance shape." + "Total requested GPU count exceeds the available GPU capacity for the selected instance shape. Adjust GPU allocations per model or choose a larger instance shape." ) ] From 78d3e3e9f601b15c6ba0ec5fadac3a72e62e1f01 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Thu, 20 Feb 2025 16:07:38 -0500 Subject: [PATCH 048/124] Updated pr. --- ads/aqua/model/model.py | 4 +-- ads/aqua/modeldeployment/deployment.py | 30 ++++++++++++++++++-- tests/unitary/with_extras/aqua/test_model.py | 2 +- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 105a05063..b5314cdbc 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -285,8 +285,8 @@ def create_multi( if default_deployment_container != deployment_container: raise AquaValueError( - f"Unsopported deployment container {deployment_container} detected for model {source_model.id}. " - f"Currently only {InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY} container is supported for multi model deployment." + f"Unsupported deployment container '{deployment_container}' for model '{source_model.id}'. " + f"Only '{InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY}' is supported for multi-model deployments." ) # Add model-specific metadata diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 62fa3b12c..81dc745d8 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -159,6 +159,10 @@ def create( freeform_tags=freeform_tags, defined_tags=defined_tags, ) + return self._create( + aqua_model=aqua_model, + create_deployment_details=create_deployment_details, + ) else: aqua_model = model_app.create_multi( models=create_deployment_details.models, @@ -172,6 +176,26 @@ def create( create_deployment_details=create_deployment_details, ) + def _create( + self, + aqua_model: DataScienceModel, + create_deployment_details: CreateModelDeploymentDetails, + ) -> AquaDeployment: + """Builds the configurations required by single model deployment and creates the deployment. + + Parameters + ---------- + aqua_model : DataScienceModel + An instance of Aqua data science model. + create_deployment_details : CreateModelDeploymentDetails + An instance of CreateModelDeploymentDetails containing all required and optional + fields for creating a model deployment via Aqua. + + Returns + ------- + AquaDeployment + An Aqua deployment instance. + """ tags = {} for tag in [ Tags.AQUA_SERVICE_MODEL_TAG, @@ -395,7 +419,7 @@ def _create_multi( self, aqua_model: DataScienceModel, create_deployment_details: CreateModelDeploymentDetails, - ) -> Dict: + ) -> AquaDeployment: """Builds the environment variables required by multi deployment container and creates the deployment. 
Parameters @@ -428,7 +452,9 @@ def _create_multi( for idx, model in enumerate(create_deployment_details.models): user_params = ( - "".join(f"{name} {value}" for name, value in model.env_var.items()) + " ".join( + f"{name} {value}" for name, value in model.env_var.items() + ).strip() if model.env_var else UNKNOWN ) diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index 2c149217d..0ef8525f5 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -392,7 +392,7 @@ def test_create_multimodel( with pytest.raises( AquaValueError, - match="Unsopported deployment container odsc-tgi-serving detected for model mock_model_id. Currently only odsc-vllm-serving container is supported for multi model deployment.", + match="Unsupported deployment container 'odsc-tgi-serving' for model 'mock_model_id'. Only 'odsc-vllm-serving' is supported for multi-model deployments.", ): model = self.app.create_multi( models=[model_info_1, model_info_2], From cb31632408b9f6afc87b4dd2361ab06fcbef7ee6 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Thu, 20 Feb 2025 16:28:36 -0500 Subject: [PATCH 049/124] Updated pr. --- ads/aqua/common/entities.py | 4 ++-- ads/aqua/common/enums.py | 1 + ads/aqua/modeldeployment/deployment.py | 4 +++- ads/aqua/modeldeployment/entities.py | 1 + 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index f0f600ec4..69bea481d 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -2,7 +2,7 @@ # Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -from typing import Optional, Union +from typing import Optional from ads.aqua.config.utils.serializer import Serializable @@ -51,7 +51,7 @@ class AquaMultiModelRef(Serializable): model_id: str model_name: Optional[str] = None gpu_count: Optional[int] = None - env_var: Optional[Union[dict, str]] = None + env_var: Optional[dict] = None class Config: extra = "ignore" diff --git a/ads/aqua/common/enums.py b/ads/aqua/common/enums.py index 101a81952..5d672b036 100644 --- a/ads/aqua/common/enums.py +++ b/ads/aqua/common/enums.py @@ -31,6 +31,7 @@ class Tags(ExtendedEnum): AQUA_TAG = "OCI_AQUA" AQUA_SERVICE_MODEL_TAG = "aqua_service_model" AQUA_FINE_TUNED_MODEL_TAG = "aqua_fine_tuned_model" + AQUA_MODEL_ID_TAG = "aqua_model_id" AQUA_MODEL_NAME_TAG = "aqua_model_name" AQUA_EVALUATION = "aqua_evaluation" AQUA_FINE_TUNING = "aqua_finetuning" diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index e241b9abc..58cbb5d4c 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -652,7 +652,9 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": model_id=custom_metadata_list.get(f"model-id-{idx}").value, model_name=custom_metadata_list.get(f"model-name-{idx}").value, gpu_count=custom_metadata_list.get(f"model-gpu-count-{idx}").value, - env_var=custom_metadata_list.get(f"model-user-params-{idx}").value, + env_var=get_params_dict( + custom_metadata_list.get(f"model-user-params-{idx}").value + ), ) for idx in model_group_count ] diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 471dba138..8e7d2919c 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -35,6 +35,7 @@ class 
AquaDeployment(Serializable): display_name: Optional[str] = None aqua_service_model: Optional[bool] = None model_id: str = None + models: Optional[AquaMultiModelRef] = None aqua_model_name: Optional[str] = None state: Optional[str] = None description: Optional[str] = None From 9864e9320122dd724e7d814ea25cf185d0e02bd8 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Thu, 20 Feb 2025 16:31:06 -0500 Subject: [PATCH 050/124] Updated pr. --- ads/aqua/model/model.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 03a596478..f79eb888a 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -307,13 +307,9 @@ def create_multi( description=f"GPU count of '{display_name}' in the multimodel group.", category="Other", ) - user_params = ( - " ".join( - f"{name} {value}" for name, value in model.env_var.items() - ).strip() - if isinstance(model.env_var, dict) - else model.env_var - ) + user_params = " ".join( + f"{name} {value}" for name, value in model.env_var.items() + ).strip() model_custom_metadata.add( key=f"model-user-params-{idx}", value=user_params, From 2c803e60a973280b7fa7a30bc38580b5921940f5 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 21 Feb 2025 12:53:15 -0500 Subject: [PATCH 051/124] Updated pr. --- ads/aqua/app.py | 3 +++ ads/aqua/constants.py | 1 + ads/aqua/model/model.py | 17 +++++++++++++ ads/aqua/modeldeployment/deployment.py | 9 +++++-- tests/unitary/with_extras/aqua/test_model.py | 25 ++++++++++++++++++++ 5 files changed, 53 insertions(+), 2 deletions(-) diff --git a/ads/aqua/app.py b/ads/aqua/app.py index f94b2b29b..937a0efdd 100644 --- a/ads/aqua/app.py +++ b/ads/aqua/app.py @@ -6,9 +6,11 @@ import os import traceback from dataclasses import fields +from datetime import datetime, timedelta from typing import Dict, Optional, Union import oci +from cachetools import TTLCache, cached from oci.data_science.models import UpdateModelDetails, UpdateModelProvenanceDetails from ads import set_auth @@ -268,6 +270,7 @@ def if_artifact_exist(self, model_id: str, **kwargs) -> bool: logger.info(f"Artifact not found in model {model_id}.") return False + @cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=1), timer=datetime.now)) def get_config( self, model_id: str, diff --git a/ads/aqua/constants.py b/ads/aqua/constants.py index 5a909503c..9aff5749c 100644 --- a/ads/aqua/constants.py +++ b/ads/aqua/constants.py @@ -40,6 +40,7 @@ AQUA_MODEL_ARTIFACT_FILE = "model_file" HF_METADATA_FOLDER = ".cache/" HF_LOGIN_DEFAULT_TIMEOUT = 2 +MODEL_NAME_DELIMITER = ";" TRAINING_METRICS_FINAL = "training_metrics_final" VALIDATION_METRICS_FINAL = "validation_metrics_final" diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index b5314cdbc..3da7152f5 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -65,6 +65,7 @@ FineTuningCustomMetadata, FineTuningMetricCategories, ModelCustomMetadataFields, + ModelTask, ModelType, ) from ads.aqua.model.entities import ( @@ -263,6 +264,22 @@ def create_multi( for idx, model in enumerate(models): source_model = DataScienceModel.from_id(model.model_id) display_name = source_model.display_name + + if not source_model.freeform_tags.get(Tags.AQUA_SERVICE_MODEL_TAG, UNKNOWN): + raise AquaValueError( + f"Invalid selected model {display_name}. " + "Currently only service models are supported for multi model deployment." 
+ ) + + if ( + source_model.freeform_tags.get(Tags.TASK, UNKNOWN) + != ModelTask.TEXT_GENERATION + ): + raise AquaValueError( + f"Invalid or missing {Tags.TASK} tag for selected model {display_name}. " + f"Currently only {ModelTask.TEXT_GENERATION} models are support for multi model deployment." + ) + display_name_list.append(display_name) # Retrieve model artifact diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 81dc745d8..f13e51cfd 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -33,6 +33,7 @@ AQUA_MODEL_TYPE_SERVICE, AQUA_MULTI_MODEL_CONFIG, MODEL_BY_REFERENCE_OSS_PATH_KEY, + MODEL_NAME_DELIMITER, UNKNOWN, UNKNOWN_DICT, ) @@ -463,10 +464,11 @@ def _create_multi( container_params, user_params, container_type_key ) if restricted_params: + selected_model = model.model_name or model.model_id raise AquaValueError( f"Parameters {restricted_params} are set by Aqua " f"and cannot be overridden or are invalid." - f"Select other parameters for model {model.model_id}." + f"Select other parameters for model {selected_model}." ) params = container_params @@ -474,6 +476,9 @@ def _create_multi( multi_model_deployment = deployment_config.configuration.get( create_deployment_details.instance_shape, ConfigurationItem() ).multi_model_deployment + # finds the corresponding deployment parameters based on the gpu count + # and combines them with user's parameters. Existing deployment parameters + # will be overriden by user's parameters. for item in multi_model_deployment: if ( model.gpu_count @@ -524,7 +529,7 @@ def _create_multi( **(create_deployment_details.freeform_tags or UNKNOWN_DICT), } - model_name = ", ".join(model_name_list) + model_name = f"{MODEL_NAME_DELIMITER} ".join(model_name_list) aqua_deployment = self._create_deployment( create_deployment_details=create_deployment_details, diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index 0ef8525f5..f29150ab0 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -225,6 +225,7 @@ class TestDataset: SERVICE_COMPARTMENT_ID = "ocid1.compartment.oc1.." COMPARTMENT_ID = "ocid1.compartment.oc1.." + SERVICE_MODEL_ID = "ocid1.datasciencemodel.oc1.iad." @patch("ads.config.COMPARTMENT_OCID", "ocid1.compartment.oc1.") @@ -390,6 +391,30 @@ def test_create_multimodel( env_var={"params": "--trust-remote-code --max-model-len 32000"}, ) + with pytest.raises( + AquaValueError, + match="Invalid selected model test_display_name. Currently only service models are supported for multi model deployment.", + ): + model = self.app.create_multi( + models=[model_info_1, model_info_2], + project_id="test_project_id", + compartment_id="test_compartment_id", + ) + + mock_model.freeform_tags["aqua_service_model"] = TestDataset.SERVICE_MODEL_ID + + with pytest.raises( + AquaValueError, + match="Invalid or missing task tag for selected model test_display_name. Currently only text-generation models are support for multi model deployment.", + ): + model = self.app.create_multi( + models=[model_info_1, model_info_2], + project_id="test_project_id", + compartment_id="test_compartment_id", + ) + + mock_model.freeform_tags["task"] = "text-generation" + with pytest.raises( AquaValueError, match="Unsupported deployment container 'odsc-tgi-serving' for model 'mock_model_id'. 
Only 'odsc-vllm-serving' is supported for multi-model deployments.", From 9c1513a38b18b219199ea1033b93ff3606afa16b Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 21 Feb 2025 15:38:40 -0500 Subject: [PATCH 052/124] Added unit tests. --- ads/aqua/model/model.py | 34 +-- ads/aqua/modeldeployment/deployment.py | 14 +- .../deployment/aqua_multi_model.yaml | 24 ++ .../with_extras/aqua/test_deployment.py | 226 ++++++++++++------ 4 files changed, 214 insertions(+), 84 deletions(-) diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 4cad4758c..22d32a636 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -319,21 +319,27 @@ def create_multi( description=f"Name of '{display_name}' in the multimodel group.", category="Other", ) - model_custom_metadata.add( - key=f"model-gpu-count-{idx}", - value=model.gpu_count, - description=f"GPU count of '{display_name}' in the multimodel group.", - category="Other", - ) - user_params = " ".join( - f"{name} {value}" for name, value in model.env_var.items() - ).strip() - model_custom_metadata.add( - key=f"model-user-params-{idx}", - value=user_params, - description=f"User params of '{display_name}' in the multimodel group.", - category="Other", + if model.gpu_count: + model_custom_metadata.add( + key=f"model-gpu-count-{idx}", + value=model.gpu_count, + description=f"GPU count of '{display_name}' in the multimodel group.", + category="Other", + ) + user_params = ( + " ".join( + f"{name} {value}" for name, value in model.env_var.items() + ).strip() + if model.env_var + else UNKNOWN ) + if user_params: + model_custom_metadata.add( + key=f"model-user-params-{idx}", + value=user_params, + description=f"User params of '{display_name}' in the multimodel group.", + category="Other", + ) model_custom_metadata.add( key=f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{idx}", value=model_artifact_path, diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 96c05e87f..8ed29e63e 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -69,6 +69,7 @@ ModelDeploymentInfrastructure, ModelDeploymentMode, ) +from ads.model.model_metadata import ModelCustomMetadataItem from ads.telemetry import telemetry @@ -881,12 +882,19 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": AquaMultiModelRef( model_id=custom_metadata_list.get(f"model-id-{idx}").value, model_name=custom_metadata_list.get(f"model-name-{idx}").value, - gpu_count=custom_metadata_list.get(f"model-gpu-count-{idx}").value, + gpu_count=custom_metadata_list.get( + f"model-gpu-count-{idx}", + ModelCustomMetadataItem(key=f"model-gpu-count-{idx}"), + ).value, env_var=get_params_dict( - custom_metadata_list.get(f"model-user-params-{idx}").value + custom_metadata_list.get( + f"model-user-params-{idx}", + ModelCustomMetadataItem(key=f"model-user-params-{idx}"), + ).value + or UNKNOWN_DICT ), ) - for idx in model_group_count + for idx in range(model_group_count) ] return AquaDeploymentDetail( diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml index 09c72a3ed..00f141bd3 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml @@ -12,6 +12,14 @@ spec: description: Name of model_one in the multimodel group. 
key: model-name-0 value: model_one + - category: Other + description: GPU count of model_one in the multimodel group. + key: model-gpu-count-0 + value: 1 + - category: Other + description: User params of model_one in the multimodel group. + key: model-user-params-0 + value: --test_key_one test_value_one - category: Other description: Artifact path for model_one in the multimodel group. key: artifact_location-0 @@ -24,6 +32,14 @@ spec: description: Name of model_two in the multimodel group. key: model-name-1 value: model_two + - category: Other + description: GPU count of model_two in the multimodel group. + key: model-gpu-count-1 + value: 1 + - category: Other + description: User params of model_two in the multimodel group. + key: model-user-params-1 + value: --test_key_two test_value_two - category: Other description: Artifact path for model_two in the multimodel group. key: artifact_location-1 @@ -36,6 +52,14 @@ spec: description: Name of model_three in the multimodel group. key: model-name-2 value: model_three + - category: Other + description: GPU count of model_three in the multimodel group. + key: model-gpu-count-2 + value: 1 + - category: Other + description: User params of model_three in the multimodel group. + key: model-user-params-2 + value: --test_key_three test_value_three - category: Other description: Artifact path for model_three in the multimodel group. key: artifact_location-2 diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index ad0078a30..ababfd2e4 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -600,6 +600,27 @@ class TestDataset: "model_c": [1, 2, 8], } + multi_model_deployment_model_attributes = [ + { + "env_var": {"--test_key_one": "test_value_one"}, + "gpu_count": 1, + "model_id": "ocid1.compartment.oc1..", + "model_name": "model_one", + }, + { + "env_var": {"--test_key_two": "test_value_two"}, + "gpu_count": 1, + "model_id": "ocid1.compartment.oc1..", + "model_name": "model_two", + }, + { + "env_var": {"--test_key_three": "test_value_three"}, + "gpu_count": 1, + "model_id": "ocid1.compartment.oc1..", + "model_name": "model_three", + }, + ] + class TestAquaDeployment(unittest.TestCase): def setUp(self): @@ -645,9 +666,9 @@ def test_list_deployments(self): expected_attributes = AquaDeployment.__annotations__.keys() for r in results: actual_attributes = r.to_dict() - assert set(actual_attributes) == set(expected_attributes), ( - "Attributes mismatch" - ) + assert set(actual_attributes) == set( + expected_attributes + ), "Attributes mismatch" @patch("ads.aqua.modeldeployment.deployment.get_resource_name") def test_get_deployment(self, mock_get_resource_name): @@ -683,6 +704,52 @@ def test_get_deployment(self, mock_get_resource_name): assert result.log.name == "log-name" assert result.log_group.name == "log-group-name" + @patch("ads.model.DataScienceModel.from_id") + @patch("ads.aqua.modeldeployment.deployment.get_resource_name") + def test_get_multi_model_deployment( + self, mock_get_resource_name, mock_model_from_id + ): + multi_model_deployment = copy.deepcopy( + TestDataset.multi_model_deployment_object + ) + self.app.ds_client.get_model_deployment = MagicMock( + return_value=oci.response.Response( + status=200, + request=MagicMock(), + headers=MagicMock(), + data=oci.data_science.models.ModelDeploymentSummary( + **multi_model_deployment + ), + ) + ) + mock_get_resource_name.side_effect = lambda param: ( + "log-group-name" + 
if param.startswith("ocid1.loggroup") + else "log-name" + if param.startswith("ocid1.log") + else "" + ) + + aqua_multi_model = os.path.join( + self.curr_dir, "test_data/deployment/aqua_multi_model.yaml" + ) + + mock_model_from_id.return_value = DataScienceModel.from_yaml( + uri=aqua_multi_model + ) + + result = self.app.get(model_deployment_id=TestDataset.MODEL_DEPLOYMENT_ID) + + expected_attributes = set(AquaDeploymentDetail.__annotations__.keys()) | set( + AquaDeployment.__annotations__.keys() + ) + actual_attributes = result.to_dict() + assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" + assert len(result.models) == 3 + assert [ + model.model_dump() for model in result.models + ] == TestDataset.multi_model_deployment_model_attributes + def test_get_deployment_missing_tags(self): """Test for returning a runtime error if OCI_AQUA tag is missing.""" with pytest.raises( @@ -811,7 +878,6 @@ def test_verify_compatibility(self): assert result[1] == 0 assert result[2] == [] - @patch("ads.aqua.modeldeployment.deployment.get_container_config") @patch("ads.aqua.model.AquaModelApp.create") @patch("ads.aqua.modeldeployment.deployment.get_container_image") @@ -1137,8 +1203,12 @@ def test_create_deployment_for_tei_byoc_embedding_model( @patch("ads.aqua.modeldeployment.deployment.get_container_image") @patch("ads.model.deployment.model_deployment.ModelDeployment.deploy") @patch("ads.aqua.modeldeployment.AquaDeploymentApp.get_deployment_config") + @patch( + "ads.aqua.modeldeployment.entities.CreateModelDeploymentDetails.validate_multimodel_deployment_feasibility" + ) def test_create_deployment_for_multi_model( self, + mock_validate_multimodel_deployment_feasibility, mock_get_deployment_config, mock_deploy, mock_get_container_image, @@ -1146,6 +1216,12 @@ def test_create_deployment_for_multi_model( mock_get_container_config, ): """Test to create a deployment for multi models.""" + mock_validate_multimodel_deployment_feasibility.return_value = MagicMock() + self.app.get_multimodel_deployment_config = MagicMock( + return_value=AquaDeploymentConfig( + **TestDataset.aqua_deployment_multi_model_config_summary + ) + ) aqua_multi_model = os.path.join( self.curr_dir, "test_data/deployment/aqua_multi_model.yaml" ) @@ -1465,33 +1541,38 @@ def test_get_model_deployment_response(self, mock_post): class TestCreateModelDeploymentDetails: curr_dir = os.path.dirname(__file__) # Define curr_dir - def validate_multimodel_deployment_feasibility_helper(self, models, instance_shape, display_name, total_gpus, multi_model="true"): - config_json = os.path.join(self.curr_dir, "test_data/deployment/aqua_summary_multi_model.json") + def validate_multimodel_deployment_feasibility_helper( + self, models, instance_shape, display_name, total_gpus, multi_model="true" + ): + config_json = os.path.join( + self.curr_dir, "test_data/deployment/aqua_summary_multi_model.json" + ) with open(config_json, "r") as _file: config = json.load(_file) if models: - aqua_models = [AquaMultiModelRef(model_id=x["ocid"], gpu_count=x["gpu_count"]) for x in models] + aqua_models = [ + AquaMultiModelRef(model_id=x["ocid"], gpu_count=x["gpu_count"]) + for x in models + ] mock_create_deployment_details = CreateModelDeploymentDetails( - models=aqua_models, - instance_shape=instance_shape, - display_name=display_name, - freeform_tags={Tags.MULTIMODEL_TYPE_TAG: multi_model} - ) + models=aqua_models, + instance_shape=instance_shape, + display_name=display_name, + freeform_tags={Tags.MULTIMODEL_TYPE_TAG: multi_model}, + ) else: - 
model_id = 'model_a' + model_id = "model_a" mock_create_deployment_details = CreateModelDeploymentDetails( - model_id = model_id, - instance_shape=instance_shape, - display_name=display_name, - freeform_tags={Tags.MULTIMODEL_TYPE_TAG: multi_model} - ) + model_id=model_id, + instance_shape=instance_shape, + display_name=display_name, + freeform_tags={Tags.MULTIMODEL_TYPE_TAG: multi_model}, + ) - mock_models_config_summary = ModelDeploymentConfigSummary( - **(config) - ) + mock_models_config_summary = ModelDeploymentConfigSummary(**(config)) mock_create_deployment_details.validate_multimodel_deployment_feasibility( models_config_summary=mock_models_config_summary @@ -1504,37 +1585,40 @@ def validate_multimodel_deployment_feasibility_helper(self, models, instance_sha [ {"ocid": "model_a", "gpu_count": 2}, {"ocid": "model_b", "gpu_count": 2}, - {"ocid": "model_c", "gpu_count": 2} + {"ocid": "model_c", "gpu_count": 2}, ], "BM.GPU.H100.8", "test_a", - 8 + 8, ), ( [ {"ocid": "model_a", "gpu_count": 2}, {"ocid": "model_b", "gpu_count": 1}, - {"ocid": "model_c", "gpu_count": 4} + {"ocid": "model_c", "gpu_count": 4}, ], "BM.GPU.H100.8", "test_a", - 8 + 8, ), ( [ {"ocid": "model_a", "gpu_count": 1}, {"ocid": "model_b", "gpu_count": 1}, - {"ocid": "model_c", "gpu_count": 2} + {"ocid": "model_c", "gpu_count": 2}, ], "BM.GPU.H100.8", "test_a", - 8 + 8, ), - - ] + ], ) - def test_validate_multimodel_deployment_feasibility_positive(self, models, instance_shape, display_name, total_gpus): - self.validate_multimodel_deployment_feasibility_helper(models, instance_shape, display_name, total_gpus) + def test_validate_multimodel_deployment_feasibility_positive( + self, models, instance_shape, display_name, total_gpus + ): + self.validate_multimodel_deployment_feasibility_helper( + models, instance_shape, display_name, total_gpus + ) @pytest.mark.parametrize( "models, instance_shape, display_name, total_gpus, value_error", @@ -1542,67 +1626,75 @@ def test_validate_multimodel_deployment_feasibility_positive(self, models, insta ( None, "BM.GPU.H100.8", - 'test_a', + "test_a", 8, - "Multi-model deployment requires at least one model, but none were provided. Please add one or more models to the model group to proceed." - + "Multi-model deployment requires at least one model, but none were provided. Please add one or more models to the model group to proceed.", ), ( [ - {"ocid": "model_a", "gpu_count" : 2}, - {"ocid": "model_b", "gpu_count" : 2}, - {"ocid": "model_c", "gpu_count" : 4}], + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], "invalid_shape", - 'test_a', + "test_a", 8, - "The model group is not compatible with the selected instance shape 'invalid_shape'. Select a different instance shape." + "The model group is not compatible with the selected instance shape 'invalid_shape'. Select a different instance shape.", ), ( [ - {"ocid": "model_a", "gpu_count" : 2}, - {"ocid": "model_b", "gpu_count" : 2}, - {"ocid": "model_c", "gpu_count" : 2}, - {"ocid": "model_d", "gpu_count" : 2}], + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 2}, + {"ocid": "model_d", "gpu_count": 2}, + ], "BM.GPU.H100.8", - 'test_a', + "test_a", 8, - "One or more selected models are missing from the configuration, preventing validation for deployment on the given shape." 
- + "One or more selected models are missing from the configuration, preventing validation for deployment on the given shape.", ), ( [ - {"ocid": "model_a", "gpu_count" : 2}, - {"ocid": "model_b", "gpu_count" : 4}, # model_b lacks this entry in loaded config - {"ocid": "model_c", "gpu_count" : 2}], + {"ocid": "model_a", "gpu_count": 2}, + { + "ocid": "model_b", + "gpu_count": 4, + }, # model_b lacks this entry in loaded config + {"ocid": "model_c", "gpu_count": 2}, + ], "BM.GPU.H100.8", - 'test_a', + "test_a", 8, - "Change the GPU count for one or more models in the model group. Adjust GPU allocations per model or choose a larger instance shape." - + "Change the GPU count for one or more models in the model group. Adjust GPU allocations per model or choose a larger instance shape.", ), ( [ - {"ocid": "model_a", "gpu_count" : 2}, - {"ocid": "model_b", "gpu_count" : 2}, - {"ocid": "model_c", "gpu_count" : 2}], # model c is lacks BM.GPU.A100-v2.8 + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 2}, + ], # model c is lacks BM.GPU.A100-v2.8 "BM.GPU.A100-v2.8", - 'test_a', + "test_a", 8, - "Select a different instance shape. One or more models in the group are incompatible with the selected instance shape." + "Select a different instance shape. One or more models in the group are incompatible with the selected instance shape.", ), ( [ - {"ocid": "model_a", "gpu_count" : 4}, - {"ocid": "model_b", "gpu_count" : 2}, - {"ocid": "model_c", "gpu_count" : 4}], + {"ocid": "model_a", "gpu_count": 4}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], "BM.GPU.H100.8", - 'test_a', + "test_a", 8, - "Total requested GPU count exceeds the available GPU capacity for the selected instance shape. Adjust GPU allocations per model or choose a larger instance shape." - - ) - ] + "Total requested GPU count exceeds the available GPU capacity for the selected instance shape. Adjust GPU allocations per model or choose a larger instance shape.", + ), + ], ) - def test_validate_multimodel_deployment_feasibility_negative(self, models, instance_shape, display_name, total_gpus, value_error): + def test_validate_multimodel_deployment_feasibility_negative( + self, models, instance_shape, display_name, total_gpus, value_error + ): with pytest.raises(ConfigValidationError, match=value_error): - self.validate_multimodel_deployment_feasibility_helper(models, instance_shape, display_name, total_gpus) + self.validate_multimodel_deployment_feasibility_helper( + models, instance_shape, display_name, total_gpus + ) From 187809308514f48c2cc7c1ebe6cfe669fd5d29b1 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 21 Feb 2025 16:11:30 -0500 Subject: [PATCH 053/124] Updated pr. 
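The negative cases in the preceding test module reduce to two checks: each model's requested GPU count must appear in that model's per-shape configuration, and the combined request must fit the shape's capacity. A minimal, self-contained sketch of that rule follows; the helper name and the allowed-count table are illustrative stand-ins, not the actual CreateModelDeploymentDetails logic.

# Simplified view of the feasibility rule exercised by the parametrized cases above.
# The allowed-count table is hypothetical; real values come from each model's
# deployment configuration for the selected shape.
def is_feasible(requests: dict, allowed_counts: dict, shape_total_gpus: int) -> bool:
    # Every model must request a GPU count permitted by its configuration for this shape.
    if not all(count in allowed_counts.get(model, []) for model, count in requests.items()):
        return False
    # The combined request must not exceed the shape's GPU capacity.
    return sum(requests.values()) <= shape_total_gpus

allowed = {"model_a": [1, 2, 4], "model_b": [1, 2], "model_c": [1, 2, 4, 8]}
print(is_feasible({"model_a": 2, "model_b": 2, "model_c": 4}, allowed, 8))  # True: 8 <= 8
print(is_feasible({"model_a": 4, "model_b": 2, "model_c": 4}, allowed, 8))  # False: 10 > 8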
--- ads/aqua/modeldeployment/deployment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 8ed29e63e..c34f3aa35 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -891,7 +891,7 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": f"model-user-params-{idx}", ModelCustomMetadataItem(key=f"model-user-params-{idx}"), ).value - or UNKNOWN_DICT + or UNKNOWN ), ) for idx in range(model_group_count) From 6689440cc954812ce97d9967b803d9a09dce2f7e Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 21 Feb 2025 15:09:27 -0800 Subject: [PATCH 054/124] Fixes unit tests --- ads/aqua/common/enums.py | 2 +- ads/aqua/model/model.py | 6 +++--- ads/aqua/modeldeployment/entities.py | 14 ++++++++------ .../deployment/aqua_create_gguf_deployment.yaml | 2 +- .../deployment/aqua_create_multi_deployment.yaml | 2 +- .../test_data/deployment/aqua_multi_model.yaml | 2 +- tests/unitary/with_extras/aqua/test_deployment.py | 12 ++++++------ tests/unitary/with_extras/aqua/test_finetuning.py | 2 +- .../with_extras/aqua/test_finetuning_handler.py | 2 +- tests/unitary/with_extras/aqua/test_model.py | 2 +- tests/unitary/with_extras/aqua/utils.py | 2 +- 11 files changed, 25 insertions(+), 23 deletions(-) diff --git a/ads/aqua/common/enums.py b/ads/aqua/common/enums.py index 5d672b036..0b9e6ab47 100644 --- a/ads/aqua/common/enums.py +++ b/ads/aqua/common/enums.py @@ -41,7 +41,7 @@ class Tags(ExtendedEnum): AQUA_EVALUATION_MODEL_ID = "evaluation_model_id" MODEL_FORMAT = "model_format" MODEL_ARTIFACT_FILE = "model_file" - MULTIMODEL_TYPE_TAG = "multimodel" + MULTIMODEL_TYPE_TAG = "aqua_multimodel" class InferenceContainerType(ExtendedEnum): diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 22d32a636..772bdc474 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -369,7 +369,7 @@ def create_multi( # Combine tags tags = { - Tags.AQUA_TAG: "active", + # Tags.AQUA_TAG: "active", Tags.MULTIMODEL_TYPE_TAG: "true", **(freeform_tags or {}), } @@ -1809,8 +1809,8 @@ def _rqs(self, compartment_id: str, model_type="FT", **kwargs): filter_tag = Tags.AQUA_FINE_TUNED_MODEL_TAG elif model_type == ModelType.BASE: filter_tag = Tags.BASE_MODEL_CUSTOM - elif model_type == ModelType.MULTIMODEL: - filter_tag = Tags.MULTIMODEL_TYPE_TAG + # elif model_type == ModelType.MULTIMODEL: + # filter_tag = Tags.MULTIMODEL_TYPE_TAG else: raise AquaValueError( f"Model of type {model_type} is unknown. The values should be in {ModelType.values()}" diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index b93f5286d..d3bd8cd3b 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -164,6 +164,7 @@ def from_oci_model_deployment( class Config: extra = "ignore" + protected_namespaces = () class AquaDeploymentDetail(AquaDeployment, DataClassSerializable): @@ -180,16 +181,16 @@ class ShapeInfoConfig(Serializable): """Describes how many memory and cpu to this model for specific shape. Attributes: - memory_in_gbs (int, optional): The number of memory in gbs to this model of the shape. - ocpu (int, optional): The number of ocpus to this model of the shape. + memory_in_gbs (float, optional): The number of memory in gbs to this model of the shape. + ocpu (float, optional): The number of ocpus to this model of the shape. 
""" - memory_in_gbs: Optional[int] = Field( - default_factory=int, + memory_in_gbs: Optional[float] = Field( + None, description="The number of memory in gbs to this model of the shape.", ) - ocpu: Optional[int] = Field( - default_factory=int, + ocpu: Optional[float] = Field( + None, description="The number of ocpus to this model of the shape.", ) @@ -558,3 +559,4 @@ def validate_multimodel_deployment_feasibility( class Config: extra = "ignore" + protected_namespaces = () diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_gguf_deployment.yaml b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_gguf_deployment.yaml index 29d0db8c3..2d394adfa 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_gguf_deployment.yaml +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_gguf_deployment.yaml @@ -18,7 +18,7 @@ spec: shapeName: "VM.Standard.A1.Flex" shapeConfigDetails: memoryInGBs: 60.0 - ocpus: 10 + ocpus: 10.0 type: datascienceModelDeployment lifecycleState: CREATING modelDeploymentUrl: "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml index fdc9a6fa9..9e33aaa6f 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml @@ -4,7 +4,7 @@ spec: displayName: multi-model-deployment-name freeformTags: OCI_AQUA: active - multimodel: "true" + aqua_multimodel: "true" aqua_model_id: model-id id: "ocid1.datasciencemodeldeployment.oc1.." infrastructure: diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml index 00f141bd3..6cb0df3fc 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml @@ -77,7 +77,7 @@ spec: displayName: multi_model freeformTags: OCI_AQUA: active - multimodel: "true" + aqua_multimodel: "true" license: Apache 2.0 organization: Organization ready_to_fine_tune: false diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index ababfd2e4..138e15977 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -180,7 +180,7 @@ class TestDataset: "freeform_tags": { "OCI_AQUA": "active", "aqua_model_id": "model-id", - "multimodel": "true", + "aqua_multimodel": "true", }, "id": "ocid1.datasciencemodeldeployment.oc1..", "lifecycle_state": "ACTIVE", @@ -264,7 +264,7 @@ class TestDataset: "instance_shape_name": DEPLOYMENT_SHAPE_NAME_CPU, "model_deployment_instance_shape_config_details": oci.data_science.models.ModelDeploymentInstanceShapeConfigDetails( **{ - "ocpus": 10, + "ocpus": 10.0, "memory_in_gbs": 60.0, } ), @@ -366,7 +366,7 @@ class TestDataset: "created_on": "2024-01-01T00:00:00.000000+00:00", "created_by": "ocid1.user.oc1..", "endpoint": MODEL_DEPLOYMENT_URL, - "private_endpoint_id": "", + "private_endpoint_id": None, "models": [], "model_id": "ocid1.datasciencemodel.oc1..", "environment_variables": { @@ -397,7 +397,7 @@ class TestDataset: "created_on": "2024-01-01T00:00:00.000000+00:00", "created_by": "ocid1.user.oc1..", 
"endpoint": MODEL_DEPLOYMENT_URL, - "private_endpoint_id": "", + "private_endpoint_id": None, "models": [ { "env_var": {}, @@ -435,7 +435,7 @@ class TestDataset: "tags": { "OCI_AQUA": "active", "aqua_model_id": "model-id", - "multimodel": "true", + "aqua_multimodel": "true", }, } @@ -450,7 +450,7 @@ class TestDataset: aqua_deployment_gguf_shape_info = { "instance_shape": DEPLOYMENT_SHAPE_NAME_CPU, "instance_count": 1, - "ocpus": 10, + "ocpus": 10.0, "memory_in_gbs": 60.0, } diff --git a/tests/unitary/with_extras/aqua/test_finetuning.py b/tests/unitary/with_extras/aqua/test_finetuning.py index 1e8e21ebf..533fa368d 100644 --- a/tests/unitary/with_extras/aqua/test_finetuning.py +++ b/tests/unitary/with_extras/aqua/test_finetuning.py @@ -10,7 +10,7 @@ from parameterized import parameterized from unittest import TestCase from unittest.mock import MagicMock, PropertyMock -from mock import patch +from unittest.mock import patch from importlib import reload import ads.aqua diff --git a/tests/unitary/with_extras/aqua/test_finetuning_handler.py b/tests/unitary/with_extras/aqua/test_finetuning_handler.py index 6f1579407..b5ff81a1c 100644 --- a/tests/unitary/with_extras/aqua/test_finetuning_handler.py +++ b/tests/unitary/with_extras/aqua/test_finetuning_handler.py @@ -7,7 +7,7 @@ from unittest import TestCase from unittest.mock import MagicMock -from mock import patch +from unittest.mock import patch from notebook.base.handlers import IPythonHandler from ads.aqua.extension.finetune_handler import ( diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index f29150ab0..fe18b9597 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -449,7 +449,7 @@ def test_create_multimodel( mock_from_id.return_value = mock_model mock_create.return_value = mock_model - assert model.freeform_tags == {"OCI_AQUA": "active", "multimodel": "true"} + assert model.freeform_tags == {"OCI_AQUA": "active", "aqua_multimodel": "true"} assert model.custom_metadata_list.get("model_group_count").value == "2" assert ( model.custom_metadata_list.get("deployment-container").value diff --git a/tests/unitary/with_extras/aqua/utils.py b/tests/unitary/with_extras/aqua/utils.py index 21e61bcc1..bdc72e3b5 100644 --- a/tests/unitary/with_extras/aqua/utils.py +++ b/tests/unitary/with_extras/aqua/utils.py @@ -29,7 +29,7 @@ class HandlerTestDataset: block_storage_size=1, experiment_name="test_experiment_name", memory_in_gbs=1, - ocpus=1, + ocpus=1.0, ) mock_invalid_input = dict(name="myvalue") mock_dataclass_obj = MockData(id="myid", name="myname") From a2adbf676a6ecb3abb03791ff0a9c89da5274495 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 21 Feb 2025 21:35:36 -0800 Subject: [PATCH 055/124] Fix unit tests --- tests/unitary/with_extras/aqua/test_model.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index fe18b9597..deddceb35 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -367,7 +367,7 @@ def test_create_multimodel( mock_model.display_name = "test_display_name" mock_model.description = "test_description" mock_model.freeform_tags = { - "OCI_AQUA": "ACTIVE", + # "OCI_AQUA": "ACTIVE", } mock_model.id = "mock_model_id" mock_model.artifact = "mock_artifact_path" @@ -391,10 +391,7 @@ def test_create_multimodel( env_var={"params": 
"--trust-remote-code --max-model-len 32000"}, ) - with pytest.raises( - AquaValueError, - match="Invalid selected model test_display_name. Currently only service models are supported for multi model deployment.", - ): + with pytest.raises(AquaValueError): model = self.app.create_multi( models=[model_info_1, model_info_2], project_id="test_project_id", @@ -403,10 +400,7 @@ def test_create_multimodel( mock_model.freeform_tags["aqua_service_model"] = TestDataset.SERVICE_MODEL_ID - with pytest.raises( - AquaValueError, - match="Invalid or missing task tag for selected model test_display_name. Currently only text-generation models are support for multi model deployment.", - ): + with pytest.raises(AquaValueError): model = self.app.create_multi( models=[model_info_1, model_info_2], project_id="test_project_id", @@ -415,10 +409,7 @@ def test_create_multimodel( mock_model.freeform_tags["task"] = "text-generation" - with pytest.raises( - AquaValueError, - match="Unsupported deployment container 'odsc-tgi-serving' for model 'mock_model_id'. Only 'odsc-vllm-serving' is supported for multi-model deployments.", - ): + with pytest.raises(AquaValueError): model = self.app.create_multi( models=[model_info_1, model_info_2], project_id="test_project_id", @@ -449,7 +440,7 @@ def test_create_multimodel( mock_from_id.return_value = mock_model mock_create.return_value = mock_model - assert model.freeform_tags == {"OCI_AQUA": "active", "aqua_multimodel": "true"} + assert model.freeform_tags == {"aqua_multimodel": "true"} assert model.custom_metadata_list.get("model_group_count").value == "2" assert ( model.custom_metadata_list.get("deployment-container").value From fd292abbfe9c18d0c1e4c9ebfc29dac51d999805 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Tue, 25 Feb 2025 15:36:19 -0800 Subject: [PATCH 056/124] added unit tests and finished validation method in evaluation.py --- ads/aqua/evaluation/evaluation.py | 60 ++++++++++++++++- .../with_extras/aqua/test_evaluation.py | 65 ++++++++++++++++++- 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 13adf0bdb..5d3c2aa30 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -75,6 +75,7 @@ CreateAquaEvaluationDetails, ) from ads.aqua.evaluation.errors import EVALUATION_JOB_EXIT_CODE_MESSAGE +from ads.aqua.model.constants import ModelCustomMetadataFields from ads.aqua.ui import AquaContainerConfig from ads.common.auth import default_signer from ads.common.object_storage_details import ObjectStorageDetails @@ -183,6 +184,26 @@ def create( evaluation_source = ModelDeployment.from_id( create_aqua_evaluation_details.evaluation_source_id ) + try: + if Tags.MULTIMODEL_TYPE_TAG in evaluation_source.freeform_tags: + multi_model_id = evaluation_source.freeform_tags.get( + Tags.AQUA_MODEL_ID_TAG, UNKNOWN + ) + + if not multi_model_id: + raise AquaRuntimeError( + f"Invalid multi model deployment {multi_model_id}." + f"Make sure the {Tags.AQUA_MODEL_ID_TAG} tag is added to the deployment." 
+ ) + + aqua_model = DataScienceModel.from_id(multi_model_id) + AquaEvaluationApp.validate_name_multi_model( + aqua_model, create_aqua_evaluation_details + ) + + except (AquaRuntimeError, AquaValueError) as err: + raise AquaValueError(f"{err}") from err + try: if ( evaluation_source.runtime.type @@ -550,6 +571,43 @@ def create( parameters=AquaEvalParams(), ) + @staticmethod + def validate_name_multi_model( + evaluation_source: DataScienceModel, + create_aqua_evaluation_details: CreateAquaEvaluationDetails, + ): + user_model_parameters = create_aqua_evaluation_details.model_parameters + if "name" not in user_model_parameters: + logger.debug( + f"User did not input model name for multi model deployment evaluation with evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}" + ) + raise AquaValueError( + "Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment." + ) + + custom_metadata_list = evaluation_source.custom_metadata_list + user_model_name = user_model_parameters.get("name") + + model_group_count = int( + custom_metadata_list.get( + ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT + ).value + ) + + model_names = [ + custom_metadata_list.get(f"model-name-{idx}").value + for idx in range(model_group_count) + ] + + if user_model_name not in model_names: + valid_model_names = ", ".join(map(str, model_names)) + logger.debug( + f"User input for model name was {user_model_name}, expected {valid_model_names} evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}" + ) + raise AquaValueError( + f"Provide the correct model name. The valid model names for this Model Deployment are {valid_model_names}." + ) + def _build_evaluation_runtime( self, evaluation_id: str, @@ -1392,7 +1450,7 @@ def _fetch_jobrun( ) except Exception as e: logger.debug( - f"Failed to retreive job run: {jobrun_id}. " f"DEBUG INFO: {str(e)}" + f"Failed to retreive job run: {jobrun_id}. DEBUG INFO: {str(e)}" ) jobrun = None diff --git a/tests/unitary/with_extras/aqua/test_evaluation.py b/tests/unitary/with_extras/aqua/test_evaluation.py index ef3475184..45ab2dc84 100644 --- a/tests/unitary/with_extras/aqua/test_evaluation.py +++ b/tests/unitary/with_extras/aqua/test_evaluation.py @@ -34,6 +34,7 @@ AquaEvalMetrics, AquaEvalReport, AquaEvaluationSummary, + CreateAquaEvaluationDetails, ) from ads.aqua.extension.base_handler import AquaAPIhandler from ads.jobs.ads_job import DataScienceJob, DataScienceJobRun, Job @@ -353,6 +354,7 @@ class TestDataset: COMPARTMENT_ID = "ocid1.compartment.oc1.." EVAL_ID = "ocid1.datasciencemodel.oc1.iad." INVALID_EVAL_ID = "ocid1.datasciencemodel.oc1.phx." + MODEL_DEPLOYMENT_ID = "ocid1.datasciencemodeldeployment.oc1.." class TestAquaEvaluation(unittest.TestCase): @@ -449,7 +451,7 @@ def test_create_evaluation( mock_from_id.return_value = foundation_model experiment = MagicMock() - experiment.id = "test_experiment_id" + experiment.id = "ocid1.datasciencemodelversionset.oc1.iad.amaaaaaav66vvniakngdzelb5hcgjd6yvfejksu2excidvvi3s5s5whtmdea" mock_mvs_create.return_value = experiment evaluation_model = MagicMock() @@ -533,6 +535,67 @@ def test_create_evaluation( "time_created": f"{oci_dsc_model.time_created}", } + @parameterized.expand( + [ + ( + {"name": "model_one"}, + None + ), + ( + {}, + "Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment." + ), + ( + {"name": "wrong_model_name"}, + "Provide the correct model name. 
The valid model names for this Model Deployment are model_one, model_two, model_three." + ) + ]) + @patch("ads.aqua.evaluation.evaluation.AquaEvaluationApp.create") + def test_validate_multi_model_evaluation( + self, + mock_model_parameters, + expected_message, + mock_model + ): + curr_dir = os.path.dirname(__file__) + + eval_model_freeform_tags = {"ftag1": "fvalue1", "ftag2": "fvalue2"} + eval_model_defined_tags = {"dtag1": "dvalue1", "dtag2": "dvalue2"} + + eval_model_freeform_tags[Tags.MULTIMODEL_TYPE_TAG] = "true" + eval_model_freeform_tags[Tags.AQUA_TAG] = "active" + + create_aqua_evaluation_details = dict( # noqa: C408 + evaluation_source_id= TestDataset.MODEL_DEPLOYMENT_ID, + evaluation_name="test_evaluation_name", + dataset_path="oci://dataset_bucket@namespace/prefix/dataset.jsonl", + report_path="oci://report_bucket@namespace/prefix/", + model_parameters=mock_model_parameters, + shape_name="VM.Standard.E3.Flex", + block_storage_size=1, + experiment_name="test_experiment_name", + memory_in_gbs=1, + ocpus=1, + freeform_tags=eval_model_freeform_tags, + defined_tags=eval_model_defined_tags, + ) + + + aqua_multi_model = os.path.join( + curr_dir, "test_data/deployment/aqua_multi_model.yaml" + ) + + mock_model = DataScienceModel.from_yaml( + uri=aqua_multi_model + ) + + mock_create_aqua_evaluation_details = MagicMock(**create_aqua_evaluation_details, spec=CreateAquaEvaluationDetails) + + try: + AquaEvaluationApp.validate_name_multi_model(mock_model, mock_create_aqua_evaluation_details) + except Exception as e: + self.assertEqual(str(e), expected_message) + def test_get_service_model_name(self): # get service model name from fine tuned model deployment source = ModelDeployment().with_freeform_tags( From a4e2da02fbc48893e37d5df4805f96015d9c0eb1 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Tue, 25 Feb 2025 16:02:41 -0800 Subject: [PATCH 057/124] fixed model parameter name --- ads/aqua/evaluation/evaluation.py | 4 ++-- tests/unitary/with_extras/aqua/test_evaluation.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 5d3c2aa30..29bc90ee2 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -577,7 +577,7 @@ def validate_name_multi_model( create_aqua_evaluation_details: CreateAquaEvaluationDetails, ): user_model_parameters = create_aqua_evaluation_details.model_parameters - if "name" not in user_model_parameters: + if "model" not in user_model_parameters: logger.debug( f"User did not input model name for multi model deployment evaluation with evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}" ) @@ -586,7 +586,7 @@ def validate_name_multi_model( ) custom_metadata_list = evaluation_source.custom_metadata_list - user_model_name = user_model_parameters.get("name") + user_model_name = user_model_parameters.get("model") model_group_count = int( custom_metadata_list.get( diff --git a/tests/unitary/with_extras/aqua/test_evaluation.py b/tests/unitary/with_extras/aqua/test_evaluation.py index 45ab2dc84..9e419285c 100644 --- a/tests/unitary/with_extras/aqua/test_evaluation.py +++ b/tests/unitary/with_extras/aqua/test_evaluation.py @@ -538,7 +538,7 @@ def test_create_evaluation( @parameterized.expand( [ ( - {"name": "model_one"}, + {"model": "model_one"}, None ), ( @@ -546,7 +546,7 @@ def test_create_evaluation( "Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment." 
), ( - {"name": "wrong_model_name"}, + {"model": "wrong_model_name"}, "Provide the correct model name. The valid model names for this Model Deployment are model_one, model_two, model_three." ) ]) From b25fa2e3d419135c090e3dbeefe0d80853d3fcb1 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Tue, 25 Feb 2025 17:55:52 -0800 Subject: [PATCH 058/124] added docstring, fixed PR comments --- ads/aqua/evaluation/evaluation.py | 45 ++++++++++++++----- .../with_extras/aqua/test_evaluation.py | 11 ++--- 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 29bc90ee2..33bd2bf24 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -31,6 +31,7 @@ Tags, ) from ads.aqua.common.errors import ( + AquaError, AquaFileExistsError, AquaFileNotFoundError, AquaMissingKeyError, @@ -197,11 +198,11 @@ def create( ) aqua_model = DataScienceModel.from_id(multi_model_id) - AquaEvaluationApp.validate_name_multi_model( + AquaEvaluationApp.validate_model_name( aqua_model, create_aqua_evaluation_details ) - except (AquaRuntimeError, AquaValueError) as err: + except AquaError as err: raise AquaValueError(f"{err}") from err try: @@ -572,18 +573,28 @@ def create( ) @staticmethod - def validate_name_multi_model( + def validate_model_name( evaluation_source: DataScienceModel, create_aqua_evaluation_details: CreateAquaEvaluationDetails, - ): + ) -> None: + """ + Validates the user input of the model name when creating an Aqua evaluation. + + Parameters + ---------- + evaluation_source: DataScienceModel + The DataScienceModel Object which contains all metadata + about each model in a single and multi model deployment. + create_aqua_evaluation_details: CreateAquaEvaluationDetails + The CreateAquaEvaluationDetails data class which contains all + required and optional fields to create the aqua evaluation. + + Raises + ------- + AquaValueError: + - When the user fails to specify any input for the model name. + - When the user supplies a model name that does not match the model name set in the DataScienceModel metadata.""" user_model_parameters = create_aqua_evaluation_details.model_parameters - if "model" not in user_model_parameters: - logger.debug( - f"User did not input model name for multi model deployment evaluation with evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}" - ) - raise AquaValueError( - "Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment." - ) custom_metadata_list = evaluation_source.custom_metadata_list user_model_name = user_model_parameters.get("model") @@ -599,8 +610,18 @@ def validate_name_multi_model( for idx in range(model_group_count) ] + valid_model_names = ", ".join(map(str, model_names)) + + if "model" not in user_model_parameters: + logger.debug( + f"User did not input model name for multi model deployment evaluation with evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}" + ) + raise AquaValueError( + f"Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment. The valid model names for this Model Deployment are {valid_model_names}." 
+ ) + if user_model_name not in model_names: - valid_model_names = ", ".join(map(str, model_names)) + logger.debug( f"User input for model name was {user_model_name}, expected {valid_model_names} evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}" ) diff --git a/tests/unitary/with_extras/aqua/test_evaluation.py b/tests/unitary/with_extras/aqua/test_evaluation.py index 9e419285c..29c1cc58e 100644 --- a/tests/unitary/with_extras/aqua/test_evaluation.py +++ b/tests/unitary/with_extras/aqua/test_evaluation.py @@ -17,6 +17,7 @@ from ads.aqua.common import utils from ads.aqua.common.enums import Tags from ads.aqua.common.errors import ( + AquaError, AquaFileNotFoundError, AquaMissingKeyError, AquaRuntimeError, @@ -451,7 +452,7 @@ def test_create_evaluation( mock_from_id.return_value = foundation_model experiment = MagicMock() - experiment.id = "ocid1.datasciencemodelversionset.oc1.iad.amaaaaaav66vvniakngdzelb5hcgjd6yvfejksu2excidvvi3s5s5whtmdea" + experiment.id = "test_experiment_id" mock_mvs_create.return_value = experiment evaluation_model = MagicMock() @@ -543,7 +544,7 @@ def test_create_evaluation( ), ( {}, - "Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment." + "Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment. The valid model names for this Model Deployment are model_one, model_two, model_three." ), ( {"model": "wrong_model_name"}, @@ -551,7 +552,7 @@ def test_create_evaluation( ) ]) @patch("ads.aqua.evaluation.evaluation.AquaEvaluationApp.create") - def test_validate_multi_model_evaluation( + def test_validate_model_name( self, mock_model_parameters, expected_message, @@ -592,8 +593,8 @@ def test_validate_multi_model_evaluation( mock_create_aqua_evaluation_details = MagicMock(**create_aqua_evaluation_details, spec=CreateAquaEvaluationDetails) try: - AquaEvaluationApp.validate_name_multi_model(mock_model, mock_create_aqua_evaluation_details) - except Exception as e: + AquaEvaluationApp.validate_model_name(mock_model, mock_create_aqua_evaluation_details) + except AquaError as e: self.assertEqual(str(e), expected_message) def test_get_service_model_name(self): From 7e9d46e54c1caa29e37a7e0bea91a19f58f058c3 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Wed, 26 Feb 2025 13:37:31 -0800 Subject: [PATCH 059/124] fixed PR comments --- ads/aqua/evaluation/evaluation.py | 50 ++++++++++--------- .../with_extras/aqua/test_evaluation.py | 6 +-- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 33bd2bf24..47497aa26 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -185,25 +185,22 @@ def create( evaluation_source = ModelDeployment.from_id( create_aqua_evaluation_details.evaluation_source_id ) - try: - if Tags.MULTIMODEL_TYPE_TAG in evaluation_source.freeform_tags: - multi_model_id = evaluation_source.freeform_tags.get( - Tags.AQUA_MODEL_ID_TAG, UNKNOWN - ) - if not multi_model_id: - raise AquaRuntimeError( - f"Invalid multi model deployment {multi_model_id}." - f"Make sure the {Tags.AQUA_MODEL_ID_TAG} tag is added to the deployment." 
- ) + if Tags.MULTIMODEL_TYPE_TAG in evaluation_source.freeform_tags: + multi_model_id = evaluation_source.freeform_tags.get( + Tags.AQUA_MODEL_ID_TAG, UNKNOWN + ) - aqua_model = DataScienceModel.from_id(multi_model_id) - AquaEvaluationApp.validate_model_name( - aqua_model, create_aqua_evaluation_details + if not multi_model_id: + raise AquaRuntimeError( + f"Invalid multi model deployment {multi_model_id}." + f"Make sure the {Tags.AQUA_MODEL_ID_TAG} tag is added to the deployment." ) - except AquaError as err: - raise AquaValueError(f"{err}") from err + aqua_model = DataScienceModel.from_id(multi_model_id) + AquaEvaluationApp.validate_model_name( + aqua_model, create_aqua_evaluation_details + ) try: if ( @@ -593,24 +590,31 @@ def validate_model_name( ------- AquaValueError: - When the user fails to specify any input for the model name. - - When the user supplies a model name that does not match the model name set in the DataScienceModel metadata.""" + - When the user supplies a model name that does not match the model name set in the DataScienceModel metadata. + - When the DataScienceModel metadata lacks core attributes for validating the name""" user_model_parameters = create_aqua_evaluation_details.model_parameters custom_metadata_list = evaluation_source.custom_metadata_list user_model_name = user_model_parameters.get("model") - model_group_count = int( - custom_metadata_list.get( - ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT - ).value - ) + model_count = custom_metadata_list.get(ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT) + + if model_count and custom_metadata_list: + model_group_count = int(model_count.value) + else: + logger.debug( + f"The ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT or custom_metadata_list (ModelCustomMetadata) is missing from the metadata in evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}" + ) + raise AquaRuntimeError( + "Recreate the model deployment and retry the evaluation. An issue occured when initalizing the model group during deployment." + ) model_names = [ - custom_metadata_list.get(f"model-name-{idx}").value + custom_metadata_list.get(f"model-name-{idx}") for idx in range(model_group_count) ] - valid_model_names = ", ".join(map(str, model_names)) + valid_model_names = ", ".join(name.value for name in model_names if name is not None) if "model" not in user_model_parameters: logger.debug( diff --git a/tests/unitary/with_extras/aqua/test_evaluation.py b/tests/unitary/with_extras/aqua/test_evaluation.py index 29c1cc58e..1a88edc50 100644 --- a/tests/unitary/with_extras/aqua/test_evaluation.py +++ b/tests/unitary/with_extras/aqua/test_evaluation.py @@ -38,6 +38,7 @@ CreateAquaEvaluationDetails, ) from ads.aqua.extension.base_handler import AquaAPIhandler +from ads.aqua.model.constants import ModelCustomMetadataFields from ads.jobs.ads_job import DataScienceJob, DataScienceJobRun, Job from ads.model import DataScienceModel from ads.model.deployment.model_deployment import ModelDeployment @@ -538,10 +539,6 @@ def test_create_evaluation( @parameterized.expand( [ - ( - {"model": "model_one"}, - None - ), ( {}, "Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment. The valid model names for this Model Deployment are model_one, model_two, model_three." 
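For reference, the model-name check exercised above boils down to a lookup over the indexed custom-metadata entries written when the multi-model group is created; below is a minimal sketch, with a plain dict standing in for the ModelCustomMetadata object and the validation reduced to its core comparison.

# Simplified sketch of the validation flow: a plain dict stands in for the
# DataScienceModel custom metadata, reusing the model_group_count and
# model-name-{idx} keys recorded at multi-model group creation time.
metadata = {
    "model_group_count": "3",
    "model-name-0": "model_one",
    "model-name-1": "model_two",
    "model-name-2": "model_three",
}

def valid_model_names(meta: dict) -> list:
    count = int(meta["model_group_count"])
    return [meta[f"model-name-{idx}"] for idx in range(count)]

user_model = "model_two"  # the "model" entry supplied in model_parameters
names = valid_model_names(metadata)
if user_model not in names:
    raise ValueError(f"Provide the correct model name. Valid names: {', '.join(names)}")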
@@ -595,6 +592,7 @@ def test_validate_model_name( try: AquaEvaluationApp.validate_model_name(mock_model, mock_create_aqua_evaluation_details) except AquaError as e: + print(str(e)) self.assertEqual(str(e), expected_message) def test_get_service_model_name(self): From ce44b27b7cf16cc49b6efa13e9737a1fa3834bf5 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 26 Feb 2025 18:38:32 -0500 Subject: [PATCH 060/124] Fixed service model name --- ads/aqua/common/utils.py | 19 +++++++++++++++++++ ads/aqua/model/model.py | 1 + ads/aqua/modeldeployment/deployment.py | 14 ++++++-------- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py index f4e002d19..2684b9e04 100644 --- a/ads/aqua/common/utils.py +++ b/ads/aqua/common/utils.py @@ -939,6 +939,25 @@ def get_combined_params(params1: str = None, params2: str = None) -> str: return " ".join(combined_params) +def build_params_string(params: dict) -> str: + """Builds params string from params dict + + Parameters + ---------- + params: + Parameter dict with key-value pairs + + Returns + ------- + A params string. + """ + return ( + " ".join(f"{name} {value}" for name, value in params.items()).strip() + if params + else UNKNOWN + ) + + def copy_model_config(artifact_path: str, os_path: str, auth: dict = None): """Copies the aqua model config folder from the artifact path to the user provided object storage path. The config folder is overwritten if the files already exist at the destination path. diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 772bdc474..cd4114de7 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -264,6 +264,7 @@ def create_multi( for idx, model in enumerate(models): source_model = DataScienceModel.from_id(model.model_id) display_name = source_model.display_name + model.model_name = model.model_name or display_name if not source_model.freeform_tags.get(Tags.AQUA_SERVICE_MODEL_TAG, UNKNOWN): raise AquaValueError( diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index c34f3aa35..53195d1e3 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -13,6 +13,7 @@ from ads.aqua.common.enums import InferenceContainerTypeFamily, Tags from ads.aqua.common.errors import AquaRuntimeError, AquaValueError from ads.aqua.common.utils import ( + build_params_string, build_pydantic_error_message, get_combined_params, get_container_config, @@ -470,13 +471,7 @@ def _create_multi( container_params = container_spec.get(ContainerSpec.CLI_PARM, UNKNOWN).strip() for idx, model in enumerate(create_deployment_details.models): - user_params = ( - " ".join( - f"{name} {value}" for name, value in model.env_var.items() - ).strip() - if model.env_var - else UNKNOWN - ) + user_params = build_params_string(model.env_var) if user_params: restricted_params = self._find_restricted_params( container_params, user_params, container_type_key @@ -489,7 +484,10 @@ def _create_multi( f"Select other parameters for model {selected_model}." 
) - params = container_params + # replaces `--served-model-name`` with user's model name + container_params_dict = get_params_dict(container_params) + container_params_dict.update({"--served-model-name": model.model_name}) + params = build_params_string(container_params_dict) deployment_config = self.get_deployment_config(model.model_id) multi_model_deployment = deployment_config.configuration.get( create_deployment_details.instance_shape, ConfigurationItem() From 82d8d534f9452fcb37f36a339fef2bb43aad233a Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Thu, 27 Feb 2025 16:58:28 -0800 Subject: [PATCH 061/124] Adds more debug statements --- ads/aqua/app.py | 11 ++++++++--- ads/aqua/common/utils.py | 2 +- ads/aqua/modeldeployment/utils.py | 8 ++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/ads/aqua/app.py b/ads/aqua/app.py index 937a0efdd..29df7a687 100644 --- a/ads/aqua/app.py +++ b/ads/aqua/app.py @@ -339,6 +339,9 @@ def get_config( config_file_path = os.path.join(config_path, config_file_name) if is_path_exists(config_file_path): try: + logger.info( + f"Loading config: `{config_file_name}` from `{config_path}`" + ) config = load_config( config_path, config_file_name=config_file_name, @@ -378,9 +381,11 @@ def build_cli(self) -> str: """ cmd = f"ads aqua {self._command}" params = [ - f"--{field.name} {json.dumps(getattr(self, field.name))}" - if isinstance(getattr(self, field.name), dict) - else f"--{field.name} {getattr(self, field.name)}" + ( + f"--{field.name} {json.dumps(getattr(self, field.name))}" + if isinstance(getattr(self, field.name), dict) + else f"--{field.name} {getattr(self, field.name)}" + ) for field in fields(self.__class__) if getattr(self, field.name) is not None ] diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py index 2684b9e04..6eed32ba9 100644 --- a/ads/aqua/common/utils.py +++ b/ads/aqua/common/utils.py @@ -252,7 +252,7 @@ def load_config(file_path: str, config_file_name: str, **kwargs) -> dict: return config -def list_os_files_with_extension(oss_path: str, extension: str) -> [str]: +def list_os_files_with_extension(oss_path: str, extension: str) -> List[str]: """ List files in the specified directory with the given extension. diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 67d2d9ed8..9ddeb76a1 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -62,7 +62,10 @@ def load( cannot be determined, an appropriate error message is included in the summary. """ # Fetch deployment configurations concurrently. + logger.debug(f"Loading model deployment configuration for models: {model_ids}") deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) + + logger.debug(f"Loaded config: {deployment_configs}") model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) # Initialize the summary result with the deployment configurations. @@ -80,6 +83,8 @@ def load( # Identify common deployment shapes among all models. common_shapes = self._get_common_shapes(model_shape_gpu) + logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}") + if not common_shapes: summary.error_message = ( "The selected models do not share any common deployment shapes. 
" @@ -94,6 +99,9 @@ def load( gpu_allocation = self._compute_gpu_allocation( common_shapes, model_shape_gpu, primary_model_id ) + + logger.debug(f"GPU Allocation: {gpu_allocation}") + if not gpu_allocation: summary.error_message = ( "Unable to determine a valid GPU allocation for the selected models based on their current configurations. " From a0aa1efd978f068c88a6eebfd5c763c379f3b4ac Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Thu, 27 Feb 2025 22:48:47 -0500 Subject: [PATCH 062/124] Collect shape from configuration entry. --- ads/aqua/modeldeployment/utils.py | 11 +++++++---- tests/unitary/with_extras/aqua/test_deployment.py | 10 +++------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 9ddeb76a1..2fe8e0b09 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -135,11 +135,14 @@ def _fetch_deployment_configs_concurrently( def _extract_model_shape_gpu( self, deployment_configs: Dict[str, AquaDeploymentConfig] ): - """Extracts shape and GPU count details from deployment configurations.""" + """Extracts shape and GPU count details from deployment configurations. + Supported shapes for multi model deployment will be collected from `configuration` entry in deployment config. + """ model_shape_gpu = {} deployment = {} for model_id, config in deployment_configs.items(): + multi_deployment_shape = list(config.configuration.keys()) model_shape_gpu[model_id] = { shape: [ item.gpu_count @@ -147,13 +150,13 @@ def _extract_model_shape_gpu( shape, ConfigurationItem() ).multi_model_deployment ] - for shape in config.shape + for shape in multi_deployment_shape } deployment[model_id] = { - "shape": config.shape, + "shape": multi_deployment_shape, "configuration": { shape: config.configuration.get(shape, ConfigurationItem()) - for shape in config.shape + for shape in multi_deployment_shape }, } diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 138e15977..272637259 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -499,10 +499,10 @@ class TestDataset: "deployment_config": { "model_a": { "shape": [ - "VM.GPU.A10.2", - "VM.GPU.A10.4", "BM.GPU.A100-v2.8", "BM.GPU.H100.8", + "VM.GPU.A10.2", + "VM.GPU.A10.4", ], "configuration": { "VM.GPU.A10.2": { @@ -815,13 +815,9 @@ def test_get_multimodel_deployment_config( @parameterized.expand( [ - [ - "shape", - "Unable to determine a valid GPU allocation for the selected models based on their current configurations. Please try selecting a different set of models.", - ], [ "configuration", - "Unable to determine a valid GPU allocation for the selected models based on their current configurations. Please select a different set of models.", + "Unable to determine a valid GPU allocation for the selected models based on their current configurations. 
Please try selecting a different set of models.", ], ] ) From f7e7c2ed7816ec0edb1fcae28f5575ed1f4434ad Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Thu, 27 Feb 2025 21:22:59 -0800 Subject: [PATCH 063/124] Fixes GPU allocation validation --- ads/aqua/modeldeployment/entities.py | 9 +++++++-- ads/aqua/modeldeployment/utils.py | 4 ++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index d3bd8cd3b..39a0d5580 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -522,10 +522,15 @@ def validate_multimodel_deployment_feasibility( for model in self.models: sum_model_gpus += model.gpu_count - aqua_deployment_config = model_deployment_config[model.model_id] - if selected_shape not in aqua_deployment_config.shape: + # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). + # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. + # Our current configuration does not support this flexibility. + # multi_deployment_shape = aqua_deployment_config.shape + multi_deployment_shape = list(aqua_deployment_config.configuration.keys()) + + if selected_shape not in multi_deployment_shape: logger.error( f"Model with OCID {model.model_id} in the model group is not compatible with the selected instance shape: {selected_shape}" ) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 2fe8e0b09..b6103d7ac 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -142,6 +142,10 @@ def _extract_model_shape_gpu( deployment = {} for model_id, config in deployment_configs.items(): + # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). + # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. + # Our current configuration does not support this flexibility. + # multi_deployment_shape = config.shape multi_deployment_shape = list(config.configuration.keys()) model_shape_gpu[model_id] = { shape: [ From 8efb221dfdbcb1440e32dae3260bbba0fc9e01b0 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 28 Feb 2025 17:32:59 -0800 Subject: [PATCH 064/124] Extract Available GPU Count from list_model_deployment_shapes API for Multi-Model Deployment --- ads/aqua/common/constants.py | 81 +++++ ads/aqua/common/entities.py | 81 ++++- ads/aqua/common/utils.py | 16 +- ads/aqua/extension/base_handler.py | 6 +- ads/aqua/extension/deployment_handler.py | 26 +- ads/aqua/modeldeployment/deployment.py | 67 +++- ads/aqua/modeldeployment/entities.py | 24 +- ads/aqua/modeldeployment/utils.py | 50 ++- .../with_extras/aqua/test_common_entities.py | 59 ++++ .../deployment/aqua_deployment_shapes.json | 288 ++++++++++++++++++ .../with_extras/aqua/test_deployment.py | 46 ++- 11 files changed, 712 insertions(+), 32 deletions(-) create mode 100644 ads/aqua/common/constants.py create mode 100644 tests/unitary/with_extras/aqua/test_common_entities.py create mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/aqua_deployment_shapes.json diff --git a/ads/aqua/common/constants.py b/ads/aqua/common/constants.py new file mode 100644 index 000000000..a799bacdc --- /dev/null +++ b/ads/aqua/common/constants.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# Copyright (c) 2025 Oracle and/or its affiliates. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ + +GPU_SPECS = { + "VM.GPU2.1": { + "gpu_type": "P100", + "gpu_count": 1, + "gpu_memory_in_gbs": 16, + }, + "VM.GPU3.1": { + "gpu_type": "V100", + "gpu_count": 1, + "gpu_memory_in_gbs": 16, + }, + "VM.GPU3.2": { + "gpu_type": "V100", + "gpu_count": 2, + "gpu_memory_in_gbs": 32, + }, + "VM.GPU3.4": { + "gpu_type": "V100", + "gpu_count": 4, + "gpu_memory_in_gbs": 64, + }, + "BM.GPU2.2": { + "gpu_type": "P100", + "gpu_count": 2, + "gpu_memory_in_gbs": 32, + }, + "BM.GPU3.8": { + "gpu_type": "V100", + "gpu_count": 8, + "gpu_memory_in_gbs": 128, + }, + "BM.GPU4.8": { + "gpu_type": "A100", + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + }, + "BM.GPU.A10.4": { + "gpu_type": "A10", + "gpu_count": 4, + "gpu_memory_in_gbs": 96, + }, + "VM.GPU.A10.4": { + "gpu_type": "A10", + "gpu_count": 4, + "gpu_memory_in_gbs": 96, + }, + "BM.GPU.H100.8": { + "gpu_type": "H100", + "gpu_count": 8, + "gpu_memory_in_gbs": 640, + }, + "VM.GPU.A10.1": { + "gpu_type": "A10", + "gpu_count": 1, + "gpu_memory_in_gbs": 24, + }, + "VM.GPU.A10.2": { + "gpu_type": "A10", + "gpu_count": 2, + "gpu_memory_in_gbs": 48, + }, + "BM.GPU.L40S-NC.4": { + "gpu_type": "L40S", + "gpu_count": 4, + "gpu_memory_in_gbs": 192, + }, + "BM.GPU.H200.8": { + "gpu_type": "H200", + "gpu_count": 8, + "gpu_memory_in_gbs": 1128, + }, + "BM.GPU.A100-v2.8": { + "gpu_type": "A100", + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + }, +} diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index 294b82f59..864ee19b6 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -2,10 +2,13 @@ # Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import re from typing import Optional -from pydantic import Field +from pydantic import Field, model_validator +from ads.aqua.app import logger +from ads.aqua.common.constants import GPU_SPECS from ads.aqua.config.utils.serializer import Serializable @@ -23,14 +26,76 @@ class ContainerSpec: EVALUATION_CONFIGURATION = "evaluationConfiguration" -class ShapeInfo(Serializable): - instance_shape: Optional[str] = None - instance_count: Optional[int] = None - ocpus: Optional[float] = None - memory_in_gbs: Optional[float] = None +class GPUSpecs(Serializable): + """ + Represents the GPU specifications for a compute instance. + """ - class Config: - extra = "ignore" + gpu_memory_in_gbs: Optional[int] = Field( + default=None, description="The amount of GPU memory available (in GB)." + ) + gpu_count: Optional[int] = Field( + default=None, description="The number of GPUs available." + ) + gpu_type: Optional[str] = Field( + default=None, description="The type of GPU (e.g., 'V100, A100, H100')." + ) + + +class ComputeShapeSummary(Serializable): + """ + Represents the specifications of a compute instance's shape. + """ + + core_count: Optional[int] = Field( + default=None, description="The number of CPU cores available." + ) + memory_in_gbs: Optional[int] = Field( + default=None, description="The amount of memory (in GB) available." + ) + name: Optional[str] = Field( + default=None, description="The name identifier of the compute shape." + ) + shape_series: Optional[str] = Field( + default=None, description="The series or category of the compute shape." 
+ ) + gpu_specs: Optional[GPUSpecs] = Field( + default=None, + description="The GPU specifications associated with the compute shape.", + ) + + @model_validator(mode="after") + def set_gpu_specs(self, model: "ComputeShapeSummary") -> "ComputeShapeSummary": + """ + Validates and populates GPU specifications if the shape_series indicates a GPU-based shape. + + - If the shape_series contains "GPU", the validator first checks if the shape name exists + in the GPU_SPECS dictionary. If found, it creates a GPUSpecs instance with the corresponding data. + - If the shape is not found in the GPU_SPECS, it attempts to extract the GPU count from the shape name + using a regex pattern (looking for a number following a dot at the end of the name). + + The information about shapes is taken from: https://docs.oracle.com/en-us/iaas/data-science/using/supported-shapes.htm + + Returns: + ComputeShapeSummary: The updated instance with gpu_specs populated if applicable. + """ + try: + if model.shape_series and "GPU" in model.shape_series.upper(): + if model.name and model.name in GPU_SPECS: + gpu_info = GPU_SPECS[model.name] + model.gpu_specs = GPUSpecs(**gpu_info) + elif model.name: + # Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2) + match = re.search(r"\.(\d+)$", model.name) + if match: + gpu_count = int(match.group(1)) + model.gpu_specs = GPUSpecs(gpu_count=gpu_count) + except Exception as err: + logger.info( + f"Error occurred in attempt to extract GPU specification for the f{model.name}. " + f"Details: {err}" + ) + return model class AquaMultiModelRef(Serializable): diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py index 6eed32ba9..3a19ca6ce 100644 --- a/ads/aqua/common/utils.py +++ b/ads/aqua/common/utils.py @@ -33,7 +33,7 @@ ) from oci.data_science.models import JobRun, Model from oci.object_storage.models import ObjectSummary -from pydantic import ValidationError +from pydantic import BaseModel, ValidationError from ads.aqua.common.enums import ( InferenceContainerParamType, @@ -1238,3 +1238,17 @@ def build_pydantic_error_message(ex: ValidationError): for e in ex.errors() if "loc" in e and e["loc"] } or "; ".join(e["msg"] for e in ex.errors()) + + +def is_pydantic_model(obj: object) -> bool: + """ + Returns True if obj is a Pydantic model class or an instance of a Pydantic model. + + Args: + obj: The object or class to check. + + Returns: + bool: True if obj is a subclass or instance of BaseModel, False otherwise. + """ + cls = obj if isinstance(obj, type) else type(obj) + return issubclass(cls, BaseModel) diff --git a/ads/aqua/extension/base_handler.py b/ads/aqua/extension/base_handler.py index 80af4fc44..f56e4bf36 100644 --- a/ads/aqua/extension/base_handler.py +++ b/ads/aqua/extension/base_handler.py @@ -15,6 +15,7 @@ from tornado.web import Application, HTTPError from ads.aqua import logger +from ads.aqua.common.utils import is_pydantic_model from ads.config import AQUA_TELEMETRY_BUCKET, AQUA_TELEMETRY_BUCKET_NS from ads.telemetry.client import TelemetryClient @@ -40,7 +41,7 @@ def __init__( def prepare(self, *args, **kwargs): """The base class prepare is not required for Aqua""" pass - + @staticmethod def serialize(obj: Any): """Serialize the object. 
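(For illustration, a minimal, self-contained sketch of the dispatch pattern these hunks introduce: dataclasses via asdict, Pydantic v2 models via model_dump, anything else via str. The names _is_pydantic_model, _serialize, _Point and _Gpu below are illustrative stand-ins and assume Pydantic v2 is installed; this is not the handler's actual code.)

from dataclasses import asdict, dataclass, is_dataclass
from typing import Any

from pydantic import BaseModel  # assumes Pydantic v2 (model_dump)


def _is_pydantic_model(obj: object) -> bool:
    # Accept either a Pydantic model class or an instance of one.
    cls = obj if isinstance(obj, type) else type(obj)
    return issubclass(cls, BaseModel)


def _serialize(obj: Any):
    if is_dataclass(obj):
        return asdict(obj)  # dataclass instance -> plain dict
    if _is_pydantic_model(obj):
        return obj.model_dump()  # Pydantic v2 model -> plain dict
    return str(obj)  # fallback: string representation


@dataclass
class _Point:
    x: int
    y: int


class _Gpu(BaseModel):
    gpu_type: str
    gpu_count: int


assert _serialize(_Point(1, 2)) == {"x": 1, "y": 2}
assert _serialize(_Gpu(gpu_type="A10", gpu_count=2)) == {"gpu_type": "A10", "gpu_count": 2}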
@@ -52,6 +53,9 @@ def serialize(obj: Any): if is_dataclass(obj): return asdict(obj) + if is_pydantic_model(obj): + return obj.model_dump() + return str(obj) def finish(self, payload=None): # pylint: disable=W0221 diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index 75f00adfd..fb48fd3ea 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -31,6 +31,8 @@ class AquaDeploymentHandler(AquaAPIhandler): Lists all the AQUA deployments. get_deployment_config(self, model_id) Gets the deployment config for Aqua model. + list_shapes(self) + Lists the valid model deployment shapes. Raises ------ @@ -49,6 +51,8 @@ def get(self, id: Union[str, List[str]] = None): f"The request to {self.request.path} must include either a single model ID or a list of model IDs.", ) return self.get_deployment_config(id) + elif paths.startswith("aqua/deployments/shapes"): + return self.list_shapes() elif paths.startswith("aqua/deployments"): if not id: return self.list() @@ -135,11 +139,15 @@ def get_deployment_config(self, model_id: Union[str, List[str]]): """ app = AquaDeploymentApp() + compartment_id = self.get_argument("compartment_id", default=COMPARTMENT_OCID) + if isinstance(model_id, list): # Handle multiple model deployment primary_model_id = self.get_argument("primary_model_id", default=None) deployment_config = app.get_multimodel_deployment_config( - model_ids=model_id, primary_model_id=primary_model_id + model_ids=model_id, + primary_model_id=primary_model_id, + compartment_id=compartment_id, ) else: # Handle single model deployment @@ -147,6 +155,21 @@ def get_deployment_config(self, model_id: Union[str, List[str]]): return self.finish(deployment_config) + def list_shapes(self): + """ + Lists the valid model deployment shapes. + + Returns + ------- + List[ComputeShapeSummary]: + The list of the model deployment shapes. 
+ """ + compartment_id = self.get_argument("compartment_id", default=COMPARTMENT_OCID) + + return self.finish( + AquaDeploymentApp().list_shapes(compartment_id=compartment_id) + ) + class AquaDeploymentInferenceHandler(AquaAPIhandler): @staticmethod @@ -263,6 +286,7 @@ def post(self, *args, **kwargs): # noqa: ARG002 __handlers__ = [ ("deployments/?([^/]*)/params", AquaDeploymentParamsHandler), ("deployments/config/?([^/]*)", AquaDeploymentHandler), + ("deployments/shapes/?([^/]*)", AquaDeploymentHandler), ("deployments/?([^/]*)", AquaDeploymentHandler), ("deployments/?([^/]*)/activate", AquaDeploymentHandler), ("deployments/?([^/]*)/deactivate", AquaDeploymentHandler), diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 53195d1e3..7d32aee90 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -4,12 +4,19 @@ import json import shlex +from datetime import datetime, timedelta from typing import Dict, List, Optional, Union +from cachetools import TTLCache, cached +from oci.data_science.models import ModelDeploymentShapeSummary from pydantic import ValidationError from ads.aqua.app import AquaApp, logger -from ads.aqua.common.entities import AquaMultiModelRef, ContainerSpec +from ads.aqua.common.entities import ( + AquaMultiModelRef, + ComputeShapeSummary, + ContainerSpec, +) from ads.aqua.common.enums import InferenceContainerTypeFamily, Tags from ads.aqua.common.errors import AquaRuntimeError, AquaValueError from ads.aqua.common.utils import ( @@ -90,6 +97,11 @@ class AquaDeploymentApp(AquaApp): Lists all Aqua deployments within a specified compartment and/or project. get_deployment_config(self, model_id: str) -> AquaDeploymentConfig: Gets the deployment config of given Aqua model. + get_multimodel_deployment_config(self, model_ids: List[str],...) -> ModelDeploymentConfigSummary: + Retrieves the deployment configuration for multiple Aqua models and calculates + the GPU allocations for all compatible shapes. + list_shapes(self, **kwargs) -> List[Dict]: + Lists the valid model deployment shapes. Note: Use `ads aqua deployment --help` to get more details on the parameters available. @@ -931,7 +943,10 @@ def get_deployment_config(self, model_id: str) -> AquaDeploymentConfig: name="aqua", ) def get_multimodel_deployment_config( - self, model_ids: List[str], primary_model_id: Optional[str] = None + self, + model_ids: List[str], + primary_model_id: Optional[str] = None, + **kwargs: Dict, ) -> ModelDeploymentConfigSummary: """ Retrieves the deployment configuration for multiple Aqua models and calculates @@ -956,6 +971,9 @@ def get_multimodel_deployment_config( primary_model_id : Optional[str] The OCID of the primary Aqua model. If provided, GPU allocation will prioritize this model. Otherwise, GPUs will be evenly allocated. + **kwargs: Dict + - compartment_id: str + The compartment OCID to retrieve the model deployment shapes. Returns ------- @@ -963,7 +981,18 @@ def get_multimodel_deployment_config( A summary of the model deployment configurations and GPU allocations. 
""" - return MultiModelDeploymentConfigLoader(self).load(model_ids, primary_model_id) + compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID) + + # Get the all model deployment available shapes in a given compartment + available_shapes = self.list_shapes(compartment_id=compartment_id) + + return MultiModelDeploymentConfigLoader( + deployment_app=self, + ).load( + shapes=available_shapes, + model_ids=model_ids, + primary_model_id=primary_model_id, + ) def get_deployment_default_params( self, @@ -1123,3 +1152,35 @@ def _find_restricted_params( restricted_params.append(key.lstrip("-")) return restricted_params + + @telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua") + @cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now)) + def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]: + """Lists the valid model deployment shapes. + + Parameters + ---------- + kwargs + Keyword arguments, such as compartment_id + for `list_call_get_all_results `_ + + Returns + ------- + List[ComputeShapeSummary]: + The list of the model deployment shapes. + """ + compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID) + oci_shapes: list[ModelDeploymentShapeSummary] = self.list_resource( + self.ds_client.list_model_deployment_shapes, + compartment_id=compartment_id, + **kwargs, + ) + return [ + ComputeShapeSummary( + core_count=oci_shape.core_count, + memory_in_gbs=oci_shape.memory_in_gbs, + shape_series=oci_shape.shape_series, + name=oci_shape.name, + ) + for oci_shape in oci_shapes + ] diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 39a0d5580..aaf2aed08 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -8,7 +8,7 @@ from pydantic import BaseModel, Field, model_validator from ads.aqua import logger -from ads.aqua.common.entities import AquaMultiModelRef, ShapeInfo +from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import Tags from ads.aqua.config.utils.serializer import Serializable from ads.aqua.constants import UNKNOWN, UNKNOWN_DICT @@ -17,6 +17,28 @@ from ads.common.utils import get_console_link +class ShapeInfo(Serializable): + """ + Represents the configuration details for a compute instance shape. + """ + + instance_shape: Optional[str] = Field( + default=None, + description="The identifier of the compute instance shape (e.g., VM.Standard2.1)", + ) + instance_count: Optional[int] = Field( + default=None, description="The number of instances for the given shape." 
+ ) + ocpus: Optional[float] = Field( + default=None, + description="The number of Oracle CPUs allocated for the instance.", + ) + memory_in_gbs: Optional[float] = Field( + default=None, + description="The total memory allocated for the instance, in gigabytes.", + ) + + class ModelParams(Serializable): max_tokens: Optional[int] = None temperature: Optional[float] = None diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index b6103d7ac..2626bf70c 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -10,6 +10,7 @@ from typing import Dict, List, Optional from ads.aqua.app import AquaApp +from ads.aqua.common.entities import ComputeShapeSummary from ads.aqua.modeldeployment.entities import ( AquaDeploymentConfig, ConfigurationItem, @@ -42,13 +43,18 @@ def __init__(self, deployment_app: AquaApp): self.deployment_app = deployment_app def load( - self, model_ids: List[str], primary_model_id: Optional[str] = None + self, + shapes: List[ComputeShapeSummary], + model_ids: List[str], + primary_model_id: Optional[str] = None, ) -> ModelDeploymentConfigSummary: """ Retrieves deployment configurations for multiple models and calculates compatible GPU allocations. Parameters ---------- + shapes : List[ComputeShapeSummary] + Model deployment shapes. model_ids : List[str] A list of OCIDs for the Aqua models. primary_model_id : Optional[str], optional @@ -97,7 +103,10 @@ def load( # Compute GPU allocations based on the common shapes and optionally prioritize a primary model. gpu_allocation = self._compute_gpu_allocation( - common_shapes, model_shape_gpu, primary_model_id + shapes=shapes, + common_shapes=common_shapes, + model_shape_gpu=model_shape_gpu, + primary_model_id=primary_model_id, ) logger.debug(f"GPU Allocation: {gpu_allocation}") @@ -178,6 +187,7 @@ def _get_common_shapes( def _compute_gpu_allocation( self, + shapes: List[ComputeShapeSummary], common_shapes: List[str], model_shape_gpu: Dict[str, Dict[str, List[int]]], primary_model_id: Optional[str], @@ -186,6 +196,16 @@ def _compute_gpu_allocation( gpu_allocation = {} for common_shape in common_shapes: + total_gpus_available = 0 + + # search the shape in the available shapes list + shape_summary = next( + (shape for shape in shapes if shape.name == common_shape), + None, + ) + if shape_summary and shape_summary.gpu_specs: + total_gpus_available = shape_summary.gpu_specs.gpu_count + model_gpu = { model: shape_gpu[common_shape] for model, shape_gpu in model_shape_gpu.items() @@ -195,19 +215,26 @@ def _compute_gpu_allocation( if len(model_gpu) != len(model_shape_gpu): continue - is_compatible, max_gpu_count, combination = self._verify_compatibility( - model_gpu, primary_model_id + is_compatible, total_gpus_available, combination = ( + self._verify_compatibility( + total_gpus_available=total_gpus_available, + model_gpu_dict=model_gpu, + primary_model_id=primary_model_id, + ) ) if is_compatible: gpu_allocation[common_shape] = GPUShapeAllocation( - models=combination, total_gpus_available=max_gpu_count + models=combination, total_gpus_available=total_gpus_available ) return gpu_allocation def _verify_compatibility( - self, model_gpu_dict: Dict, primary_model_id: str = None + self, + total_gpus_available: int, + model_gpu_dict: Dict, + primary_model_id: str = None, ) -> tuple: """Calculates the gpu allocations for all compatible shapes. If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated. 
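(For illustration, the allocation idea behind this method can be shown with a toy, standalone example: enumerate each model's allowed GPU counts, keep only the combinations that sum to the shape's total GPU count, and prefer the most evenly balanced one. pick_even_allocation and the sample options below are illustrative only, a simplified sketch rather than the loader's actual implementation.)

import itertools


def pick_even_allocation(model_gpu_options: dict, total_gpus: int):
    """Return the combination whose counts sum to total_gpus with the smallest spread."""
    names = list(model_gpu_options)
    best, best_spread = None, None
    for combo in itertools.product(*(model_gpu_options[n] for n in names)):
        if sum(combo) != total_gpus:
            continue
        spread = max(combo) - min(combo)  # smaller spread means a more even split
        if best_spread is None or spread < best_spread:
            best, best_spread = dict(zip(names, combo)), spread
    return best


options = {"model_a": [2, 4], "model_b": [1, 2, 4], "model_c": [1, 2, 8]}
print(pick_even_allocation(options, total_gpus=8))  # {'model_a': 2, 'model_b': 4, 'model_c': 2}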
@@ -234,7 +261,7 @@ def _verify_compatibility( tuple: A tuple of gpu count allocation result. """ - maximum_gpu_count = max([sorted(gpus)[-1] for gpus in model_gpu_dict.values()]) + model_gpu_dict_copy = copy.deepcopy(model_gpu_dict) if primary_model_id: primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id)) @@ -243,12 +270,13 @@ def _verify_compatibility( for combination in combinations: if ( len(combination) == len(model_gpu_dict_copy) - and sum(combination.values()) == maximum_gpu_count - gpu_count + and sum(combination.values()) + == total_gpus_available - gpu_count ): combination[primary_model_id] = gpu_count return ( True, - maximum_gpu_count, + total_gpus_available, [ GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) for ocid, gpu_count in combination.items() @@ -262,7 +290,7 @@ def _verify_compatibility( for combination in combinations: if ( len(combination) == len(model_gpu_dict_copy) - and sum(combination.values()) == maximum_gpu_count + and sum(combination.values()) == total_gpus_available ): difference = max(combination.values()) - min(combination.values()) if difference < minimal_difference: @@ -276,7 +304,7 @@ def _verify_compatibility( if optimal_combination: return ( True, - maximum_gpu_count, + total_gpus_available, [ GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) for ocid, gpu_count in optimal_combination.items() diff --git a/tests/unitary/with_extras/aqua/test_common_entities.py b/tests/unitary/with_extras/aqua/test_common_entities.py new file mode 100644 index 000000000..7d3a67aef --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_common_entities.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*-- + +# Copyright (c) 2025 Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ + +import pytest + +from ads.aqua.common.entities import ComputeShapeSummary + + +class TestComputeShapeSummary: + @pytest.mark.parametrize( + "input_data, expected_gpu_specs", + [ + # Case 1: Shape is present in GPU_SPECS. + ( + { + "core_count": 32, + "memory_in_gbs": 512, + "name": "VM.GPU2.1", + "shape_series": "GPU", + }, + {"gpu_type": "P100", "gpu_count": 1, "gpu_memory_in_gbs": 16}, + ), + # Case 2: Not in GPU_SPECS; fallback extraction should yield gpu_count. + ( + { + "core_count": 16, + "memory_in_gbs": 256, + "name": "VM.GPU.UNKNOWN.4", + "shape_series": "GPU", + }, + {"gpu_type": None, "gpu_count": 4, "gpu_memory_in_gbs": None}, + ), + # Case 3: Non-GPU shape should not populate GPU specs. + ( + { + "core_count": 8, + "memory_in_gbs": 64, + "name": "VM.Standard2.1", + "shape_series": "STANDARD", + }, + None, + ), + ], + ) + def test_set_gpu_specs(self, input_data, expected_gpu_specs): + shape = ComputeShapeSummary(**input_data) + if expected_gpu_specs is None: + assert shape.gpu_specs is None + else: + assert shape.gpu_specs is not None + # Verify GPU type, count, and memory. 
+ assert shape.gpu_specs.gpu_type == expected_gpu_specs.get("gpu_type") + assert shape.gpu_specs.gpu_count == expected_gpu_specs.get("gpu_count") + assert shape.gpu_specs.gpu_memory_in_gbs == expected_gpu_specs.get( + "gpu_memory_in_gbs" + ) diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_deployment_shapes.json b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_deployment_shapes.json new file mode 100644 index 000000000..66f3d3aaa --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_deployment_shapes.json @@ -0,0 +1,288 @@ +{ + "shapes": [ + { + "core_count": 1, + "gpu_specs": null, + "memory_in_gbs": 15, + "name": "VM.Standard2.1", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 2, + "gpu_specs": null, + "memory_in_gbs": 30, + "name": "VM.Standard2.2", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 4, + "gpu_specs": null, + "memory_in_gbs": 30, + "name": "VM.Standard2.4", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 8, + "gpu_specs": null, + "memory_in_gbs": 120, + "name": "VM.Standard2.8", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 16, + "gpu_specs": null, + "memory_in_gbs": 240, + "name": "VM.Standard2.16", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 24, + "gpu_specs": null, + "memory_in_gbs": 320, + "name": "VM.Standard2.24", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 64, + "gpu_specs": null, + "memory_in_gbs": 1024, + "name": "VM.Standard.E3.Flex", + "shape_series": "AMD_ROME" + }, + { + "core_count": 64, + "gpu_specs": null, + "memory_in_gbs": 1024, + "name": "VM.Standard.E4.Flex", + "shape_series": "AMD_ROME" + }, + { + "core_count": 94, + "gpu_specs": null, + "memory_in_gbs": 1049, + "name": "VM.Standard.E5.Flex", + "shape_series": "AMD_ROME" + }, + { + "core_count": 32, + "gpu_specs": null, + "memory_in_gbs": 512, + "name": "VM.Standard3.Flex", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 18, + "gpu_specs": null, + "memory_in_gbs": 256, + "name": "VM.Optimized3.Flex", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 80, + "gpu_specs": null, + "memory_in_gbs": 512, + "name": "VM.Standard.A1.Flex", + "shape_series": "ARM" + }, + { + "core_count": 78, + "gpu_specs": null, + "memory_in_gbs": 946, + "name": "VM.Standard.A2.Flex", + "shape_series": "ARM" + }, + { + "core_count": 12, + "gpu_specs": { + "gpu_count": 1, + "gpu_memory_in_gbs": 16, + "gpu_type": "P100" + }, + "memory_in_gbs": 72, + "name": "VM.GPU2.1", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 6, + "gpu_specs": { + "gpu_count": 1, + "gpu_memory_in_gbs": 16, + "gpu_type": "V100" + }, + "memory_in_gbs": 90, + "name": "VM.GPU3.1", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 12, + "gpu_specs": { + "gpu_count": 2, + "gpu_memory_in_gbs": 32, + "gpu_type": "V100" + }, + "memory_in_gbs": 180, + "name": "VM.GPU3.2", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 24, + "gpu_specs": { + "gpu_count": 4, + "gpu_memory_in_gbs": 64, + "gpu_type": "V100" + }, + "memory_in_gbs": 360, + "name": "VM.GPU3.4", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 28, + "gpu_specs": { + "gpu_count": 2, + "gpu_memory_in_gbs": 32, + "gpu_type": "P100" + }, + "memory_in_gbs": 192, + "name": "BM.GPU2.2", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 52, + "gpu_specs": { + "gpu_count": 8, + "gpu_memory_in_gbs": 128, + "gpu_type": "V100" + }, + "memory_in_gbs": 768, + "name": "BM.GPU3.8", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 64, 
+ "gpu_specs": { + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + "gpu_type": "A100" + }, + "memory_in_gbs": 2048, + "name": "BM.GPU4.8", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 64, + "gpu_specs": { + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + "gpu_type": "A100" + }, + "memory_in_gbs": 2048, + "name": "BM.GPU.A100-v2.8", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 112, + "gpu_specs": { + "gpu_count": 8, + "gpu_memory_in_gbs": 1128, + "gpu_type": "H200" + }, + "memory_in_gbs": 2048, + "name": "BM.GPU.H100.8", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 32, + "gpu_specs": { + "gpu_count": 2, + "gpu_memory_in_gbs": null, + "gpu_type": null + }, + "memory_in_gbs": 1024, + "name": "BM.GPU.T1.2", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 64, + "gpu_specs": { + "gpu_count": 4, + "gpu_memory_in_gbs": 96, + "gpu_type": "A10" + }, + "memory_in_gbs": 1024, + "name": "BM.GPU.A10.4", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 64, + "gpu_specs": { + "gpu_count": 4, + "gpu_memory_in_gbs": 96, + "gpu_type": "A10" + }, + "memory_in_gbs": 1024, + "name": "VM.GPU.A10.4", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 112, + "gpu_specs": { + "gpu_count": 4, + "gpu_memory_in_gbs": 192, + "gpu_type": "L40S" + }, + "memory_in_gbs": 1024, + "name": "BM.GPU.L40S-NC.4", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 15, + "gpu_specs": { + "gpu_count": 1, + "gpu_memory_in_gbs": 24, + "gpu_type": "A10" + }, + "memory_in_gbs": 240, + "name": "VM.GPU.A10.1", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 30, + "gpu_specs": { + "gpu_count": 2, + "gpu_memory_in_gbs": 48, + "gpu_type": "A10" + }, + "memory_in_gbs": 480, + "name": "VM.GPU.A10.2", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 64, + "gpu_specs": null, + "memory_in_gbs": 1024, + "name": "VM.Standard.AMD.Generic", + "shape_series": "GENERIC" + }, + { + "core_count": 32, + "gpu_specs": null, + "memory_in_gbs": 512, + "name": "VM.Standard.Intel.Generic", + "shape_series": "GENERIC" + }, + { + "core_count": 80, + "gpu_specs": null, + "memory_in_gbs": 512, + "name": "VM.Standard.Ampere.Generic", + "shape_series": "GENERIC" + }, + { + "core_count": 32, + "gpu_specs": null, + "memory_in_gbs": 512, + "name": "VM.Standard.x86.Generic", + "shape_series": "GENERIC" + } + ] +} diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 272637259..a660d691e 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -15,7 +15,7 @@ import pytest from parameterized import parameterized -from ads.aqua.common.entities import AquaMultiModelRef +from ads.aqua.common.entities import AquaMultiModelRef, ComputeShapeSummary import ads.aqua.modeldeployment.deployment import ads.config from ads.aqua.common.entities import AquaMultiModelRef @@ -793,8 +793,9 @@ def test_get_deployment_config(self): @patch( "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" ) + @patch("ads.aqua.modeldeployment.AquaDeploymentApp.list_shapes") def test_get_multimodel_deployment_config( - self, mock_fetch_deployment_configs_concurrently + self, mock_list_shapes, mock_fetch_deployment_configs_concurrently ): config_json = os.path.join( self.curr_dir, @@ -803,6 +804,20 @@ def test_get_multimodel_deployment_config( with open(config_json, "r") as _file: config = json.load(_file) + shapes = [] + + with open( + os.path.join( + 
self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + mock_list_shapes.return_value = shapes + mock_fetch_deployment_configs_concurrently.return_value = { "model_a": AquaDeploymentConfig(**config) } @@ -824,8 +839,13 @@ def test_get_multimodel_deployment_config( @patch( "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" ) + @patch("ads.aqua.modeldeployment.AquaDeploymentApp.list_shapes") def test_get_multimodel_compatible_shapes_invalid_config( - self, missing_key, error, mock_fetch_deployment_configs_concurrently + self, + missing_key, + error, + mock_list_shapes, + mock_fetch_deployment_configs_concurrently, ): config_json = os.path.join( self.curr_dir, @@ -836,6 +856,20 @@ def test_get_multimodel_compatible_shapes_invalid_config( config.pop(missing_key) + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + mock_list_shapes.return_value = shapes + mock_fetch_deployment_configs_concurrently.return_value = { "model_a": AquaDeploymentConfig(**config) } @@ -846,7 +880,7 @@ def test_get_multimodel_compatible_shapes_invalid_config( def test_verify_compatibility(self): result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( - TestDataset.model_gpu_dict + 8, TestDataset.model_gpu_dict ) assert result[0] == True @@ -854,7 +888,7 @@ def test_verify_compatibility(self): assert len(result[2]) == 3 result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( - model_gpu_dict=TestDataset.model_gpu_dict, primary_model_id="model_b" + 8, model_gpu_dict=TestDataset.model_gpu_dict, primary_model_id="model_b" ) assert result[0] == True @@ -867,7 +901,7 @@ def test_verify_compatibility(self): assert item.gpu_count == 4 result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( - TestDataset.incompatible_model_gpu_dict + 0, TestDataset.incompatible_model_gpu_dict ) assert result[0] == False From f8e48708cd24eaae9b514a561999717921e32d95 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 28 Feb 2025 17:47:34 -0800 Subject: [PATCH 065/124] Fixes validator --- ads/aqua/common/entities.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index 864ee19b6..7a3deb851 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -65,7 +65,8 @@ class ComputeShapeSummary(Serializable): ) @model_validator(mode="after") - def set_gpu_specs(self, model: "ComputeShapeSummary") -> "ComputeShapeSummary": + @classmethod + def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary": """ Validates and populates GPU specifications if the shape_series indicates a GPU-based shape. 
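(For illustration, the fallback behaviour this validator documents, checking a known-specs table first and otherwise reading the GPU count from a trailing ".<digits>" suffix in the shape name, reduces to a few lines. KNOWN_GPU_SHAPES and guess_gpu_count below are illustrative stand-ins, not the library's GPU_SPECS table or validator code.)

import re
from typing import Optional

# Illustrative subset of a shape-name -> GPU specs table.
KNOWN_GPU_SHAPES = {
    "VM.GPU.A10.2": {"gpu_type": "A10", "gpu_count": 2, "gpu_memory_in_gbs": 48},
}


def guess_gpu_count(shape_name: str) -> Optional[int]:
    """Prefer the lookup table; fall back to the trailing '.<digits>' convention."""
    if shape_name in KNOWN_GPU_SHAPES:
        return KNOWN_GPU_SHAPES[shape_name]["gpu_count"]
    match = re.search(r"\.(\d+)$", shape_name)  # e.g. "VM.GPU3.4" -> 4
    return int(match.group(1)) if match else None


assert guess_gpu_count("VM.GPU.A10.2") == 2  # table hit
assert guess_gpu_count("VM.GPU.UNKNOWN.4") == 4  # regex fallback
assert guess_gpu_count("VM.Standard2.Flex") is None  # no trailing digit group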
From 1f186a1b45dca351f164ce4f5b6c6dbcea7405bd Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 28 Feb 2025 23:06:36 -0800 Subject: [PATCH 066/124] Fixes tests --- ads/aqua/modeldeployment/utils.py | 28 +++++++++++-------- .../with_extras/aqua/test_deployment.py | 11 +++----- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 2626bf70c..fffd4ddab 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -54,7 +54,7 @@ def load( Parameters ---------- shapes : List[ComputeShapeSummary] - Model deployment shapes. + Model deployment available shapes. model_ids : List[str] A list of OCIDs for the Aqua models. primary_model_id : Optional[str], optional @@ -91,9 +91,19 @@ def load( common_shapes = self._get_common_shapes(model_shape_gpu) logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}") + # Filter out not available shapes + available_shapes = [item.name.upper() for item in shapes] + logger.debug(f"Service Available Shapes: {available_shapes}") + common_shapes = [ + shape_name + for shape_name in common_shapes + if shape_name.upper() in available_shapes + ] + logger.debug(f"Available Common Shapes: {common_shapes}") + if not common_shapes: summary.error_message = ( - "The selected models do not share any common deployment shapes. " + "The selected models do not share any available common deployment shapes. " "Please ensure that all chosen models are compatible for multi-model deployment." ) logger.debug( @@ -215,12 +225,10 @@ def _compute_gpu_allocation( if len(model_gpu) != len(model_shape_gpu): continue - is_compatible, total_gpus_available, combination = ( - self._verify_compatibility( - total_gpus_available=total_gpus_available, - model_gpu_dict=model_gpu, - primary_model_id=primary_model_id, - ) + is_compatible, combination = self._verify_compatibility( + total_gpus_available=total_gpus_available, + model_gpu_dict=model_gpu, + primary_model_id=primary_model_id, ) if is_compatible: @@ -276,7 +284,6 @@ def _verify_compatibility( combination[primary_model_id] = gpu_count return ( True, - total_gpus_available, [ GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) for ocid, gpu_count in combination.items() @@ -304,14 +311,13 @@ def _verify_compatibility( if optimal_combination: return ( True, - total_gpus_available, [ GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) for ocid, gpu_count in optimal_combination.items() ], ) - return (False, 0, []) + return (False, []) @staticmethod def get_combinations(input_dict: dict): diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index a660d691e..5ea768bac 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -884,18 +884,16 @@ def test_verify_compatibility(self): ) assert result[0] == True - assert result[1] == 8 - assert len(result[2]) == 3 + assert len(result[1]) == 3 result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( 8, model_gpu_dict=TestDataset.model_gpu_dict, primary_model_id="model_b" ) assert result[0] == True - assert result[1] == 8 - assert len(result[2]) == 3 + assert len(result[1]) == 3 - for item in result[2]: + for item in result[1]: if item.ocid == "model_b": # model_b gets the maximum gpu count assert item.gpu_count == 4 @@ -905,8 +903,7 @@ def test_verify_compatibility(self): ) assert result[0] == False - assert result[1] == 0 - assert 
result[2] == [] + assert result[1] == [] @patch("ads.aqua.modeldeployment.deployment.get_container_config") @patch("ads.aqua.model.AquaModelApp.create") From 86c75a4cc89d47bef23d804c2ba2259690ef8fdf Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Sat, 1 Mar 2025 00:04:26 -0800 Subject: [PATCH 067/124] Relax validation for the service models. --- ads/aqua/model/model.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index cd4114de7..6fcf7c6a0 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -266,11 +266,11 @@ def create_multi( display_name = source_model.display_name model.model_name = model.model_name or display_name - if not source_model.freeform_tags.get(Tags.AQUA_SERVICE_MODEL_TAG, UNKNOWN): - raise AquaValueError( - f"Invalid selected model {display_name}. " - "Currently only service models are supported for multi model deployment." - ) + # if not source_model.freeform_tags.get(Tags.AQUA_SERVICE_MODEL_TAG, UNKNOWN): + # raise AquaValueError( + # f"Invalid selected model {display_name}. " + # "Currently only service models are supported for multi model deployment." + # ) if ( source_model.freeform_tags.get(Tags.TASK, UNKNOWN) From 798d1ef41f563aa9cf21217c1b5b777d3b9f88a1 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Sat, 1 Mar 2025 00:13:19 -0800 Subject: [PATCH 068/124] Fixes validation on text generation models. --- ads/aqua/model/model.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 6fcf7c6a0..ac4fca876 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -65,7 +65,6 @@ FineTuningCustomMetadata, FineTuningMetricCategories, ModelCustomMetadataFields, - ModelTask, ModelType, ) from ads.aqua.model.entities import ( @@ -272,13 +271,10 @@ def create_multi( # "Currently only service models are supported for multi model deployment." # ) - if ( - source_model.freeform_tags.get(Tags.TASK, UNKNOWN) - != ModelTask.TEXT_GENERATION - ): + if source_model.freeform_tags.get(Tags.TASK, UNKNOWN) != "text_generation": raise AquaValueError( f"Invalid or missing {Tags.TASK} tag for selected model {display_name}. " - f"Currently only {ModelTask.TEXT_GENERATION} models are support for multi model deployment." + f"Currently only `text_generation` models are support for multi model deployment." 
) display_name_list.append(display_name) From af2dfa43f944d82354497e562834654bed4a034a Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Sat, 1 Mar 2025 19:47:25 -0800 Subject: [PATCH 069/124] Enhance container family validation for multi-model deployment --- ads/aqua/common/enums.py | 1 + ads/aqua/model/model.py | 15 ++++++++---- ads/aqua/modeldeployment/entities.py | 34 ++++++++++++++++++---------- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/ads/aqua/common/enums.py b/ads/aqua/common/enums.py index 0b9e6ab47..405cefb87 100644 --- a/ads/aqua/common/enums.py +++ b/ads/aqua/common/enums.py @@ -52,6 +52,7 @@ class InferenceContainerType(ExtendedEnum): class InferenceContainerTypeFamily(ExtendedEnum): AQUA_VLLM_CONTAINER_FAMILY = "odsc-vllm-serving" + AQUA_VLLM_V1_CONTAINER_FAMILY = "odsc-vllm-serving-v1" AQUA_TGI_CONTAINER_FAMILY = "odsc-tgi-serving" AQUA_LLAMA_CPP_CONTAINER_FAMILY = "odsc-llama-cpp-serving" diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index ac4fca876..280290ad6 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -249,15 +249,19 @@ def create_multi( """ if not models: - raise AquaValueError("Model list cannot be empty.") + raise AquaValueError( + "Model list cannot be empty. Please provide at least one model for deployment." + ) artifact_list = [] display_name_list = [] model_custom_metadata = ModelCustomMetadata() + # TODO: update it when more deployment containers are supported - default_deployment_container = ( + supported_container_families = ( InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY ) + deployment_container = InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY # Process each model for idx, model in enumerate(models): @@ -265,6 +269,7 @@ def create_multi( display_name = source_model.display_name model.model_name = model.model_name or display_name + # We cannot rely on this tag, service and cached models doesn't have it. # if not source_model.freeform_tags.get(Tags.AQUA_SERVICE_MODEL_TAG, UNKNOWN): # raise AquaValueError( # f"Invalid selected model {display_name}. " @@ -297,10 +302,10 @@ def create_multi( ), ).value - if default_deployment_container != deployment_container: + if deployment_container not in supported_container_families: raise AquaValueError( f"Unsupported deployment container '{deployment_container}' for model '{source_model.id}'. " - f"Only '{InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY}' is supported for multi-model deployments." + f"Only '{supported_container_families}' are supported for multi-model deployments." ) # Add model-specific metadata @@ -353,7 +358,7 @@ def create_multi( # Add global metadata model_custom_metadata.add( key=ModelCustomMetadataFields.DEPLOYMENT_CONTAINER, - value=default_deployment_container, + value=deployment_container, description=f"Inference container mapping for {model_group_display_name}", category="Other", ) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index aaf2aed08..fee4ad44c 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -510,7 +510,8 @@ def validate_multimodel_deployment_feasibility( if not self.models: logger.error("User defined model group (List[AquaMultiModelRef]) is None.") raise ConfigValidationError( - "Multi-model deployment requires at least one model, but none were provided. Please add one or more models to the model group to proceed." + "Multi-model deployment requires at least one model, but none were provided. 
" + "Please add one or more models to the model group to proceed." ) selected_shape = self.instance_shape @@ -520,7 +521,8 @@ def validate_multimodel_deployment_feasibility( f"The model group is not compatible with the selected instance shape {selected_shape}" ) raise ConfigValidationError( - f"The model group is not compatible with the selected instance shape '{selected_shape}'. Select a different instance shape." + f"The model group is not compatible with the selected instance shape " + f"'{selected_shape}'. Select a different instance shape." ) total_available_gpus = models_config_summary.gpu_allocation[ @@ -534,10 +536,12 @@ def validate_multimodel_deployment_feasibility( if len(missing_model_keys) > 0: logger.error( - f"Missing the following model entry with key {missing_model_keys} in ModelDeploymentConfigSummary" + f"Missing the following model entry with key {missing_model_keys} " + "in ModelDeploymentConfigSummary" ) raise ConfigValidationError( - "One or more selected models are missing from the configuration, preventing validation for deployment on the given shape." + "One or more selected models are missing from the configuration, preventing " + "validation for deployment on the given shape." ) sum_model_gpus = 0 @@ -549,15 +553,17 @@ def validate_multimodel_deployment_feasibility( # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. # Our current configuration does not support this flexibility. + # multi_deployment_shape = aqua_deployment_config.shape - multi_deployment_shape = list(aqua_deployment_config.configuration.keys()) - if selected_shape not in multi_deployment_shape: + if selected_shape not in aqua_deployment_config.configuration: logger.error( - f"Model with OCID {model.model_id} in the model group is not compatible with the selected instance shape: {selected_shape}" + f"Model with OCID {model.model_id} in the model group is not compatible " + f"with the selected instance shape: {selected_shape}" ) raise ConfigValidationError( - "Select a different instance shape. One or more models in the group are incompatible with the selected instance shape." + "Select a different instance shape. One or more models in the " + "group are incompatible with the selected instance shape." ) multi_model_configs = aqua_deployment_config.configuration.get( @@ -570,18 +576,22 @@ def validate_multimodel_deployment_feasibility( if model.gpu_count not in valid_gpu_configurations: valid_gpu_str = ", ".join(map(str, valid_gpu_configurations)) logger.error( - f"Model {model.model_id} allocated {model.gpu_count} GPUs by user, but its deployment configuration requires either {valid_gpu_str} GPUs." + f"Model {model.model_id} allocated {model.gpu_count} GPUs by user, " + f"but its deployment configuration requires either {valid_gpu_str} GPUs." ) raise ConfigValidationError( - "Change the GPU count for one or more models in the model group. Adjust GPU allocations per model or choose a larger instance shape." + "Change the GPU count for one or more models in the model group. " + "Adjust GPU allocations per model or choose a larger instance shape." ) if sum_model_gpus > total_available_gpus: logger.error( - f"Selected shape {selected_shape} has {total_available_gpus} GPUs while model group has {sum_model_gpus} GPUs." + f"Selected shape {selected_shape} has {total_available_gpus} " + f"GPUs while model group has {sum_model_gpus} GPUs." 
) raise ConfigValidationError( - "Total requested GPU count exceeds the available GPU capacity for the selected instance shape. Adjust GPU allocations per model or choose a larger instance shape." + "Total requested GPU count exceeds the available GPU capacity for the selected " + "instance shape. Adjust GPU allocations per model or choose a larger instance shape." ) class Config: From 28b2ab2a5c93957cf198e09af34ddc45c31267d7 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Sun, 2 Mar 2025 21:47:37 -0800 Subject: [PATCH 070/124] Adds more GPU shapes --- ads/aqua/common/constants.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ads/aqua/common/constants.py b/ads/aqua/common/constants.py index a799bacdc..c2f6a50e0 100644 --- a/ads/aqua/common/constants.py +++ b/ads/aqua/common/constants.py @@ -76,6 +76,16 @@ "BM.GPU.A100-v2.8": { "gpu_type": "A100", "gpu_count": 8, - "gpu_memory_in_gbs": 320, + "gpu_memory_in_gbs": 640, + }, + "BM.GPU.MI300X.8": { + "gpu_type": "MI300X", + "gpu_count": 8, + "gpu_memory_in_gbs": 1536, + }, + "BM.GPU.L40S.4": { + "gpu_type": "L40S", + "gpu_count": 4, + "gpu_memory_in_gbs": 192, }, } From 10352031f83b91afc46a962d628456cae73dd930 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 3 Mar 2025 19:41:04 -0500 Subject: [PATCH 071/124] Improved to support custom model. --- ads/aqua/modeldeployment/utils.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index fffd4ddab..c1210ca64 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -6,6 +6,7 @@ import copy import itertools import logging +import math from concurrent.futures import ThreadPoolExecutor from typing import Dict, List, Optional @@ -77,15 +78,15 @@ def load( # Initialize the summary result with the deployment configurations. summary = ModelDeploymentConfigSummary(deployment_config=deployment) - # Ensure every model has at least one valid GPU configuration. - for model, shape_gpu in model_shape_gpu.items(): - if not shape_gpu: - summary.error_message = ( - "Unable to determine a valid GPU allocation for the selected models based on their current configurations. " - "Please try selecting a different set of models." - ) - logger.debug(f"No valid GPU configuration found for model `{model}`") - return summary + # # Ensure every model has at least one valid GPU configuration. + # for model, shape_gpu in model_shape_gpu.items(): + # if not shape_gpu: + # summary.error_message = ( + # "Unable to determine a valid GPU allocation for the selected models based on their current configurations. " + # "Please try selecting a different set of models." + # ) + # logger.debug(f"No valid GPU configuration found for model `{model}`") + # return summary # Identify common deployment shapes among all models. 
common_shapes = self._get_common_shapes(model_shape_gpu) @@ -222,6 +223,12 @@ def _compute_gpu_allocation( if shape_gpu[common_shape] } + # assume a list of possible gpu count to model without multi model deployment config + model_gpu = { + model: (gpu if gpu else self._assume_gpu_list(total_gpus_available)) + for model, gpu in model_gpu.items() + } + if len(model_gpu) != len(model_shape_gpu): continue @@ -238,6 +245,11 @@ def _compute_gpu_allocation( return gpu_allocation + @staticmethod + def _assume_gpu_list(total_gpus_available: int) -> list[int]: + """Generates a list of powers of 2 that's smaller than `total_gpus_available`.""" + return [2**i for i in range(math.log2(total_gpus_available) + 1)] + def _verify_compatibility( self, total_gpus_available: int, From 84474faa912d56eab31d3f07ce46e77fcca0756b Mon Sep 17 00:00:00 2001 From: Lu Peng <118394507+lu-ohai@users.noreply.github.com> Date: Mon, 3 Mar 2025 19:45:14 -0500 Subject: [PATCH 072/124] Update deployment.py --- ads/aqua/modeldeployment/deployment.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 7d32aee90..19772f97d 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -499,6 +499,8 @@ def _create_multi( # replaces `--served-model-name`` with user's model name container_params_dict = get_params_dict(container_params) container_params_dict.update({"--served-model-name": model.model_name}) + # replaces `--tensor-parallel-size` with model gpu count + container_params_dict.update({"--tensor-parallel-size": model.gpu_count}) params = build_params_string(container_params_dict) deployment_config = self.get_deployment_config(model.model_id) multi_model_deployment = deployment_config.configuration.get( From 24b30d79ea43733bdbb6e636927bb5f70547eb2b Mon Sep 17 00:00:00 2001 From: Lu Peng <118394507+lu-ohai@users.noreply.github.com> Date: Mon, 3 Mar 2025 20:09:31 -0500 Subject: [PATCH 073/124] Update deployment.py --- ads/aqua/modeldeployment/deployment.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 19772f97d..78c4303a6 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -539,6 +539,13 @@ def _create_multi( ) env_var.update({AQUA_MULTI_MODEL_CONFIG: json.dumps({"models": model_config})}) + + for env in container_spec.get(ContainerSpec.ENV_VARS, []): + if isinstance(env, dict): + for key, _ in env.items(): + if key not in env_var: + env_var.update(env) + logger.info(f"Env vars used for deploying {aqua_model.id} : {env_var}.") container_image_uri = ( From 1a4661f98d06c9d62fba4bdedafa3c4585fc460e Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Tue, 4 Mar 2025 16:37:15 -0500 Subject: [PATCH 074/124] Added support to calculate gpu allocations for custom models. 
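(The central idea of this change is that a custom model with no multi-model deployment configuration is assumed to accept any power-of-two GPU count up to the shape's capacity. A standalone sketch of that helper follows, with powers_of_two_up_to as an illustrative name rather than the patch's actual function:)

import math
from typing import List


def powers_of_two_up_to(total_gpus: int) -> List[int]:
    """Candidate GPU counts for a model without an explicit config: 1, 2, 4, ... <= total_gpus."""
    if total_gpus < 1:
        return []
    return [2**i for i in range(int(math.log2(total_gpus)) + 1)]


assert powers_of_two_up_to(8) == [1, 2, 4, 8]
assert powers_of_two_up_to(6) == [1, 2, 4]  # capped at the largest power of two that fits
assert powers_of_two_up_to(0) == []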
--- ads/aqua/modeldeployment/utils.py | 87 ++++--- .../with_extras/aqua/test_deployment.py | 236 ++++++++++++++++-- 2 files changed, 268 insertions(+), 55 deletions(-) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index c1210ca64..61362eb9d 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -12,6 +12,7 @@ from ads.aqua.app import AquaApp from ads.aqua.common.entities import ComputeShapeSummary +from ads.aqua.constants import UNKNOWN from ads.aqua.modeldeployment.entities import ( AquaDeploymentConfig, ConfigurationItem, @@ -78,28 +79,24 @@ def load( # Initialize the summary result with the deployment configurations. summary = ModelDeploymentConfigSummary(deployment_config=deployment) - # # Ensure every model has at least one valid GPU configuration. - # for model, shape_gpu in model_shape_gpu.items(): - # if not shape_gpu: - # summary.error_message = ( - # "Unable to determine a valid GPU allocation for the selected models based on their current configurations. " - # "Please try selecting a different set of models." - # ) - # logger.debug(f"No valid GPU configuration found for model `{model}`") - # return summary - # Identify common deployment shapes among all models. - common_shapes = self._get_common_shapes(model_shape_gpu) + common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu) logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}") # Filter out not available shapes available_shapes = [item.name.upper() for item in shapes] logger.debug(f"Service Available Shapes: {available_shapes}") - common_shapes = [ - shape_name - for shape_name in common_shapes - if shape_name.upper() in available_shapes - ] + + # If all models' shape configs are empty, use default deployment shapes instead + common_shapes = ( + available_shapes + if empty_configs + else [ + shape_name + for shape_name in common_shapes + if shape_name.upper() in available_shapes + ] + ) logger.debug(f"Available Common Shapes: {common_shapes}") if not common_shapes: @@ -188,13 +185,17 @@ def _extract_model_shape_gpu( def _get_common_shapes( self, model_shape_gpu: Dict[str, Dict[str, List[int]]] - ) -> List[str]: + ) -> tuple: """Finds common shapes across all models.""" - return list( - set.intersection( - *(set(shapes.keys()) for shapes in model_shape_gpu.values()) - ) - ) + common_shapes_set = [] + empty_configs = True + for shapes in model_shape_gpu.values(): + if shapes: + common_shapes_set.append(set(shapes.keys())) + empty_configs = False + if not common_shapes_set: + return [], empty_configs + return list(set.intersection(*(common_shapes_set))), empty_configs def _compute_gpu_allocation( self, @@ -217,21 +218,17 @@ def _compute_gpu_allocation( if shape_summary and shape_summary.gpu_specs: total_gpus_available = shape_summary.gpu_specs.gpu_count + # generate a list of possible gpu count from `total_gpus_available` for custom models + # without multi model deployment config model_gpu = { - model: shape_gpu[common_shape] + model: ( + shape_gpu[common_shape] + if shape_gpu.get(common_shape, UNKNOWN) + else self._generate_gpu_list(total_gpus_available) + ) for model, shape_gpu in model_shape_gpu.items() - if shape_gpu[common_shape] - } - - # assume a list of possible gpu count to model without multi model deployment config - model_gpu = { - model: (gpu if gpu else self._assume_gpu_list(total_gpus_available)) - for model, gpu in model_gpu.items() } - if len(model_gpu) != len(model_shape_gpu): - continue - is_compatible, 
combination = self._verify_compatibility( total_gpus_available=total_gpus_available, model_gpu_dict=model_gpu, @@ -246,9 +243,27 @@ def _compute_gpu_allocation( return gpu_allocation @staticmethod - def _assume_gpu_list(total_gpus_available: int) -> list[int]: - """Generates a list of powers of 2 that's smaller than `total_gpus_available`.""" - return [2**i for i in range(math.log2(total_gpus_available) + 1)] + def _generate_gpu_list(total_gpus_available: int) -> list[int]: + """Generates a list of powers of 2 that's smaller than or equal to `total_gpus_available`. + + Example + ------- + input: 8 + output: [1,2,4,8] + + Parameters + ---------- + total_gpus_available : int + Total GPU available + + Returns + ------- + list + A list of powers of 2. + """ + if total_gpus_available < 1: + return [] + return [2**i for i in range(int(math.log2(total_gpus_available)) + 1)] def _verify_compatibility( self, diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 5ea768bac..c41df843d 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -593,6 +593,196 @@ class TestDataset: "error_message": None, } + aqua_deployment_multi_model_config_summary_hybrid = { + "deployment_config": { + "model_a": { + "shape": [ + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8", + "VM.GPU.A10.2", + "VM.GPU.A10.4", + ], + "configuration": { + "VM.GPU.A10.2": { + "parameters": {}, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + } + ], + "shape_info": {"configs": [], "type": ""}, + }, + "VM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + {"gpu_count": 4, "parameters": {}}, + ], + "shape_info": {"configs": [], "type": ""}, + }, + "BM.GPU.A100-v2.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + ], + "shape_info": {"configs": [], "type": ""}, + }, + "BM.GPU.H100.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + {"gpu_count": 1, "parameters": {}}, + {"gpu_count": 2, "parameters": {}}, + {"gpu_count": 8, "parameters": {}}, + ], + "shape_info": {"configs": [], "type": ""}, + }, + }, + }, + "model_b": { + "configuration": {}, + "shape": [], + }, + "model_c": { + "configuration": {}, + "shape": [], + }, + }, + "gpu_allocation": { + "BM.GPU.H100.8": { + "models": [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], + "total_gpus_available": 8, + }, + "VM.GPU.A10.4": { + "models": [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 1}, + ], + "total_gpus_available": 4, + }, + "BM.GPU.A100-v2.8": { + "models": [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], + 
"total_gpus_available": 8, + }, + }, + "error_message": None, + } + + aqua_deployment_multi_model_config_summary_all_empty = { + "deployment_config": { + "model_a": { + "configuration": {}, + "shape": [], + }, + "model_b": { + "configuration": {}, + "shape": [], + }, + "model_c": { + "configuration": {}, + "shape": [], + }, + }, + "gpu_allocation": { + "VM.GPU3.4": { + "models": [ + {"ocid": "model_a", "gpu_count": 1}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 2}, + ], + "total_gpus_available": 4, + }, + "BM.GPU3.8": { + "models": [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], + "total_gpus_available": 8, + }, + "BM.GPU4.8": { + "models": [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], + "total_gpus_available": 8, + }, + "BM.GPU.H100.8": { + "models": [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], + "total_gpus_available": 8, + }, + "BM.GPU.A10.4": { + "models": [ + {"ocid": "model_a", "gpu_count": 1}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 2}, + ], + "total_gpus_available": 4, + }, + "VM.GPU.A10.4": { + "models": [ + {"ocid": "model_a", "gpu_count": 1}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 2}, + ], + "total_gpus_available": 4, + }, + "BM.GPU.L40S-NC.4": { + "models": [ + {"ocid": "model_a", "gpu_count": 1}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 2}, + ], + "total_gpus_available": 4, + }, + }, + "error_message": None, + } + model_gpu_dict = {"model_a": [2, 4], "model_b": [1, 2, 4], "model_c": [1, 2, 8]} incompatible_model_gpu_dict = { "model_a": [1, 2], @@ -828,24 +1018,12 @@ def test_get_multimodel_deployment_config( == TestDataset.aqua_deployment_multi_model_config_summary ) - @parameterized.expand( - [ - [ - "configuration", - "Unable to determine a valid GPU allocation for the selected models based on their current configurations. 
Please try selecting a different set of models.", - ], - ] - ) @patch( "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" ) @patch("ads.aqua.modeldeployment.AquaDeploymentApp.list_shapes") - def test_get_multimodel_compatible_shapes_invalid_config( - self, - missing_key, - error, - mock_list_shapes, - mock_fetch_deployment_configs_concurrently, + def test_get_multimodel_deployment_config_hybrid( + self, mock_list_shapes, mock_fetch_deployment_configs_concurrently ): config_json = os.path.join( self.curr_dir, @@ -854,8 +1032,6 @@ def test_get_multimodel_compatible_shapes_invalid_config( with open(config_json, "r") as _file: config = json.load(_file) - config.pop(missing_key) - shapes = [] with open( @@ -871,12 +1047,34 @@ def test_get_multimodel_compatible_shapes_invalid_config( mock_list_shapes.return_value = shapes mock_fetch_deployment_configs_concurrently.return_value = { - "model_a": AquaDeploymentConfig(**config) + "model_a": AquaDeploymentConfig(**config), + "model_b": AquaDeploymentConfig(), + "model_c": AquaDeploymentConfig(), } + result = self.app.get_multimodel_deployment_config( + ["model_a", "model_b", "model_c"] + ) - test_config = self.app.get_multimodel_deployment_config(["model_a"]) + assert ( + result.model_dump() + == TestDataset.aqua_deployment_multi_model_config_summary_hybrid + ) - assert test_config.error_message == error + # all custom models without deployment config + # deployment shape should be collected from `list_shapes` and gpu list will be generated by ads sdk. + mock_fetch_deployment_configs_concurrently.return_value = { + "model_a": AquaDeploymentConfig(), + "model_b": AquaDeploymentConfig(), + "model_c": AquaDeploymentConfig(), + } + result = self.app.get_multimodel_deployment_config( + ["model_a", "model_b", "model_c"] + ) + + assert ( + result.model_dump() + == TestDataset.aqua_deployment_multi_model_config_summary_all_empty + ) def test_verify_compatibility(self): result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( From 62512515e7041313e09fad7be224576816451117 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Tue, 4 Mar 2025 17:49:24 -0500 Subject: [PATCH 075/124] Updated pr. 
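Shape names coming from a model's deployment configuration and from the list-shapes service response can differ in letter case (for example "BM.GPU.A100-v2.8" vs "BM.GPU.A100-V2.8"), so both sides are normalized to uppercase before keys are matched. A minimal sketch of the idea; the variable names are illustrative and the shape names are taken from the test data only as an example:

    config_shapes = ["BM.GPU.A100-v2.8", "VM.GPU.A10.2"]    # keys from a deployment config
    service_shapes = ["BM.GPU.A100-V2.8", "VM.GPU.A10.2"]   # names returned by list_shapes

    available = {name.upper() for name in service_shapes}
    common = [name.upper() for name in config_shapes if name.upper() in available]
    assert common == ["BM.GPU.A100-V2.8", "VM.GPU.A10.2"]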
--- ads/aqua/modeldeployment/utils.py | 8 ++--- .../with_extras/aqua/test_deployment.py | 29 +++++++++++++++---- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 61362eb9d..707b46f05 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -165,7 +165,7 @@ def _extract_model_shape_gpu( # multi_deployment_shape = config.shape multi_deployment_shape = list(config.configuration.keys()) model_shape_gpu[model_id] = { - shape: [ + shape.upper(): [ item.gpu_count for item in config.configuration.get( shape, ConfigurationItem() @@ -174,9 +174,9 @@ def _extract_model_shape_gpu( for shape in multi_deployment_shape } deployment[model_id] = { - "shape": multi_deployment_shape, + "shape": [shape.upper() for shape in multi_deployment_shape], "configuration": { - shape: config.configuration.get(shape, ConfigurationItem()) + shape.upper(): config.configuration.get(shape, ConfigurationItem()) for shape in multi_deployment_shape }, } @@ -212,7 +212,7 @@ def _compute_gpu_allocation( # search the shape in the available shapes list shape_summary = next( - (shape for shape in shapes if shape.name == common_shape), + (shape for shape in shapes if shape.name.upper() == common_shape), None, ) if shape_summary and shape_summary.gpu_specs: diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index c41df843d..cf86c3288 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -499,7 +499,7 @@ class TestDataset: "deployment_config": { "model_a": { "shape": [ - "BM.GPU.A100-v2.8", + "BM.GPU.A100-V2.8", "BM.GPU.H100.8", "VM.GPU.A10.2", "VM.GPU.A10.4", @@ -532,7 +532,7 @@ class TestDataset: ], "shape_info": {"configs": [], "type": ""}, }, - "BM.GPU.A100-v2.8": { + "BM.GPU.A100-V2.8": { "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" }, @@ -581,7 +581,7 @@ class TestDataset: "models": [{"ocid": "model_a", "gpu_count": 4}], "total_gpus_available": 4, }, - "BM.GPU.A100-v2.8": { + "BM.GPU.A100-V2.8": { "models": [{"ocid": "model_a", "gpu_count": 8}], "total_gpus_available": 8, }, @@ -597,7 +597,7 @@ class TestDataset: "deployment_config": { "model_a": { "shape": [ - "BM.GPU.A100-v2.8", + "BM.GPU.A100-V2.8", "BM.GPU.H100.8", "VM.GPU.A10.2", "VM.GPU.A10.4", @@ -630,7 +630,7 @@ class TestDataset: ], "shape_info": {"configs": [], "type": ""}, }, - "BM.GPU.A100-v2.8": { + "BM.GPU.A100-V2.8": { "parameters": { "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" }, @@ -695,7 +695,7 @@ class TestDataset: ], "total_gpus_available": 4, }, - "BM.GPU.A100-v2.8": { + "BM.GPU.A100-V2.8": { "models": [ {"ocid": "model_a", "gpu_count": 2}, {"ocid": "model_b", "gpu_count": 2}, @@ -747,6 +747,23 @@ class TestDataset: ], "total_gpus_available": 8, }, + "BM.GPU.A100-V2.8": { + "models": [ + { + "gpu_count": 2, + "ocid": "model_a", + }, + { + "gpu_count": 2, + "ocid": "model_b", + }, + { + "gpu_count": 4, + "ocid": "model_c", + }, + ], + "total_gpus_available": 8, + }, "BM.GPU.H100.8": { "models": [ {"ocid": "model_a", "gpu_count": 2}, From 95d810b1d4fa1850d55e574088d638b59482f308 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Wed, 5 Mar 2025 12:41:22 -0800 Subject: [PATCH 076/124] Moves the GPU shapes into index json. 
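The hard-coded GPU_SPECS dictionary is replaced by a bundled ads/aqua/resources/gpu_shapes_index.json file, and load_gpu_shapes_index() consults an Object Storage copy first, falling back to the packaged file on any failure. A rough sketch of the local fallback under that assumption; the helper name and directory argument below are illustrative, not the exact SDK API, and the remote branch and caching are omitted:

    import json
    import os

    def load_shapes_index(resource_dir: str) -> dict:
        # In the real helper a remote copy is tried first; any failure falls
        # back to the JSON file shipped with the package.
        with open(os.path.join(resource_dir, "gpu_shapes_index.json")) as f:
            return json.load(f).get("shapes", {})

    # Example lookup against the bundled index:
    # load_shapes_index("ads/aqua/resources")["BM.GPU.H100.8"]["gpu_count"]  -> 8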
--- ads/aqua/common/constants.py | 88 ------------------ ads/aqua/common/entities.py | 40 ++++++--- ads/aqua/common/utils.py | 58 +++++++++++- .../evaluation_service_model_config.py | 8 -- ads/aqua/modeldeployment/deployment.py | 5 ++ ads/aqua/resources/gpu_shapes_index.json | 89 +++++++++++++++++++ 6 files changed, 178 insertions(+), 110 deletions(-) delete mode 100644 ads/aqua/config/evaluation/evaluation_service_model_config.py create mode 100644 ads/aqua/resources/gpu_shapes_index.json diff --git a/ads/aqua/common/constants.py b/ads/aqua/common/constants.py index c2f6a50e0..7fe1878a1 100644 --- a/ads/aqua/common/constants.py +++ b/ads/aqua/common/constants.py @@ -1,91 +1,3 @@ #!/usr/bin/env python # Copyright (c) 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ - -GPU_SPECS = { - "VM.GPU2.1": { - "gpu_type": "P100", - "gpu_count": 1, - "gpu_memory_in_gbs": 16, - }, - "VM.GPU3.1": { - "gpu_type": "V100", - "gpu_count": 1, - "gpu_memory_in_gbs": 16, - }, - "VM.GPU3.2": { - "gpu_type": "V100", - "gpu_count": 2, - "gpu_memory_in_gbs": 32, - }, - "VM.GPU3.4": { - "gpu_type": "V100", - "gpu_count": 4, - "gpu_memory_in_gbs": 64, - }, - "BM.GPU2.2": { - "gpu_type": "P100", - "gpu_count": 2, - "gpu_memory_in_gbs": 32, - }, - "BM.GPU3.8": { - "gpu_type": "V100", - "gpu_count": 8, - "gpu_memory_in_gbs": 128, - }, - "BM.GPU4.8": { - "gpu_type": "A100", - "gpu_count": 8, - "gpu_memory_in_gbs": 320, - }, - "BM.GPU.A10.4": { - "gpu_type": "A10", - "gpu_count": 4, - "gpu_memory_in_gbs": 96, - }, - "VM.GPU.A10.4": { - "gpu_type": "A10", - "gpu_count": 4, - "gpu_memory_in_gbs": 96, - }, - "BM.GPU.H100.8": { - "gpu_type": "H100", - "gpu_count": 8, - "gpu_memory_in_gbs": 640, - }, - "VM.GPU.A10.1": { - "gpu_type": "A10", - "gpu_count": 1, - "gpu_memory_in_gbs": 24, - }, - "VM.GPU.A10.2": { - "gpu_type": "A10", - "gpu_count": 2, - "gpu_memory_in_gbs": 48, - }, - "BM.GPU.L40S-NC.4": { - "gpu_type": "L40S", - "gpu_count": 4, - "gpu_memory_in_gbs": 192, - }, - "BM.GPU.H200.8": { - "gpu_type": "H200", - "gpu_count": 8, - "gpu_memory_in_gbs": 1128, - }, - "BM.GPU.A100-v2.8": { - "gpu_type": "A100", - "gpu_count": 8, - "gpu_memory_in_gbs": 640, - }, - "BM.GPU.MI300X.8": { - "gpu_type": "MI300X", - "gpu_count": 8, - "gpu_memory_in_gbs": 1536, - }, - "BM.GPU.L40S.4": { - "gpu_type": "L40S", - "gpu_count": 4, - "gpu_memory_in_gbs": 192, - }, -} diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index 7a3deb851..05d264f94 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -3,12 +3,11 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import re -from typing import Optional +from typing import Dict, Optional from pydantic import Field, model_validator from ads.aqua.app import logger -from ads.aqua.common.constants import GPU_SPECS from ads.aqua.config.utils.serializer import Serializable @@ -42,6 +41,20 @@ class GPUSpecs(Serializable): ) +class GPUShapesIndex(Serializable): + """ + Represents the index of GPU shapes. + + Attributes + ---------- + shapes (Dict[str, GPUSpecs]): A mapping of compute shape names to their GPU specifications. + """ + + shapes: Dict[str, GPUSpecs] = Field( + ..., description="Mapping of shape names to GPU specifications." + ) + + class ComputeShapeSummary(Serializable): """ Represents the specifications of a compute instance's shape. 
@@ -81,18 +94,19 @@ def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary": ComputeShapeSummary: The updated instance with gpu_specs populated if applicable. """ try: - if model.shape_series and "GPU" in model.shape_series.upper(): - if model.name and model.name in GPU_SPECS: - gpu_info = GPU_SPECS[model.name] - model.gpu_specs = GPUSpecs(**gpu_info) - elif model.name: - # Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2) - match = re.search(r"\.(\d+)$", model.name) - if match: - gpu_count = int(match.group(1)) - model.gpu_specs = GPUSpecs(gpu_count=gpu_count) + if ( + model.shape_series + and "GPU" in model.shape_series.upper() + and model.name + and not model.gpu_specs + ): + # Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2) + match = re.search(r"\.(\d+)$", model.name) + if match: + gpu_count = int(match.group(1)) + model.gpu_specs = GPUSpecs(gpu_count=gpu_count) except Exception as err: - logger.info( + logger.debug( f"Error occurred in attempt to extract GPU specification for the f{model.name}. " f"Details: {err}" ) diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py index 3a19ca6ce..bcb2677ce 100644 --- a/ads/aqua/common/utils.py +++ b/ads/aqua/common/utils.py @@ -17,7 +17,7 @@ from functools import wraps from pathlib import Path from string import Template -from typing import List, Union +from typing import Any, Dict, List, Union import fsspec import oci @@ -35,6 +35,7 @@ from oci.object_storage.models import ObjectSummary from pydantic import BaseModel, ValidationError +from ads.aqua.common.entities import GPUShapesIndex from ads.aqua.common.enums import ( InferenceContainerParamType, InferenceContainerType, @@ -72,6 +73,7 @@ from ads.config import ( AQUA_MODEL_DEPLOYMENT_FOLDER, AQUA_SERVICE_MODELS_BUCKET, + CONDA_BUCKET_NAME, CONDA_BUCKET_NS, TENANCY_OCID, ) @@ -1252,3 +1254,57 @@ def is_pydantic_model(obj: object) -> bool: """ cls = obj if isinstance(obj, type) else type(obj) return issubclass(cls, BaseModel) + + +@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now)) +def load_gpu_shapes_index() -> GPUShapesIndex: + """ + Loads the GPU shapes index from Object Storage or a local resource folder. + + The function first attempts to load the file from an Object Storage bucket using fsspec. + If the loading fails (due to connection issues, missing file, etc.), it falls back to + loading the index from a local file. + + Returns + ------- + GPUShapesIndex: The parsed GPU shapes index. + + Raises + ------ + FileNotFoundError: If the GPU shapes index cannot be found in either Object Storage or locally. + json.JSONDecodeError: If the JSON is malformed. + """ + file_name = "gpu_shapes_index.json" + data: Dict[str, Any] = {} + + # Check if the CONDA_BUCKET_NS environment variable is set. + if CONDA_BUCKET_NS: + try: + # Construct the object storage path. Adjust bucket name and path as needed. + storage_path = f"oci://{CONDA_BUCKET_NAME}@{CONDA_BUCKET_NS}/{file_name}" + logger.debug("Loading GPU shapes index from Object Storage") + with fsspec.open(storage_path, mode="r") as file_obj: + data = json.load(file_obj) + logger.debug("Successfully loaded GPU shapes index.") + except Exception as e: + logger.debug( + f"Failed to load GPU shapes index from Object Storage: {e}. " + "Falling back to local resource." + ) + + # If loading from Object Storage failed, load from the local resource folder. 
+ if not data: + try: + local_path = os.path.join( + os.path.dirname(__file__), "../resources", file_name + ) + logger.debug(f"Loading GPU shapes index from {local_path}.") + with open(local_path) as file_obj: + data = json.load(file_obj) + logger.debug("Successfully loaded GPU shapes index.") + except Exception as e: + logger.debug( + f"Failed to load GPU shapes index from {local_path}. Details: {e}" + ) + + return GPUShapesIndex(**data) diff --git a/ads/aqua/config/evaluation/evaluation_service_model_config.py b/ads/aqua/config/evaluation/evaluation_service_model_config.py deleted file mode 100644 index 911fe3176..000000000 --- a/ads/aqua/config/evaluation/evaluation_service_model_config.py +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env python - -# Copyright (c) 2024 Oracle and/or its affiliates. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ - -""" -This serves as a future template for implementing model-specific evaluation configurations. -""" diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 78c4303a6..8e3fc6f8a 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -32,6 +32,7 @@ get_params_list, get_resource_name, get_restricted_params_by_container, + load_gpu_shapes_index, validate_cmd_var, ) from ads.aqua.constants import ( @@ -1184,12 +1185,16 @@ def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]: compartment_id=compartment_id, **kwargs, ) + + gpu_specs = load_gpu_shapes_index() + return [ ComputeShapeSummary( core_count=oci_shape.core_count, memory_in_gbs=oci_shape.memory_in_gbs, shape_series=oci_shape.shape_series, name=oci_shape.name, + gpu_specs=gpu_specs.shapes.get(oci_shape.name), ) for oci_shape in oci_shapes ] diff --git a/ads/aqua/resources/gpu_shapes_index.json b/ads/aqua/resources/gpu_shapes_index.json new file mode 100644 index 000000000..7f6b6b37f --- /dev/null +++ b/ads/aqua/resources/gpu_shapes_index.json @@ -0,0 +1,89 @@ +{ + "shapes": { + "BM.GPU.A10.4": { + "gpu_count": 4, + "gpu_memory_in_gbs": 96, + "gpu_type": "A10" + }, + "BM.GPU.A100-V2.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 640, + "gpu_type": "A100" + }, + "BM.GPU.H100.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 640, + "gpu_type": "H100" + }, + "BM.GPU.H200.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 1128, + "gpu_type": "H200" + }, + "BM.GPU.L40S-NC.4": { + "gpu_count": 4, + "gpu_memory_in_gbs": 192, + "gpu_type": "L40S" + }, + "BM.GPU.L40S.4": { + "gpu_count": 4, + "gpu_memory_in_gbs": 192, + "gpu_type": "L40S" + }, + "BM.GPU.MI300X.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 1536, + "gpu_type": "MI300X" + }, + "BM.GPU2.2": { + "gpu_count": 2, + "gpu_memory_in_gbs": 32, + "gpu_type": "P100" + }, + "BM.GPU3.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 128, + "gpu_type": "V100" + }, + "BM.GPU4.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + "gpu_type": "A100" + }, + "VM.GPU.A10.1": { + "gpu_count": 1, + "gpu_memory_in_gbs": 24, + "gpu_type": "A10" + }, + "VM.GPU.A10.2": { + "gpu_count": 2, + "gpu_memory_in_gbs": 48, + "gpu_type": "A10" + }, + "VM.GPU.A10.4": { + "gpu_count": 4, + "gpu_memory_in_gbs": 96, + "gpu_type": "A10" + }, + "VM.GPU2.1": { + "gpu_count": 1, + "gpu_memory_in_gbs": 16, + "gpu_type": "P100" + }, + "VM.GPU3.1": { + "gpu_count": 1, + "gpu_memory_in_gbs": 16, + "gpu_type": "V100" + }, + "VM.GPU3.2": { + "gpu_count": 2, + "gpu_memory_in_gbs": 32, + "gpu_type": "V100" + }, + "VM.GPU3.4": { + "gpu_count": 4, + 
"gpu_memory_in_gbs": 64, + "gpu_type": "V100" + } + } +} From 024c016cc0d1d48dbd3739216ce1943b12bc9f83 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Wed, 5 Mar 2025 15:57:41 -0800 Subject: [PATCH 077/124] Fixes the import error. --- ads/aqua/common/entities.py | 2 +- ads/aqua/common/utils.py | 24 ++++++++--- ads/aqua/modeldeployment/deployment.py | 43 ++++++++++++++----- .../with_extras/aqua/test_common_entities.py | 5 +++ 4 files changed, 55 insertions(+), 19 deletions(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index 05d264f94..92c3a3602 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -7,7 +7,7 @@ from pydantic import Field, model_validator -from ads.aqua.app import logger +from ads.aqua import logger from ads.aqua.config.utils.serializer import Serializable diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py index bcb2677ce..efd935015 100644 --- a/ads/aqua/common/utils.py +++ b/ads/aqua/common/utils.py @@ -17,7 +17,7 @@ from functools import wraps from pathlib import Path from string import Template -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Union import fsspec import oci @@ -64,6 +64,7 @@ VLLM_INFERENCE_RESTRICTED_PARAMS, ) from ads.aqua.data import AquaResourceIdentifier +from ads.common import auth as authutil from ads.common.auth import AuthState, default_signer from ads.common.decorator.threaded import threaded from ads.common.extended_enum import ExtendedEnum @@ -1257,7 +1258,9 @@ def is_pydantic_model(obj: object) -> bool: @cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now)) -def load_gpu_shapes_index() -> GPUShapesIndex: +def load_gpu_shapes_index( + auth: Optional[Dict] = None, +) -> GPUShapesIndex: """ Loads the GPU shapes index from Object Storage or a local resource folder. @@ -1265,6 +1268,13 @@ def load_gpu_shapes_index() -> GPUShapesIndex: If the loading fails (due to connection issues, missing file, etc.), it falls back to loading the index from a local file. + Parameters + ---------- + auth: (Dict, optional). Defaults to None. + The default authentication is set using `ads.set_auth` API. If you need to override the + default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate + authentication signer and kwargs required to instantiate IdentityClient object. + Returns ------- GPUShapesIndex: The parsed GPU shapes index. @@ -1280,16 +1290,16 @@ def load_gpu_shapes_index() -> GPUShapesIndex: # Check if the CONDA_BUCKET_NS environment variable is set. if CONDA_BUCKET_NS: try: + auth = auth or authutil.default_signer() # Construct the object storage path. Adjust bucket name and path as needed. - storage_path = f"oci://{CONDA_BUCKET_NAME}@{CONDA_BUCKET_NS}/{file_name}" + storage_path = f"oci://{CONDA_BUCKET_NAME}@{CONDA_BUCKET_NS}/{file_name}/1" logger.debug("Loading GPU shapes index from Object Storage") - with fsspec.open(storage_path, mode="r") as file_obj: + with fsspec.open(storage_path, mode="r", **auth) as file_obj: data = json.load(file_obj) logger.debug("Successfully loaded GPU shapes index.") - except Exception as e: + except Exception as ex: logger.debug( - f"Failed to load GPU shapes index from Object Storage: {e}. " - "Falling back to local resource." + f"Failed to load GPU shapes index from Object Storage. Details: {ex}" ) # If loading from Object Storage failed, load from the local resource folder. 
diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 8e3fc6f8a..7a8b62615 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -144,12 +144,9 @@ def create( f"Invalid parameters for creating a model deployment. Error details: {custom_errors}." ) from ex - # Extract model_id from the provided deployment details. - model_id = create_deployment_details.model_id - # If a single model is provided, delegate to `create` method if ( - not model_id + not create_deployment_details.model_id and create_deployment_details.models and len(create_deployment_details.models) == 1 ): @@ -158,7 +155,7 @@ def create( f"Single model ({single_model.model_id}) provided. " "Delegating to single model creation method." ) - model_id = single_model.model_id + create_deployment_details.model_id = single_model.model_id # Set defaults for compartment and project if not provided. compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID @@ -168,9 +165,9 @@ def create( # Create an AquaModelApp instance once to perform the deployment creation. model_app = AquaModelApp() - if model_id: + if create_deployment_details.model_id: aqua_model = model_app.create( - model_id=model_id, + model_id=create_deployment_details.model_id, compartment_id=compartment_id, project_id=project_id, freeform_tags=freeform_tags, @@ -182,21 +179,45 @@ def create( ) else: model_ids = [model.model_id for model in create_deployment_details.models] - try: model_config_summary = self.get_multimodel_deployment_config( model_ids=model_ids ) - if not model_config_summary.gpu_allocation: raise AquaValueError(model_config_summary.error_message) - create_deployment_details.validate_multimodel_deployment_feasibility( models_config_summary=model_config_summary ) except ConfigValidationError as err: raise AquaValueError(f"{err}") from err + # TODO: update it when more deployment containers are supported + supported_container_families = ( + InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY + ) + + # Check if provided container family supports multi-model deployment + if ( + create_deployment_details.container_family + and create_deployment_details.container_family + not in supported_container_families + ): + raise AquaValueError( + f"Unsupported deployment container '{create_deployment_details.container_family}'. " + f"Only '{supported_container_families}' are supported for multi-model deployments." + ) + + # Verify if it matches one of the registered containers and attempt to + # extract the container family from there. + # If the container is not recognized, we can only issue a warning that + # the provided container may not support multi-model deployment. + if create_deployment_details.container_image_uri: + # TODO Add registered container validation + logger.warning( + f"The provided container `{create_deployment_details.container_image_uri}` may not support multi-model deployment. " + f"Only the following container families are supported: `{supported_container_families}`." 
+ ) + aqua_model = model_app.create_multi( models=create_deployment_details.models, compartment_id=compartment_id, @@ -386,7 +407,7 @@ def _create( or container_spec.get(ContainerSpec.HEALTH_CHECK_PORT) ) # Give precedence to the input parameter - deployment_config = self.get_deployment_config(config_source_id) + deployment_config = self.get_deployment_config(model_id=config_source_id) config_params = deployment_config.configuration.get( create_deployment_details.instance_shape, ConfigurationItem() diff --git a/tests/unitary/with_extras/aqua/test_common_entities.py b/tests/unitary/with_extras/aqua/test_common_entities.py index 7d3a67aef..23d5e134f 100644 --- a/tests/unitary/with_extras/aqua/test_common_entities.py +++ b/tests/unitary/with_extras/aqua/test_common_entities.py @@ -20,6 +20,11 @@ class TestComputeShapeSummary: "memory_in_gbs": 512, "name": "VM.GPU2.1", "shape_series": "GPU", + "gpu_specs": { + "gpu_type": "P100", + "gpu_count": 1, + "gpu_memory_in_gbs": 16, + }, }, {"gpu_type": "P100", "gpu_count": 1, "gpu_memory_in_gbs": 16}, ), From 18d987bfa29fdf4ed5d365b184354a5603f8ab8d Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Wed, 5 Mar 2025 16:51:55 -0800 Subject: [PATCH 078/124] Fixes tests --- ads/aqua/common/entities.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index 92c3a3602..eaf6b96df 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -51,7 +51,8 @@ class GPUShapesIndex(Serializable): """ shapes: Dict[str, GPUSpecs] = Field( - ..., description="Mapping of shape names to GPU specifications." + default_factory=dict, + description="Mapping of shape names to GPU specifications.", ) From d51c444b0395f084876d8d2377f9a2725a57b2c4 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Wed, 5 Mar 2025 17:14:42 -0800 Subject: [PATCH 079/124] Fixes merging conflicts --- ads/aqua/modeldeployment/deployment.py | 1 - tests/unitary/with_extras/aqua/test_utils.py | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index c30eb1a5e..12b87c127 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -60,7 +60,6 @@ ModelDeploymentConfigSummary, ) from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader -from ads.aqua.ui import ModelFormat from ads.common.object_storage_details import ObjectStorageDetails from ads.common.utils import get_log_links from ads.config import ( diff --git a/tests/unitary/with_extras/aqua/test_utils.py b/tests/unitary/with_extras/aqua/test_utils.py index a6fb5d0b0..5b4c66740 100644 --- a/tests/unitary/with_extras/aqua/test_utils.py +++ b/tests/unitary/with_extras/aqua/test_utils.py @@ -1,22 +1,20 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import unittest from unittest.mock import MagicMock, patch -import pytest -from ads.common.object_storage_details import ObjectStorageDetails from oci.object_storage.models import ListObjects, ObjectSummary from oci.resource_search.models.resource_summary import ResourceSummary from parameterized import parameterized from ads.aqua.common import utils from ads.aqua.common.errors import AquaRuntimeError +from ads.common.object_storage_details import ObjectStorageDetails from ads.common.oci_resource import SEARCH_TYPE, OCIResource from ads.config import TENANCY_OCID -from ads.common import auth as authutil class TestDataset: From b2811b2e4c3255cc6ec3432021bf45e631bacece Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Wed, 5 Mar 2025 19:34:46 -0800 Subject: [PATCH 080/124] Adds extra validation for the container family. --- ads/aqua/common/entities.py | 94 ++++++++++++++++++- ads/aqua/config/container_config.py | 3 +- ads/aqua/modeldeployment/deployment.py | 61 ++++++++++-- .../with_extras/aqua/test_common_entities.py | 59 +++++++++++- 4 files changed, 204 insertions(+), 13 deletions(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index eaf6b96df..355eb232e 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -3,7 +3,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import re -from typing import Dict, Optional +from typing import Any, Dict, Optional from pydantic import Field, model_validator @@ -145,3 +145,95 @@ class AquaMultiModelRef(Serializable): class Config: extra = "ignore" protected_namespaces = () + + +class ContainerPath(Serializable): + """ + Represents a parsed container path, extracting the path, name, and version. + + This model is designed to parse a container path string of the format + ':'. It extracts the following components: + - `path`: The full path up to the version. + - `name`: The last segment of the path, representing the image name. + - `version`: The version number following the final colon. + + Example Usage: + -------------- + >>> container = ContainerPath(full_path="iad.ocir.io/ociodscdev/odsc-llm-evaluate:0.1.2.9") + >>> container.path + 'iad.ocir.io/ociodscdev/odsc-llm-evaluate' + >>> container.name + 'odsc-llm-evaluate' + >>> container.version + '0.1.2.9' + + >>> container = ContainerPath(full_path="custom-scheme://path/to/versioned-model:2.5.1") + >>> container.path + 'custom-scheme://path/to/versioned-model' + >>> container.name + 'versioned-model' + >>> container.version + '2.5.1' + + Attributes + ---------- + full_path : str + The complete container path string to be parsed. + path : Optional[str] + The full path up to the version (e.g., 'iad.ocir.io/ociodscdev/odsc-llm-evaluate'). + name : Optional[str] + The image name, which is the last segment of `path` (e.g., 'odsc-llm-evaluate'). + version : Optional[str] + The version number following the final colon in the path (e.g., '0.1.2.9'). + + Methods + ------- + validate(values: Any) -> Any + Validates and parses the `full_path`, extracting `path`, `name`, and `version`. + """ + + full_path: str + path: Optional[str] = None + name: Optional[str] = None + version: Optional[str] = None + + @model_validator(mode="before") + @classmethod + def validate(cls, values: Any) -> Any: + """ + Validates and parses the full container path, extracting the image path, image name, and version. 
+ + Parameters + ---------- + values : dict + The dictionary of values being validated, containing 'full_path'. + + Returns + ------- + dict + Updated values dictionary with extracted 'path', 'name', and 'version'. + """ + full_path = values.get("full_path", "").strip() + + # Regex to parse : + match = re.match( + r"^(?P.+?)(?::(?P[\w\.]+))?$", full_path + ) + + if not match: + raise ValueError( + "Invalid container path format. Expected format: ':'" + ) + + # Extract image_path and version + values["path"] = match.group("image_path") + values["version"] = match.group("image_version") + + # Extract image_name as the last segment of image_path + values["name"] = values["path"].split("/")[-1] + + return values + + class Config: + extra = "ignore" + protected_namespaces = () diff --git a/ads/aqua/config/container_config.py b/ads/aqua/config/container_config.py index 7912d839b..2fa4a6a81 100644 --- a/ads/aqua/config/container_config.py +++ b/ads/aqua/config/container_config.py @@ -83,6 +83,7 @@ class AquaContainerConfigItem(Serializable): class Config: extra = "allow" + protected_namespaces = () class AquaContainerConfig(Serializable): @@ -131,7 +132,7 @@ def from_container_index_json( ------- AquaContainerConfig: The constructed container configuration. """ - #TODO: Return this logic back if necessary in the next iteraion. + # TODO: Return this logic back if necessary in the next iteraion. # if not config: # config = get_container_config() diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 12b87c127..cfabf65b2 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -15,6 +15,7 @@ from ads.aqua.common.entities import ( AquaMultiModelRef, ComputeShapeSummary, + ContainerPath, ContainerSpec, ) from ads.aqua.common.enums import InferenceContainerTypeFamily, ModelFormat, Tags @@ -35,6 +36,7 @@ load_gpu_shapes_index, validate_cmd_var, ) +from ads.aqua.config.container_config import AquaContainerConfig from ads.aqua.constants import ( AQUA_MODEL_ARTIFACT_FILE, AQUA_MODEL_TYPE_CUSTOM, @@ -162,6 +164,9 @@ def create( freeform_tags = create_deployment_details.freeform_tags defined_tags = create_deployment_details.defined_tags + # Get container config + container_config = get_container_config() + # Create an AquaModelApp instance once to perform the deployment creation. model_app = AquaModelApp() if create_deployment_details.model_id: @@ -175,6 +180,7 @@ def create( return self._create( aqua_model=aqua_model, create_deployment_details=create_deployment_details, + container_config=container_config, ) else: model_ids = [model.model_id for model in create_deployment_details.models] @@ -191,9 +197,9 @@ def create( raise AquaValueError(f"{err}") from err # TODO: update it when more deployment containers are supported - supported_container_families = ( + supported_container_families = [ InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY - ) + ] # Check if provided container family supports multi-model deployment if ( @@ -203,7 +209,7 @@ def create( ): raise AquaValueError( f"Unsupported deployment container '{create_deployment_details.container_family}'. " - f"Only '{supported_container_families}' are supported for multi-model deployments." + f"Only {supported_container_families} families are supported for multi-model deployments." 
) # Verify if it matches one of the registered containers and attempt to @@ -211,12 +217,43 @@ def create( # If the container is not recognized, we can only issue a warning that # the provided container may not support multi-model deployment. if create_deployment_details.container_image_uri: - # TODO Add registered container validation - logger.warning( - f"The provided container `{create_deployment_details.container_image_uri}` may not support multi-model deployment. " - f"Only the following container families are supported: `{supported_container_families}`." + service_inference_containers = ( + AquaContainerConfig.from_container_index_json( + config=container_config + ).inference.values() ) + selected_container_name = ContainerPath( + full_path=create_deployment_details.container_image_uri + ).name + + container_config_item = next( + ( + container_config_item + for container_config_item in service_inference_containers + if ContainerPath( + full_path=f"{container_config_item.name}:{container_config_item.version}" + ).name.upper() + == selected_container_name.upper() + ), + None, + ) + + if ( + container_config_item + and container_config_item.family not in supported_container_families + ): + raise AquaValueError( + f"Unsupported deployment container '{create_deployment_details.container_image_uri}'. " + f"Only {supported_container_families} families are supported for multi-model deployments." + ) + + if not container_config_item: + logger.warning( + f"The provided container `{create_deployment_details.container_image_uri}` may not support multi-model deployment. " + f"Only the following container families are supported: {supported_container_families}." + ) + aqua_model = model_app.create_multi( models=create_deployment_details.models, compartment_id=compartment_id, @@ -227,12 +264,14 @@ def create( return self._create_multi( aqua_model=aqua_model, create_deployment_details=create_deployment_details, + container_config=container_config, ) def _create( self, aqua_model: DataScienceModel, create_deployment_details: CreateModelDeploymentDetails, + container_config: Dict, ) -> AquaDeployment: """Builds the configurations required by single model deployment and creates the deployment. @@ -243,6 +282,8 @@ def _create( create_deployment_details : CreateModelDeploymentDetails An instance of CreateModelDeploymentDetails containing all required and optional fields for creating a model deployment via Aqua. + container_config: Dict + Container config dictionary. Returns ------- @@ -392,7 +433,6 @@ def _create( # Fetch the startup cli command for the container # container_index.json will have "containerSpec" section which will provide the cli params for # a given container family - container_config = get_container_config() container_spec = container_config.get(ContainerSpec.CONTAINER_SPEC, {}).get( container_type_key, {} ) @@ -472,6 +512,7 @@ def _create_multi( self, aqua_model: DataScienceModel, create_deployment_details: CreateModelDeploymentDetails, + container_config: Dict, ) -> AquaDeployment: """Builds the environment variables required by multi deployment container and creates the deployment. @@ -482,7 +523,8 @@ def _create_multi( create_deployment_details : CreateModelDeploymentDetails An instance of CreateModelDeploymentDetails containing all required and optional fields for creating a model deployment via Aqua. - + container_config: Dict + Container config dictionary. 
Returns ------- AquaDeployment @@ -496,7 +538,6 @@ def _create_multi( model=aqua_model, container_family=create_deployment_details.container_family, ) - container_config = get_container_config() container_spec = container_config.get( ContainerSpec.CONTAINER_SPEC, UNKNOWN_DICT ).get(container_type_key, UNKNOWN_DICT) diff --git a/tests/unitary/with_extras/aqua/test_common_entities.py b/tests/unitary/with_extras/aqua/test_common_entities.py index 23d5e134f..778c07ff1 100644 --- a/tests/unitary/with_extras/aqua/test_common_entities.py +++ b/tests/unitary/with_extras/aqua/test_common_entities.py @@ -6,7 +6,7 @@ import pytest -from ads.aqua.common.entities import ComputeShapeSummary +from ads.aqua.common.entities import ComputeShapeSummary, ContainerPath class TestComputeShapeSummary: @@ -62,3 +62,60 @@ def test_set_gpu_specs(self, input_data, expected_gpu_specs): assert shape.gpu_specs.gpu_memory_in_gbs == expected_gpu_specs.get( "gpu_memory_in_gbs" ) + + +class TestContainerPath: + """The unit tests for ContainerPath.""" + + @pytest.mark.parametrize( + "image_path, expected_result", + [ + ( + "iad.ocir.io/ociodscdev/odsc-llm-evaluate:0.1.2.9", + { + "full_path": "iad.ocir.io/ociodscdev/odsc-llm-evaluate:0.1.2.9", + "path": "iad.ocir.io/ociodscdev/odsc-llm-evaluate", + "name": "odsc-llm-evaluate", + "version": "0.1.2.9", + }, + ), + ( + "dsmc://model-with-version:0.2.78.0", + { + "full_path": "dsmc://model-with-version:0.2.78.0", + "path": "dsmc://model-with-version", + "name": "model-with-version", + "version": "0.2.78.0", + }, + ), + ( + "oci://my-custom-model-version:1.0.0", + { + "full_path": "oci://my-custom-model-version:1.0.0", + "path": "oci://my-custom-model-version", + "name": "my-custom-model-version", + "version": "1.0.0", + }, + ), + ( + "custom-scheme://path/to/versioned-model:2.5.1", + { + "full_path": "custom-scheme://path/to/versioned-model:2.5.1", + "path": "custom-scheme://path/to/versioned-model", + "name": "versioned-model", + "version": "2.5.1", + }, + ), + ( + "custom-scheme://path/to/versioned-model", + { + "full_path": "custom-scheme://path/to/versioned-model", + "path": "custom-scheme://path/to/versioned-model", + "name": "versioned-model", + "version": None, + }, + ), + ], + ) + def test_positive(self, image_path, expected_result): + assert ContainerPath(full_path=image_path).model_dump() == expected_result From bdb4de2d10f9be05ba1a35a88a92e11df0918410 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Thu, 6 Mar 2025 13:27:18 -0500 Subject: [PATCH 081/124] Added validation for custom model. 
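Custom models registered without a deployment configuration carry an empty configuration section, and their GPU lists are generated by the SDK, so the per-shape GPU-count validation is now skipped for them; the error raised for an unsupported shape also lists the shapes that are supported. A simplified illustration of the added guard, with plain dicts standing in for the pydantic entities used in the real code:

    model_deployment_config = {
        "model_a": {
            "configuration": {
                "BM.GPU.H100.8": {"multi_model_deployment": [{"gpu_count": 2}]}
            }
        },
        "model_b": {"configuration": {}},  # custom model without a deployment config
    }

    for model_id, config in model_deployment_config.items():
        if not config["configuration"]:
            # GPU counts for such models were generated by the SDK; nothing to check here.
            continue
        # Per-shape gpu_count validation runs only for models with a configuration.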
--- ads/aqua/modeldeployment/entities.py | 18 ++- .../with_extras/aqua/test_deployment.py | 104 ++++++++++-------- 2 files changed, 73 insertions(+), 49 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index fee4ad44c..08a10693c 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -7,7 +7,7 @@ from oci.data_science.models import ModelDeployment, ModelDeploymentSummary from pydantic import BaseModel, Field, model_validator -from ads.aqua import logger +from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import Tags from ads.aqua.config.utils.serializer import Serializable @@ -15,6 +15,7 @@ from ads.aqua.data import AquaResourceIdentifier from ads.common.serializer import DataClassSerializable from ads.common.utils import get_console_link +from ads.model.datascience_model import DataScienceModel class ShapeInfo(Serializable): @@ -517,12 +518,14 @@ def validate_multimodel_deployment_feasibility( selected_shape = self.instance_shape if selected_shape not in models_config_summary.gpu_allocation: + supported_shapes = list(models_config_summary.gpu_allocation.keys()) logger.error( - f"The model group is not compatible with the selected instance shape {selected_shape}" + f"The model group is not compatible with the selected instance shape " + f"'{selected_shape}'. Select a different instance shape from supported shapes {supported_shapes}." ) raise ConfigValidationError( f"The model group is not compatible with the selected instance shape " - f"'{selected_shape}'. Select a different instance shape." + f"'{selected_shape}'. Select a different instance shape from supported shapes {supported_shapes}." ) total_available_gpus = models_config_summary.gpu_allocation[ @@ -550,6 +553,15 @@ def validate_multimodel_deployment_feasibility( sum_model_gpus += model.gpu_count aqua_deployment_config = model_deployment_config[model.model_id] + source_model = DataScienceModel.from_id(model.model_id) + # Validates custom model with deployment config + # Skips validating custom models without deployment config as the configuration is empty and gpu counts are generated by ads sdk + if ( + source_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID + and not aqua_deployment_config.configuration + ): + continue + # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. # Our current configuration does not support this flexibility. 
diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index cf86c3288..b3ef98773 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -1746,46 +1746,13 @@ def test_validate_deployment_params_for_unverified_models( container_family=container_type_key, ) - -class TestMDInferenceResponse(unittest.TestCase): - def setUp(self): - self.app = MDInferenceResponse() - - @classmethod - def setUpClass(cls): - cls.curr_dir = os.path.dirname(os.path.abspath(__file__)) - - @classmethod - def tearDownClass(cls): - cls.curr_dir = None - - @patch("requests.post") - def test_get_model_deployment_response(self, mock_post): - """Test to check if model deployment response is returned correctly.""" - - endpoint = TestDataset.MODEL_DEPLOYMENT_URL + "/predict" - self.app.prompt = "What is 1+1?" - self.app.model_params = ModelParams(**TestDataset.model_params) - - mock_response = MagicMock() - response_json = os.path.join( - self.curr_dir, "test_data/deployment/aqua_deployment_response.json" - ) - with open(response_json, "r") as _file: - mock_response.content = _file.read() - mock_response.status_code = 200 - mock_post.return_value = mock_response - - result = self.app.get_model_deployment_response(endpoint) - assert result["choices"][0]["text"] == " The answer is 2" - - -class TestCreateModelDeploymentDetails: - curr_dir = os.path.dirname(__file__) # Define curr_dir - + @patch("ads.model.datascience_model.DataScienceModel.from_id") def validate_multimodel_deployment_feasibility_helper( - self, models, instance_shape, display_name, total_gpus, multi_model="true" + self, models, instance_shape, display_name, total_gpus, mock_from_id ): + mock_from_id.return_value = MagicMock( + compartment_id=TestDataset.SERVICE_COMPARTMENT_ID + ) config_json = os.path.join( self.curr_dir, "test_data/deployment/aqua_summary_multi_model.json" ) @@ -1803,7 +1770,7 @@ def validate_multimodel_deployment_feasibility_helper( models=aqua_models, instance_shape=instance_shape, display_name=display_name, - freeform_tags={Tags.MULTIMODEL_TYPE_TAG: multi_model}, + freeform_tags={Tags.MULTIMODEL_TYPE_TAG: "true"}, ) else: model_id = "model_a" @@ -1811,7 +1778,7 @@ def validate_multimodel_deployment_feasibility_helper( model_id=model_id, instance_shape=instance_shape, display_name=display_name, - freeform_tags={Tags.MULTIMODEL_TYPE_TAG: multi_model}, + freeform_tags={Tags.MULTIMODEL_TYPE_TAG: "true"}, ) mock_models_config_summary = ModelDeploymentConfigSummary(**(config)) @@ -1820,8 +1787,7 @@ def validate_multimodel_deployment_feasibility_helper( models_config_summary=mock_models_config_summary ) - @pytest.mark.parametrize( - "models, instance_shape, display_name, total_gpus", + @parameterized.expand( [ ( [ @@ -1855,15 +1821,18 @@ def validate_multimodel_deployment_feasibility_helper( ), ], ) + @patch("ads.model.datascience_model.DataScienceModel.from_id") def test_validate_multimodel_deployment_feasibility_positive( - self, models, instance_shape, display_name, total_gpus + self, models, instance_shape, display_name, total_gpus, mock_from_id ): + mock_from_id.return_value = MagicMock( + compartment_id=TestDataset.SERVICE_COMPARTMENT_ID + ) self.validate_multimodel_deployment_feasibility_helper( models, instance_shape, display_name, total_gpus ) - @pytest.mark.parametrize( - "models, instance_shape, display_name, total_gpus, value_error", + @parameterized.expand( [ ( None, @@ -1933,10 +1902,53 @@ def 
test_validate_multimodel_deployment_feasibility_positive( ), ], ) + @patch("ads.model.datascience_model.DataScienceModel.from_id") def test_validate_multimodel_deployment_feasibility_negative( - self, models, instance_shape, display_name, total_gpus, value_error + self, + models, + instance_shape, + display_name, + total_gpus, + value_error, + mock_from_id, ): + mock_from_id.return_value = MagicMock( + compartment_id=TestDataset.SERVICE_COMPARTMENT_ID + ) with pytest.raises(ConfigValidationError, match=value_error): self.validate_multimodel_deployment_feasibility_helper( models, instance_shape, display_name, total_gpus ) + + +class TestMDInferenceResponse(unittest.TestCase): + def setUp(self): + self.app = MDInferenceResponse() + + @classmethod + def setUpClass(cls): + cls.curr_dir = os.path.dirname(os.path.abspath(__file__)) + + @classmethod + def tearDownClass(cls): + cls.curr_dir = None + + @patch("requests.post") + def test_get_model_deployment_response(self, mock_post): + """Test to check if model deployment response is returned correctly.""" + + endpoint = TestDataset.MODEL_DEPLOYMENT_URL + "/predict" + self.app.prompt = "What is 1+1?" + self.app.model_params = ModelParams(**TestDataset.model_params) + + mock_response = MagicMock() + response_json = os.path.join( + self.curr_dir, "test_data/deployment/aqua_deployment_response.json" + ) + with open(response_json, "r") as _file: + mock_response.content = _file.read() + mock_response.status_code = 200 + mock_post.return_value = mock_response + + result = self.app.get_model_deployment_response(endpoint) + assert result["choices"][0]["text"] == " The answer is 2" From d6b20d5f916d74ca830c5fce1a9e27b834fbe17e Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 7 Mar 2025 13:33:02 -0500 Subject: [PATCH 082/124] Support deploy single model for multi deployment. --- ads/aqua/modeldeployment/deployment.py | 39 +++++++----- ads/aqua/modeldeployment/entities.py | 12 +++- ads/aqua/modeldeployment/utils.py | 87 ++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 15 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index cfabf65b2..830ac86db 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -145,18 +145,10 @@ def create( f"Invalid parameters for creating a model deployment. Error details: {custom_errors}." ) from ex - # If a single model is provided, delegate to `create` method - if ( - not create_deployment_details.model_id - and create_deployment_details.models - and len(create_deployment_details.models) == 1 - ): - single_model = create_deployment_details.models[0] - logger.info( - f"Single model ({single_model.model_id}) provided. " - "Delegating to single model creation method." + if not (create_deployment_details.model_id or create_deployment_details.models): + raise AquaValueError( + "Invalid parameters for creating a model deployment. Either `model_id` or `models` must be provided." ) - create_deployment_details.model_id = single_model.model_id # Set defaults for compartment and project if not provided. compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID @@ -170,6 +162,10 @@ def create( # Create an AquaModelApp instance once to perform the deployment creation. model_app = AquaModelApp() if create_deployment_details.model_id: + logger.info( + f"Single model ({create_deployment_details.model_id}) provided. " + "Delegating to single model creation method." 
+ ) aqua_model = model_app.create( model_id=create_deployment_details.model_id, compartment_id=compartment_id, @@ -254,6 +250,10 @@ def create( f"Only the following container families are supported: {supported_container_families}." ) + logger.info( + f"Multi models ({model_ids}) provided. Delegating to multi model creation method." + ) + aqua_model = model_app.create_multi( models=create_deployment_details.models, compartment_id=compartment_id, @@ -1051,15 +1051,26 @@ def get_multimodel_deployment_config( ModelDeploymentConfigSummary A summary of the model deployment configurations and GPU allocations. """ + if not model_ids: + raise AquaValueError( + "Invalid or empty parameter `model_ids`. Specify a list of valid model ids to get multi model deployment config." + ) compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID) # Get the all model deployment available shapes in a given compartment available_shapes = self.list_shapes(compartment_id=compartment_id) - return MultiModelDeploymentConfigLoader( - deployment_app=self, - ).load( + multi_model_deployment_config_loader = MultiModelDeploymentConfigLoader( + deployment_app=self + ) + + if len(model_ids) == 1: + return multi_model_deployment_config_loader.load_single( + shapes=available_shapes, model_id=model_ids[0] + ) + + return multi_model_deployment_config_loader.load( shapes=available_shapes, model_ids=model_ids, primary_model_id=primary_model_id, diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index fee4ad44c..f67d820fe 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -7,7 +7,7 @@ from oci.data_science.models import ModelDeployment, ModelDeploymentSummary from pydantic import BaseModel, Field, model_validator -from ads.aqua import logger +from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import Tags from ads.aqua.config.utils.serializer import Serializable @@ -15,6 +15,7 @@ from ads.aqua.data import AquaResourceIdentifier from ads.common.serializer import DataClassSerializable from ads.common.utils import get_console_link +from ads.model.datascience_model import DataScienceModel class ShapeInfo(Serializable): @@ -550,6 +551,15 @@ def validate_multimodel_deployment_feasibility( sum_model_gpus += model.gpu_count aqua_deployment_config = model_deployment_config[model.model_id] + source_model = DataScienceModel.from_id(model.model_id) + # Validates custom model with deployment config + # Skips validating custom models without deployment config as the configuration is empty and gpu counts are generated by ads sdk + if ( + source_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID + and not aqua_deployment_config.configuration + ): + continue + # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. # Our current configuration does not support this flexibility. 
diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index fffd4ddab..32f95ae14 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -134,6 +134,93 @@ def load( summary.gpu_allocation = gpu_allocation return summary + def load_single( + self, + shapes: List[ComputeShapeSummary], + model_id: str, + ) -> ModelDeploymentConfigSummary: + """ + Retrieves deployment configuration for single model and allocate all available GPU count to it. + + Parameters + ---------- + shapes : List[ComputeShapeSummary] + Model deployment available shapes. + model_id : str + The OCID for the Aqua model. + + Returns + ------- + ModelDeploymentConfigSummary + A summary of the deployment configurations and GPU allocations. If GPU allocation + cannot be determined, an appropriate error message is included in the summary. + """ + # Fetch deployment configuration concurrently. + logger.debug(f"Loading model deployment configuration for model: {model_id}") + deployment_config = self._fetch_deployment_configs_concurrently([model_id])[ + model_id + ] + + deployment = { + model_id: { + "shape": [shape.upper() for shape in deployment_config.shape], + "configuration": { + shape.upper(): deployment_config.configuration.get( + shape, ConfigurationItem() + ) + for shape in deployment_config.shape + }, + } + } + + # Initialize the summary result with the deployment configurations. + summary = ModelDeploymentConfigSummary(deployment_config=deployment) + + # Find out the common shapes from deployment config and available deployment shapes + shape = [shape.upper() for shape in deployment_config.shape] + common_shapes = [shape.name.upper() for shape in shapes] + if shape: + common_shapes = list(set(common_shapes).intersection(set(shape))) + + if not common_shapes: + summary.error_message = ( + "The selected model does not have any available deployment shape. " + "Please ensure that chosen model is compatible for multi-model deployment." + ) + logger.debug( + f"No compatible deployment shapes found for selected model: {model_id}" + ) + return summary + + logger.debug(f"Available Common Shapes: {common_shapes}") + + gpu_allocation = {} + for shape in common_shapes: + total_gpus_available = 0 + shape_summary = next( + ( + deployment_shape + for deployment_shape in shapes + if deployment_shape.name.upper() == shape + ), + None, + ) + if shape_summary and shape_summary.gpu_specs: + total_gpus_available = shape_summary.gpu_specs.gpu_count + + if total_gpus_available != 0: + gpu_allocation[shape] = GPUShapeAllocation( + models=[ + GPUModelAllocation( + ocid=model_id, gpu_count=total_gpus_available + ) + ], + total_gpus_available=total_gpus_available, + ) + + summary.gpu_allocation = gpu_allocation + return summary + def _fetch_deployment_configs_concurrently( self, model_ids: List[str] ) -> Dict[str, AquaDeploymentConfig]: From 1b1268b093f05f5ddb70b4a6dd600b5b35f84b00 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 7 Mar 2025 14:10:31 -0500 Subject: [PATCH 083/124] Updated pr. 
--- ads/aqua/modeldeployment/entities.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index f67d820fe..5fff72789 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -7,7 +7,7 @@ from oci.data_science.models import ModelDeployment, ModelDeploymentSummary from pydantic import BaseModel, Field, model_validator -from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger +from ads.aqua import logger from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import Tags from ads.aqua.config.utils.serializer import Serializable @@ -15,7 +15,6 @@ from ads.aqua.data import AquaResourceIdentifier from ads.common.serializer import DataClassSerializable from ads.common.utils import get_console_link -from ads.model.datascience_model import DataScienceModel class ShapeInfo(Serializable): @@ -551,13 +550,9 @@ def validate_multimodel_deployment_feasibility( sum_model_gpus += model.gpu_count aqua_deployment_config = model_deployment_config[model.model_id] - source_model = DataScienceModel.from_id(model.model_id) # Validates custom model with deployment config # Skips validating custom models without deployment config as the configuration is empty and gpu counts are generated by ads sdk - if ( - source_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID - and not aqua_deployment_config.configuration - ): + if not aqua_deployment_config.configuration: continue # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). From ca835e43b49feae59710f5ee6623bb0273afe12c Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 7 Mar 2025 14:13:05 -0500 Subject: [PATCH 084/124] Updated pr. --- ads/aqua/modeldeployment/entities.py | 9 ++------- .../unitary/with_extras/aqua/test_deployment.py | 17 ++--------------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 08a10693c..9166a538d 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -7,7 +7,7 @@ from oci.data_science.models import ModelDeployment, ModelDeploymentSummary from pydantic import BaseModel, Field, model_validator -from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger +from ads.aqua import logger from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import Tags from ads.aqua.config.utils.serializer import Serializable @@ -15,7 +15,6 @@ from ads.aqua.data import AquaResourceIdentifier from ads.common.serializer import DataClassSerializable from ads.common.utils import get_console_link -from ads.model.datascience_model import DataScienceModel class ShapeInfo(Serializable): @@ -553,13 +552,9 @@ def validate_multimodel_deployment_feasibility( sum_model_gpus += model.gpu_count aqua_deployment_config = model_deployment_config[model.model_id] - source_model = DataScienceModel.from_id(model.model_id) # Validates custom model with deployment config # Skips validating custom models without deployment config as the configuration is empty and gpu counts are generated by ads sdk - if ( - source_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID - and not aqua_deployment_config.configuration - ): + if not aqua_deployment_config.configuration: continue # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). 
diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index b3ef98773..e2acea8e6 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -1746,13 +1746,9 @@ def test_validate_deployment_params_for_unverified_models( container_family=container_type_key, ) - @patch("ads.model.datascience_model.DataScienceModel.from_id") def validate_multimodel_deployment_feasibility_helper( - self, models, instance_shape, display_name, total_gpus, mock_from_id + self, models, instance_shape, display_name, total_gpus ): - mock_from_id.return_value = MagicMock( - compartment_id=TestDataset.SERVICE_COMPARTMENT_ID - ) config_json = os.path.join( self.curr_dir, "test_data/deployment/aqua_summary_multi_model.json" ) @@ -1821,13 +1817,9 @@ def validate_multimodel_deployment_feasibility_helper( ), ], ) - @patch("ads.model.datascience_model.DataScienceModel.from_id") def test_validate_multimodel_deployment_feasibility_positive( - self, models, instance_shape, display_name, total_gpus, mock_from_id + self, models, instance_shape, display_name, total_gpus ): - mock_from_id.return_value = MagicMock( - compartment_id=TestDataset.SERVICE_COMPARTMENT_ID - ) self.validate_multimodel_deployment_feasibility_helper( models, instance_shape, display_name, total_gpus ) @@ -1902,7 +1894,6 @@ def test_validate_multimodel_deployment_feasibility_positive( ), ], ) - @patch("ads.model.datascience_model.DataScienceModel.from_id") def test_validate_multimodel_deployment_feasibility_negative( self, models, @@ -1910,11 +1901,7 @@ def test_validate_multimodel_deployment_feasibility_negative( display_name, total_gpus, value_error, - mock_from_id, ): - mock_from_id.return_value = MagicMock( - compartment_id=TestDataset.SERVICE_COMPARTMENT_ID - ) with pytest.raises(ConfigValidationError, match=value_error): self.validate_multimodel_deployment_feasibility_helper( models, instance_shape, display_name, total_gpus From 106300e1e4c55a5a5fda1b85de1d1b7de0c37922 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 7 Mar 2025 11:42:54 -0800 Subject: [PATCH 085/124] Enhances the MMD validation --- ads/aqua/modeldeployment/entities.py | 135 ++++++++---------- .../with_extras/aqua/test_deployment.py | 2 +- 2 files changed, 63 insertions(+), 74 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 9166a538d..92bae5c76 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -481,125 +481,114 @@ def validate(cls, values: Any) -> Any: def validate_multimodel_deployment_feasibility( self, models_config_summary: ModelDeploymentConfigSummary - ): + ) -> None: """ - Validates whether the user input of a model group (List[AquaMultiModelRef], 2+ models with a specified gpu count per model) - is feasible for a multi model deployment on the user's selected shape (instance_shape) + Validates whether the selected model group is feasible for a multi-model deployment + on the chosen instance shape. Validation Criteria: - - GPU Capacity: Ensures that the total number of GPUs requested by all models in the group does not exceed the GPU capacity of the selected instance shape. - - Verifies that all models in the group are compatible with the selected instance shape. - - Ensures that each model’s GPU allocation, as specified by the user, matches the requirements in the model's deployment configuration. 
- - Confirms that the selected instance shape supports multi-model deployment. - - Requires user input for the model group to be considered a valid multi-model deployment. - + - Ensures that the model group is not empty. + - Verifies that the selected instance shape is supported by the GPU allocation. + - Confirms that each model in the group has a corresponding deployment configuration. + - Ensures that each model's user-specified GPU allocation is allowed by its deployment configuration. + - Checks that the total GPUs requested by the model group does not exceed the available GPU capacity + for the selected instance shape. Parameters ---------- - models_config_summary : ModelDeploymentConfigSummary, optional - An instance of ModelDeploymentConfigSummary containing all required - fields (GPU Allocation, Deployment Configuration) for creating a multi model deployment via Aqua. + models_config_summary : ModelDeploymentConfigSummary + Contains GPU allocations and deployment configuration for models. Raises - ------- + ------ ConfigValidationError: - When the deployment is NOT a multi model deployment - When assigned GPU Allocations per model are NOT within the number of GPUs available in the instance shape - When all models in model group can NOT be deployed on the instance shape with the selected GPU count + - If the model group is empty. + - If the selected instance shape is not supported. + - If any model is missing from the deployment configuration. + - If a model's GPU allocation does not match any valid configuration. + - If the total requested GPUs exceed the instance shape’s capacity. """ + # Ensure that at least one model is provided. if not self.models: - logger.error("User defined model group (List[AquaMultiModelRef]) is None.") + logger.error("No models provided in the model group.") raise ConfigValidationError( - "Multi-model deployment requires at least one model, but none were provided. " - "Please add one or more models to the model group to proceed." + "Multi-model deployment requires at least one model. Please add one or more models." ) - selected_shape = self.instance_shape + selected_shape: str = self.instance_shape + # Verify that the selected shape is supported by the GPU allocation. if selected_shape not in models_config_summary.gpu_allocation: supported_shapes = list(models_config_summary.gpu_allocation.keys()) - logger.error( - f"The model group is not compatible with the selected instance shape " - f"'{selected_shape}'. Select a different instance shape from supported shapes {supported_shapes}." - ) - raise ConfigValidationError( - f"The model group is not compatible with the selected instance shape " - f"'{selected_shape}'. Select a different instance shape from supported shapes {supported_shapes}." + error_message = ( + f"The model group is not compatible with the selected instance shape '{selected_shape}'. " + f"Supported shapes: {supported_shapes}." 
) + logger.error(error_message) + raise ConfigValidationError(error_message) - total_available_gpus = models_config_summary.gpu_allocation[ + total_available_gpus: int = models_config_summary.gpu_allocation[ selected_shape ].total_gpus_available + model_deployment_config: Dict[str, Any] = ( + models_config_summary.deployment_config + ) - model_deployment_config = models_config_summary.deployment_config - - required_model_keys = [model.model_id for model in self.models] - missing_model_keys = required_model_keys - model_deployment_config.keys() - - if len(missing_model_keys) > 0: - logger.error( - f"Missing the following model entry with key {missing_model_keys} " - "in ModelDeploymentConfigSummary" - ) - raise ConfigValidationError( - "One or more selected models are missing from the configuration, preventing " - "validation for deployment on the given shape." + # Verify that every model in the group has a corresponding deployment configuration. + required_model_keys = {model.model_id for model in self.models} + missing_model_keys = required_model_keys - set(model_deployment_config.keys()) + if missing_model_keys: + error_message = ( + f"Missing deployment configuration for models: {missing_model_keys}. " + "Ensure all selected models are properly configured." ) + logger.error(error_message) + raise ConfigValidationError(error_message) sum_model_gpus = 0 + # Validate each model's GPU allocation against its deployment configuration. for model in self.models: sum_model_gpus += model.gpu_count aqua_deployment_config = model_deployment_config[model.model_id] - # Validates custom model with deployment config - # Skips validating custom models without deployment config as the configuration is empty and gpu counts are generated by ads sdk + # Skip validation for models without deployment configuration details. if not aqua_deployment_config.configuration: continue - # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). - # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. - # Our current configuration does not support this flexibility. - - # multi_deployment_shape = aqua_deployment_config.shape - if selected_shape not in aqua_deployment_config.configuration: - logger.error( - f"Model with OCID {model.model_id} in the model group is not compatible " - f"with the selected instance shape: {selected_shape}" - ) - raise ConfigValidationError( - "Select a different instance shape. One or more models in the " - "group are incompatible with the selected instance shape." + error_message = ( + f"Model {model.model_id} is not compatible with the selected instance shape '{selected_shape}'. " + "Select a different instance shape." ) + logger.error(error_message) + raise ConfigValidationError(error_message) + # Retrieve valid GPU counts for the selected shape. multi_model_configs = aqua_deployment_config.configuration.get( selected_shape, ConfigurationItem() ).multi_model_deployment - valid_gpu_configurations = [ - gpu_shape_config.gpu_count for gpu_shape_config in multi_model_configs - ] + valid_gpu_configurations = [cfg.gpu_count for cfg in multi_model_configs] + if model.gpu_count not in valid_gpu_configurations: valid_gpu_str = ", ".join(map(str, valid_gpu_configurations)) - logger.error( - f"Model {model.model_id} allocated {model.gpu_count} GPUs by user, " - f"but its deployment configuration requires either {valid_gpu_str} GPUs." 
- ) - raise ConfigValidationError( - "Change the GPU count for one or more models in the model group. " - "Adjust GPU allocations per model or choose a larger instance shape." + error_message = ( + f"Model {model.model_id} allocated {model.gpu_count} GPUs, but valid GPU configurations " + f"are: {valid_gpu_str}. Adjust the GPU allocation or select a larger instance shape." ) + logger.error(error_message) + raise ConfigValidationError(error_message) + # Check that the total GPU count for the model group does not exceed the instance capacity. if sum_model_gpus > total_available_gpus: - logger.error( - f"Selected shape {selected_shape} has {total_available_gpus} " - f"GPUs while model group has {sum_model_gpus} GPUs." - ) - raise ConfigValidationError( - "Total requested GPU count exceeds the available GPU capacity for the selected " - "instance shape. Adjust GPU allocations per model or choose a larger instance shape." + error_message = ( + f"Selected shape '{selected_shape}' has {total_available_gpus} GPUs, " + f"but the model group requires {sum_model_gpus} GPUs. " + "Adjust GPU allocations per model or choose a larger instance shape." ) + logger.error(error_message) + raise ConfigValidationError(error_message) class Config: extra = "ignore" diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index e2acea8e6..b4fe292e4 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -1902,7 +1902,7 @@ def test_validate_multimodel_deployment_feasibility_negative( total_gpus, value_error, ): - with pytest.raises(ConfigValidationError, match=value_error): + with pytest.raises(ConfigValidationError): self.validate_multimodel_deployment_feasibility_helper( models, instance_shape, display_name, total_gpus ) From 22308375146652bd272c445e486200c07ae5dd66 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 7 Mar 2025 17:16:21 -0800 Subject: [PATCH 086/124] Switch Multi-Model deployment container validation to usage-based tag --- ads/aqua/config/container_config.py | 7 +++++ ads/aqua/model/model.py | 38 ++++++++++++++++++++++---- ads/aqua/modeldeployment/deployment.py | 24 ++++++++++------ 3 files changed, 54 insertions(+), 15 deletions(-) diff --git a/ads/aqua/config/container_config.py b/ads/aqua/config/container_config.py index a5d9f1053..dea2b468c 100644 --- a/ads/aqua/config/container_config.py +++ b/ads/aqua/config/container_config.py @@ -4,12 +4,19 @@ from typing import Dict, List, Optional +from common.extended_enum import ExtendedEnum from pydantic import Field from ads.aqua.common.entities import ContainerSpec from ads.aqua.config.utils.serializer import Serializable +class Usage(ExtendedEnum): + INFERENCE = "inference" + BATCH_INFERENCE = "batch_inference" + MULTI_MODEL = "multi_model" + + class AquaContainerConfigSpec(Serializable): """ Represents container specification details. 
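The validation criteria in the docstring above come down to a few set and arithmetic checks: a non-empty group, a supported shape, a per-model GPU count allowed by that model's configuration, and a group total within the shape's capacity. The following self-contained sketch mirrors that flow, with plain dicts standing in for ModelDeploymentConfigSummary and the per-model deployment configs; the field names are simplified stand-ins, not the actual ADS entities.

from typing import Dict, List


def check_feasibility(
    models: List[dict],                                   # [{"model_id": ..., "gpu_count": ...}]
    instance_shape: str,
    total_gpus_by_shape: Dict[str, int],                  # shape -> total GPUs available
    allowed_gpu_counts: Dict[str, Dict[str, List[int]]],  # model_id -> shape -> allowed counts
) -> None:
    """Raise ValueError if the group cannot run on the selected shape."""
    if not models:
        raise ValueError("Multi-model deployment requires at least one model.")
    if instance_shape not in total_gpus_by_shape:
        raise ValueError(f"Unsupported shape '{instance_shape}'.")

    requested = 0
    for model in models:
        requested += model["gpu_count"]
        allowed = allowed_gpu_counts.get(model["model_id"], {})
        if not allowed:
            continue  # models without a deployment config are skipped
        if model["gpu_count"] not in allowed.get(instance_shape, []):
            raise ValueError(
                f"{model['model_id']} cannot use {model['gpu_count']} GPU(s) on {instance_shape}."
            )

    if requested > total_gpus_by_shape[instance_shape]:
        raise ValueError(
            f"Group needs {requested} GPUs; {instance_shape} provides "
            f"{total_gpus_by_shape[instance_shape]}."
        )


# Two models sharing an 8-GPU shape (illustrative values).
check_feasibility(
    models=[{"model_id": "model_a", "gpu_count": 2}, {"model_id": "model_b", "gpu_count": 4}],
    instance_shape="BM.GPU.H100.8",
    total_gpus_by_shape={"BM.GPU.H100.8": 8},
    allowed_gpu_counts={
        "model_a": {"BM.GPU.H100.8": [1, 2, 8]},
        "model_b": {"BM.GPU.H100.8": [1, 2, 4, 8]},
    },
)
print("feasible")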
diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index cfc900767..a56501ee7 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -43,7 +43,7 @@ read_file, upload_folder, ) -from ads.aqua.config.container_config import AquaContainerConfig +from ads.aqua.config.container_config import AquaContainerConfig, Usage from ads.aqua.constants import ( AQUA_MODEL_ARTIFACT_CONFIG, AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME, @@ -258,11 +258,25 @@ def create_multi( display_name_list = [] model_custom_metadata = ModelCustomMetadata() - # TODO: update it when more deployment containers are supported - supported_container_families = ( - InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY - ) - deployment_container = InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY + # Get container config + container_config = get_container_config() + + service_inference_containers = AquaContainerConfig.from_container_index_json( + config=container_config + ).inference.values() + + supported_container_families = [ + container_config_item.family + for container_config_item in service_inference_containers + if Usage.MULTI_MODEL in container_config_item.usages + ] + + if not supported_container_families: + raise AquaValueError( + "Currently, there are no containers that support multi-model deployment." + ) + + selected_models_deployment_containers = set() # Process each model for idx, model in enumerate(models): @@ -309,6 +323,8 @@ def create_multi( f"Only '{supported_container_families}' are supported for multi-model deployments." ) + selected_models_deployment_containers.add(deployment_container) + # Add model-specific metadata model_custom_metadata.add( key=f"model-id-{idx}", @@ -350,6 +366,16 @@ def create_multi( category="Other", ) + # Check if the all models in the group shares same container family + if len(selected_models_deployment_containers) > 1: + raise AquaValueError( + "The selected models are associated with different container families: " + f"{list(selected_models_deployment_containers)}." + "For multi-model deployment, all models in the group must share the same container family." + ) + + deployment_container = selected_models_deployment_containers.pop() + # Generate model group details timestamp = datetime.now().strftime("%Y%m%d") model_group_display_name = f"model_group_{timestamp}" diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index cfabf65b2..39c82cfd6 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -36,7 +36,7 @@ load_gpu_shapes_index, validate_cmd_var, ) -from ads.aqua.config.container_config import AquaContainerConfig +from ads.aqua.config.container_config import AquaContainerConfig, Usage from ads.aqua.constants import ( AQUA_MODEL_ARTIFACT_FILE, AQUA_MODEL_TYPE_CUSTOM, @@ -196,11 +196,23 @@ def create( except ConfigValidationError as err: raise AquaValueError(f"{err}") from err - # TODO: update it when more deployment containers are supported + service_inference_containers = ( + AquaContainerConfig.from_container_index_json( + config=container_config + ).inference.values() + ) + supported_container_families = [ - InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY + container_config_item.family + for container_config_item in service_inference_containers + if Usage.MULTI_MODEL in container_config_item.usages ] + if not supported_container_families: + raise AquaValueError( + "Currently, there are no containers that support multi-model deployment." 
+ ) + # Check if provided container family supports multi-model deployment if ( create_deployment_details.container_family @@ -217,12 +229,6 @@ def create( # If the container is not recognized, we can only issue a warning that # the provided container may not support multi-model deployment. if create_deployment_details.container_image_uri: - service_inference_containers = ( - AquaContainerConfig.from_container_index_json( - config=container_config - ).inference.values() - ) - selected_container_name = ContainerPath( full_path=create_deployment_details.container_image_uri ).name From 080a2516632d0d6e911a45c2f72fd5a56d05ecb9 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Sun, 9 Mar 2025 19:08:41 -0400 Subject: [PATCH 087/124] Added unit test. --- .../with_extras/aqua/test_deployment.py | 85 ++++++++++++++++++- 1 file changed, 82 insertions(+), 3 deletions(-) diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index b4fe292e4..c3daf82a1 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -499,10 +499,10 @@ class TestDataset: "deployment_config": { "model_a": { "shape": [ - "BM.GPU.A100-V2.8", - "BM.GPU.H100.8", "VM.GPU.A10.2", "VM.GPU.A10.4", + "BM.GPU.A100-V2.8", + "BM.GPU.H100.8", ], "configuration": { "VM.GPU.A10.2": { @@ -593,6 +593,73 @@ class TestDataset: "error_message": None, } + aqua_deployment_multi_model_config_single_custom = { + "deployment_config": {"model_a": {"shape": [], "configuration": {}}}, + "gpu_allocation": { + "VM.GPU2.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU3.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU3.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "VM.GPU3.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "BM.GPU2.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "BM.GPU3.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU4.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.A100-V2.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.H100.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.T1.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "BM.GPU.A10.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "VM.GPU.A10.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "BM.GPU.L40S-NC.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "VM.GPU.A10.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU.A10.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + }, + "error_message": None, + } + aqua_deployment_multi_model_config_summary_hybrid = { "deployment_config": { "model_a": { @@ -1001,7 +1068,7 @@ def test_get_deployment_config(self): "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" ) @patch("ads.aqua.modeldeployment.AquaDeploymentApp.list_shapes") - def test_get_multimodel_deployment_config( + def 
test_get_multimodel_deployment_config_single( self, mock_list_shapes, mock_fetch_deployment_configs_concurrently ): config_json = os.path.join( @@ -1035,6 +1102,18 @@ def test_get_multimodel_deployment_config( == TestDataset.aqua_deployment_multi_model_config_summary ) + # custom model without deployment config + # deployment shape should be collected from `list_shapes`. + mock_fetch_deployment_configs_concurrently.return_value = { + "model_a": AquaDeploymentConfig() + } + result = self.app.get_multimodel_deployment_config(["model_a"]) + + assert ( + result.model_dump() + == TestDataset.aqua_deployment_multi_model_config_single_custom + ) + @patch( "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" ) From 5041e699d39fbedeb65ea07ee5f20da4924bc993 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 10 Mar 2025 11:56:36 -0400 Subject: [PATCH 088/124] Updated pr. --- ads/aqua/modeldeployment/deployment.py | 8 ++++---- ads/aqua/modeldeployment/utils.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 830ac86db..bc5673f20 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -162,7 +162,7 @@ def create( # Create an AquaModelApp instance once to perform the deployment creation. model_app = AquaModelApp() if create_deployment_details.model_id: - logger.info( + logger.debug( f"Single model ({create_deployment_details.model_id}) provided. " "Delegating to single model creation method." ) @@ -250,7 +250,7 @@ def create( f"Only the following container families are supported: {supported_container_families}." ) - logger.info( + logger.debug( f"Multi models ({model_ids}) provided. Delegating to multi model creation method." ) @@ -1066,11 +1066,11 @@ def get_multimodel_deployment_config( ) if len(model_ids) == 1: - return multi_model_deployment_config_loader.load_single( + return multi_model_deployment_config_loader.load_model_deployment_configuration( shapes=available_shapes, model_id=model_ids[0] ) - return multi_model_deployment_config_loader.load( + return multi_model_deployment_config_loader.load_multi_model_deployment_configuration( shapes=available_shapes, model_ids=model_ids, primary_model_id=primary_model_id, diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index fc65b8df2..e6571120d 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -44,7 +44,7 @@ def __init__(self, deployment_app: AquaApp): """ self.deployment_app = deployment_app - def load( + def load_multi_model_deployment_configuration( self, shapes: List[ComputeShapeSummary], model_ids: List[str], @@ -132,7 +132,7 @@ def load( summary.gpu_allocation = gpu_allocation return summary - def load_single( + def load_model_deployment_configuration( self, shapes: List[ComputeShapeSummary], model_id: str, @@ -160,15 +160,15 @@ def load_single( ] deployment = { - model_id: { - "shape": [shape.upper() for shape in deployment_config.shape], - "configuration": { + model_id: AquaDeploymentConfig( + shape=[shape.upper() for shape in deployment_config.shape], + configuration={ shape.upper(): deployment_config.configuration.get( shape, ConfigurationItem() ) for shape in deployment_config.shape }, - } + ) } # Initialize the summary result with the deployment configurations. 
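For orientation, the loader methods renamed above are reached through AquaDeploymentApp.get_multimodel_deployment_config. A hedged usage sketch follows: it assumes an environment already authenticated against OCI Data Science, the model OCIDs are placeholders, and the summary fields follow the shape exercised in the tests (deployment_config, gpu_allocation, error_message).

from ads.aqua.modeldeployment import AquaDeploymentApp

# Placeholder OCIDs - substitute real Data Science model OCIDs.
model_ids = [
    "ocid1.datasciencemodel.oc1..<model_a>",
    "ocid1.datasciencemodel.oc1..<model_b>",
]

summary = AquaDeploymentApp().get_multimodel_deployment_config(
    model_ids=model_ids,
    primary_model_id=None,  # omit to spread GPUs evenly across the group
)

if summary.error_message:
    print(summary.error_message)
else:
    for shape, allocation in summary.gpu_allocation.items():
        print(shape, allocation.total_gpus_available)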
From 76cc18a284e29c013af46da0c359d6eab6d6f69b Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 10 Mar 2025 18:13:55 -0400 Subject: [PATCH 089/124] Updated pr. --- ads/aqua/modeldeployment/deployment.py | 15 +-- ads/aqua/modeldeployment/utils.py | 132 +++++++++++++++---------- 2 files changed, 85 insertions(+), 62 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index bc5673f20..d60aadc36 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -1053,7 +1053,7 @@ def get_multimodel_deployment_config( """ if not model_ids: raise AquaValueError( - "Invalid or empty parameter `model_ids`. Specify a list of valid model ids to get multi model deployment config." + "Model IDs were not provided. Please provide a valid list of model IDs to retrieve the multi-model deployment configuration." ) compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID) @@ -1061,16 +1061,9 @@ def get_multimodel_deployment_config( # Get the all model deployment available shapes in a given compartment available_shapes = self.list_shapes(compartment_id=compartment_id) - multi_model_deployment_config_loader = MultiModelDeploymentConfigLoader( - deployment_app=self - ) - - if len(model_ids) == 1: - return multi_model_deployment_config_loader.load_model_deployment_configuration( - shapes=available_shapes, model_id=model_ids[0] - ) - - return multi_model_deployment_config_loader.load_multi_model_deployment_configuration( + return MultiModelDeploymentConfigLoader( + deployment_app=self, + ).load( shapes=available_shapes, model_ids=model_ids, primary_model_id=primary_model_id, diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index e6571120d..9d2188872 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -44,14 +44,14 @@ def __init__(self, deployment_app: AquaApp): """ self.deployment_app = deployment_app - def load_multi_model_deployment_configuration( + def load( self, shapes: List[ComputeShapeSummary], model_ids: List[str], primary_model_id: Optional[str] = None, ) -> ModelDeploymentConfigSummary: """ - Retrieves deployment configurations for multiple models and calculates compatible GPU allocations. + Retrieves deployment configurations for multiple/single model and calculates compatible GPU allocations. Parameters ---------- @@ -69,24 +69,48 @@ def load_multi_model_deployment_configuration( A summary of the deployment configurations and GPU allocations. If GPU allocation cannot be determined, an appropriate error message is included in the summary. """ - # Fetch deployment configurations concurrently. - logger.debug(f"Loading model deployment configuration for models: {model_ids}") - deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) + if len(model_ids) == 1: + return self._load_model_deployment_configuration( + shapes=shapes, model_ids=model_ids + ) - logger.debug(f"Loaded config: {deployment_configs}") - model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) + return self._load_multi_model_deployment_configuration( + shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id + ) - # Initialize the summary result with the deployment configurations. 
- summary = ModelDeploymentConfigSummary(deployment_config=deployment) + def _load_multi_model_deployment_configuration( + self, + shapes: List[ComputeShapeSummary], + model_ids: List[str], + primary_model_id: Optional[str] = None, + ) -> ModelDeploymentConfigSummary: + """ + Retrieves deployment configurations for multiple models and calculates compatible GPU allocations. + + Parameters + ---------- + shapes : List[ComputeShapeSummary] + Model deployment available shapes. + model_ids : List[str] + A list of OCIDs for the Aqua models. + primary_model_id : Optional[str], optional + The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model. + Otherwise, GPUs are evenly allocated. + + Returns + ------- + ModelDeploymentConfigSummary + A summary of the deployment configurations and GPU allocations. If GPU allocation + cannot be determined, an appropriate error message is included in the summary. + """ + model_shape_gpu, available_shapes, summary = self._fetch_model_shape_gpu( + shapes=shapes, model_ids=model_ids + ) # Identify common deployment shapes among all models. common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu) logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}") - # Filter out not available shapes - available_shapes = [item.name.upper() for item in shapes] - logger.debug(f"Service Available Shapes: {available_shapes}") - # If all models' shape configs are empty, use default deployment shapes instead common_shapes = ( available_shapes @@ -132,10 +156,10 @@ def load_multi_model_deployment_configuration( summary.gpu_allocation = gpu_allocation return summary - def load_model_deployment_configuration( + def _load_model_deployment_configuration( self, shapes: List[ComputeShapeSummary], - model_id: str, + model_ids: List[str], ) -> ModelDeploymentConfigSummary: """ Retrieves deployment configuration for single model and allocate all available GPU count to it. @@ -144,8 +168,8 @@ def load_model_deployment_configuration( ---------- shapes : List[ComputeShapeSummary] Model deployment available shapes. - model_id : str - The OCID for the Aqua model. + model_ids : List[str] + A list of OCIDs for the Aqua models. Returns ------- @@ -153,30 +177,13 @@ def load_model_deployment_configuration( A summary of the deployment configurations and GPU allocations. If GPU allocation cannot be determined, an appropriate error message is included in the summary. """ - # Fetch deployment configuration concurrently. - logger.debug(f"Loading model deployment configuration for model: {model_id}") - deployment_config = self._fetch_deployment_configs_concurrently([model_id])[ - model_id - ] - - deployment = { - model_id: AquaDeploymentConfig( - shape=[shape.upper() for shape in deployment_config.shape], - configuration={ - shape.upper(): deployment_config.configuration.get( - shape, ConfigurationItem() - ) - for shape in deployment_config.shape - }, - ) - } - - # Initialize the summary result with the deployment configurations. 
- summary = ModelDeploymentConfigSummary(deployment_config=deployment) + model_id = model_ids[0] + _, common_shapes, summary = self._fetch_model_shape_gpu( + shapes=shapes, model_ids=model_ids + ) # Find out the common shapes from deployment config and available deployment shapes - shape = [shape.upper() for shape in deployment_config.shape] - common_shapes = [shape.name.upper() for shape in shapes] + shape = [shape.upper() for shape in summary.deployment_config[model_id].shape] if shape: common_shapes = list(set(common_shapes).intersection(set(shape))) @@ -219,6 +226,24 @@ def load_model_deployment_configuration( summary.gpu_allocation = gpu_allocation return summary + def _fetch_model_shape_gpu(self, shapes: List[ComputeShapeSummary], model_ids: str): + """Fetches dict of model shape and gpu, list of available shapes and builds `ModelDeploymentConfigSummary` instance.""" + # Fetch deployment configurations concurrently. + logger.debug(f"Loading model deployment configuration for models: {model_ids}") + deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) + + logger.debug(f"Loaded config: {deployment_configs}") + model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) + + # Initialize the summary result with the deployment configurations. + summary = ModelDeploymentConfigSummary(deployment_config=deployment) + + # Filter out not available shapes + available_shapes = [item.name.upper() for item in shapes] + logger.debug(f"Service Available Shapes: {available_shapes}") + + return model_shape_gpu, available_shapes, summary + def _fetch_deployment_configs_concurrently( self, model_ids: List[str] ) -> Dict[str, AquaDeploymentConfig]: @@ -241,25 +266,30 @@ def _extract_model_shape_gpu( ): """Extracts shape and GPU count details from deployment configurations. Supported shapes for multi model deployment will be collected from `configuration` entry in deployment config. + Supported shapes for single model deployment will be collected from `shape` entry in deployment config. """ model_shape_gpu = {} deployment = {} + is_single_model = len(deployment_configs) == 1 for model_id, config in deployment_configs.items(): - # We cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). + # For multi model deployment, we cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. # Our current configuration does not support this flexibility. - # multi_deployment_shape = config.shape - multi_deployment_shape = list(config.configuration.keys()) - model_shape_gpu[model_id] = { - shape.upper(): [ - item.gpu_count - for item in config.configuration.get( - shape, ConfigurationItem() - ).multi_model_deployment - ] - for shape in multi_deployment_shape - } + # For single model deployment, we use `config.shape` to find the available shapes. 
+ multi_deployment_shape = ( + config.shape if is_single_model else list(config.configuration.keys()) + ) + if not is_single_model: + model_shape_gpu[model_id] = { + shape.upper(): [ + item.gpu_count + for item in config.configuration.get( + shape, ConfigurationItem() + ).multi_model_deployment + ] + for shape in multi_deployment_shape + } deployment[model_id] = { "shape": [shape.upper() for shape in multi_deployment_shape], "configuration": { From f1f8f4222109c3e3c34229a2a8502027f3f1d51c Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Mon, 10 Mar 2025 15:31:11 -0700 Subject: [PATCH 090/124] Fixes the multi-model evaluation --- ads/aqua/evaluation/evaluation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 5f72d46ae..2055e1f9a 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -31,7 +31,6 @@ Tags, ) from ads.aqua.common.errors import ( - AquaError, AquaFileExistsError, AquaFileNotFoundError, AquaMissingKeyError, @@ -598,7 +597,9 @@ def validate_model_name( custom_metadata_list = evaluation_source.custom_metadata_list user_model_name = user_model_parameters.get("model") - model_count = custom_metadata_list.get(ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT) + model_count = custom_metadata_list.get( + ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT + ) if model_count and custom_metadata_list: model_group_count = int(model_count.value) @@ -611,11 +612,11 @@ def validate_model_name( ) model_names = [ - custom_metadata_list.get(f"model-name-{idx}") + custom_metadata_list.get(f"model-name-{idx}").value for idx in range(model_group_count) ] - valid_model_names = ", ".join(name.value for name in model_names if name is not None) + valid_model_names = ", ".join(name for name in model_names if name is not None) if "model" not in user_model_parameters: logger.debug( @@ -626,7 +627,6 @@ def validate_model_name( ) if user_model_name not in model_names: - logger.debug( f"User input for model name was {user_model_name}, expected {valid_model_names} evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}" ) From 2e09071d2bb8ef5472a2b4acff306cb7656d9218 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Mon, 10 Mar 2025 16:02:11 -0700 Subject: [PATCH 091/124] Fixes the multi-model evaluation validator --- ads/aqua/config/container_config.py | 2 +- ads/aqua/evaluation/evaluation.py | 86 +++++++++++++++++------------ 2 files changed, 53 insertions(+), 35 deletions(-) diff --git a/ads/aqua/config/container_config.py b/ads/aqua/config/container_config.py index dea2b468c..01e2f0d50 100644 --- a/ads/aqua/config/container_config.py +++ b/ads/aqua/config/container_config.py @@ -4,11 +4,11 @@ from typing import Dict, List, Optional -from common.extended_enum import ExtendedEnum from pydantic import Field from ads.aqua.common.entities import ContainerSpec from ads.aqua.config.utils.serializer import Serializable +from ads.common.extended_enum import ExtendedEnum class Usage(ExtendedEnum): diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 2055e1f9a..815d7dbcf 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -575,64 +575,82 @@ def validate_model_name( create_aqua_evaluation_details: CreateAquaEvaluationDetails, ) -> None: """ - Validates the user input of the model name when creating an Aqua evaluation. 
+ Validates the user input for the model name when creating an Aqua evaluation. + + This function verifies that: + - The model group is not empty. + - The user provided a non-empty model name. + - The provided model name exists in the DataScienceModel metadata. + - The deployment configuration contains core metadata required for validation. Parameters ---------- - evaluation_source: DataScienceModel - The DataScienceModel Object which contains all metadata - about each model in a single and multi model deployment. - create_aqua_evaluation_details: CreateAquaEvaluationDetails - The CreateAquaEvaluationDetails data class which contains all - required and optional fields to create the aqua evaluation. + evaluation_source : DataScienceModel + The DataScienceModel object containing metadata about each model in the deployment. + create_aqua_evaluation_details : CreateAquaEvaluationDetails + Contains required and optional fields for creating the Aqua evaluation. Raises - ------- - AquaValueError: - - When the user fails to specify any input for the model name. - - When the user supplies a model name that does not match the model name set in the DataScienceModel metadata. - - When the DataScienceModel metadata lacks core attributes for validating the name""" + ------ + AquaValueError + If the user fails to provide a model name or if the provided model name does not match + any of the valid model names in the deployment metadata. + AquaRuntimeError + If the metadata is missing the model group count or if the model group count is invalid. + """ user_model_parameters = create_aqua_evaluation_details.model_parameters - custom_metadata_list = evaluation_source.custom_metadata_list user_model_name = user_model_parameters.get("model") + # Ensure that a non-empty model name was provided. + if not user_model_name: + error_message = ( + "No model name was provided for evaluation. For multi-model deployment, " + "a model must be specified in the model parameters." + ) + logger.debug(error_message) + raise AquaValueError(error_message) + + # Retrieve and convert the model group count from metadata. model_count = custom_metadata_list.get( ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT ) - - if model_count and custom_metadata_list: + try: model_group_count = int(model_count.value) - else: - logger.debug( - f"The ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT or custom_metadata_list (ModelCustomMetadata) is missing from the metadata in evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}" + except Exception as ex: + error_message = ( + "Missing or invalid `MULTIMODEL_GROUP_COUNT` " + f"in custom metadata for evaluation source ID '{create_aqua_evaluation_details.evaluation_source_id}'. " + f"Details: {ex}" + ) + logger.error(error_message) + + if model_group_count < 1: + error_message = ( + f"Invalid model group count: {model_group_count} for evaluation source ID " + f"'{create_aqua_evaluation_details.evaluation_source_id}'. A valid multi-model deployment " + f"requires at least one model." ) + logger.error(error_message) raise AquaRuntimeError( - "Recreate the model deployment and retry the evaluation. An issue occured when initalizing the model group during deployment." + f"Cannot extract details about the multi-model deployment to evaluate. A valid multi-model deployment requires at least one model, however the provided evaluation source ID '{create_aqua_evaluation_details.evaluation_source_id}' doesn't contain details about the deployed models." 
) + # Build the list of valid model names from custom metadata. model_names = [ custom_metadata_list.get(f"model-name-{idx}").value for idx in range(model_group_count) ] - valid_model_names = ", ".join(name for name in model_names if name is not None) - - if "model" not in user_model_parameters: - logger.debug( - f"User did not input model name for multi model deployment evaluation with evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}" - ) - raise AquaValueError( - f"Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment. The valid model names for this Model Deployment are {valid_model_names}." - ) - + # Check if the provided model name is among the valid names. if user_model_name not in model_names: - logger.debug( - f"User input for model name was {user_model_name}, expected {valid_model_names} evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}" - ) - raise AquaValueError( - f"Provide the correct model name. The valid model names for this Model Deployment are {valid_model_names}." + error_message = ( + f"Provided model name '{user_model_name}' does not match any valid model names {model_names} " + f"for evaluation source ID '{create_aqua_evaluation_details.evaluation_source_id}'. " + "Please provide the correct model name." ) + logger.debug(error_message) + raise AquaValueError(error_message) def _build_evaluation_runtime( self, From 688b5abe05ae45cdacffef76485c8ba87a0dc1bf Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Mon, 10 Mar 2025 16:05:43 -0700 Subject: [PATCH 092/124] Fixes the multi-model evaluation validator --- ads/aqua/evaluation/evaluation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 815d7dbcf..01ee147bf 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -626,14 +626,15 @@ def validate_model_name( logger.error(error_message) if model_group_count < 1: - error_message = ( + logger.error( f"Invalid model group count: {model_group_count} for evaluation source ID " f"'{create_aqua_evaluation_details.evaluation_source_id}'. A valid multi-model deployment " f"requires at least one model." ) - logger.error(error_message) raise AquaRuntimeError( - f"Cannot extract details about the multi-model deployment to evaluate. A valid multi-model deployment requires at least one model, however the provided evaluation source ID '{create_aqua_evaluation_details.evaluation_source_id}' doesn't contain details about the deployed models." + f"Unable to retrieve details for the multi-model deployment evaluation. A valid multi-model deployment " + f"must include at least one model. However, the provided evaluation source ID " + f"'{create_aqua_evaluation_details.evaluation_source_id}' does not contain any information about deployed models." ) # Build the list of valid model names from custom metadata. From d92467eb8028980772868b7208d6b081853f065f Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Mon, 10 Mar 2025 16:50:25 -0700 Subject: [PATCH 093/124] Minor fixes of the error messages. 
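The evaluation-side check reworked above amounts to: read the model group count from custom metadata, collect the model-name-{idx} entries, and confirm the user-supplied model parameter matches one of them. A compact sketch of that logic, with a plain dict standing in for the ModelCustomMetadata list; the keys mirror the metadata fields used here and the values are illustrative.

from typing import Dict


def check_model_name(custom_metadata: Dict[str, str], user_model_name: str) -> None:
    """Validate the evaluation 'model' parameter against the deployment metadata."""
    if not user_model_name:
        raise ValueError("A model name must be provided for multi-model evaluation.")

    try:
        group_count = int(custom_metadata["model_group_count"])
    except (KeyError, ValueError) as ex:
        raise RuntimeError(f"Missing or invalid model_group_count: {ex}") from ex
    if group_count < 1:
        raise RuntimeError("A multi-model deployment must contain at least one model.")

    valid_names = [custom_metadata[f"model-name-{idx}"] for idx in range(group_count)]
    if user_model_name not in valid_names:
        raise ValueError(
            f"'{user_model_name}' is not part of this deployment; valid names: {', '.join(valid_names)}"
        )


# Illustrative two-model group.
check_model_name(
    {"model_group_count": "2", "model-name-0": "model_one", "model-name-1": "model_two"},
    "model_one",
)
print("model name accepted")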
--- ads/aqua/model/model.py | 4 ++-- ads/aqua/modeldeployment/entities.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index a56501ee7..145f20a6c 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -149,7 +149,7 @@ def create( Parameters ---------- - model : Union[str, AquaMultiModelRef] + model_id : Union[str, AquaMultiModelRef] The model ID as a string or a AquaMultiModelRef instance to be deployed. project_id : Optional[str] The project ID for the custom model. @@ -294,7 +294,7 @@ def create_multi( if source_model.freeform_tags.get(Tags.TASK, UNKNOWN) != "text_generation": raise AquaValueError( f"Invalid or missing {Tags.TASK} tag for selected model {display_name}. " - f"Currently only `text_generation` models are support for multi model deployment." + f"Currently only `text_generation` models are supported for multi model deployment." ) display_name_list.append(display_name) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 92bae5c76..82ce9e68b 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -512,10 +512,10 @@ def validate_multimodel_deployment_feasibility( if not self.models: logger.error("No models provided in the model group.") raise ConfigValidationError( - "Multi-model deployment requires at least one model. Please add one or more models." + "Multi-model deployment requires at least one model. Please provide one or more models." ) - selected_shape: str = self.instance_shape + selected_shape = self.instance_shape # Verify that the selected shape is supported by the GPU allocation. if selected_shape not in models_config_summary.gpu_allocation: From d9c6de3b80348b0c128339a18c6e7d8651d84bc8 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Tue, 11 Mar 2025 09:59:16 -0700 Subject: [PATCH 094/124] Brings the MultiModelSupportedTaskType enum --- ads/aqua/model/enums.py | 5 +++++ ads/aqua/model/model.py | 8 ++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/ads/aqua/model/enums.py b/ads/aqua/model/enums.py index 7e61001c6..1a21adabc 100644 --- a/ads/aqua/model/enums.py +++ b/ads/aqua/model/enums.py @@ -23,3 +23,8 @@ class FineTuningCustomMetadata(ExtendedEnum): VALIDATION_METRICS_FINAL = "val_metrics_final" TRAINING_METRICS_EPOCH = "train_metrics_epoch" VALIDATION_METRICS_EPOCH = "val_metrics_epoch" + + +class MultiModelSupportedTaskType(ExtendedEnum): + TEXT_GENERATION = "text-generation" + TEXT_GENERATION_ALT = "text_generation" diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 145f20a6c..a5ecc5164 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -79,6 +79,7 @@ ImportModelDetails, ModelValidationResult, ) +from ads.aqua.model.enums import MultiModelSupportedTaskType from ads.common.auth import default_signer from ads.common.oci_resource import SEARCH_TYPE, OCIResource from ads.common.utils import get_console_link @@ -291,10 +292,13 @@ def create_multi( # "Currently only service models are supported for multi model deployment." # ) - if source_model.freeform_tags.get(Tags.TASK, UNKNOWN) != "text_generation": + if ( + source_model.freeform_tags.get(Tags.TASK, UNKNOWN).lower() + not in MultiModelSupportedTaskType + ): raise AquaValueError( f"Invalid or missing {Tags.TASK} tag for selected model {display_name}. " - f"Currently only `text_generation` models are supported for multi model deployment." 
+ f"Currently only `{MultiModelSupportedTaskType.values()}` models are supported for multi model deployment." ) display_name_list.append(display_name) From b3e434fd67c0e35604114cd72ce128767173be6d Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Tue, 11 Mar 2025 13:25:24 -0400 Subject: [PATCH 095/124] Fixed unit tests. --- .../aqua/test_data/ui/container_index.json | 3 ++ .../aqua/test_deployment_handler.py | 4 +- .../with_extras/aqua/test_evaluation.py | 43 +++++++++---------- tests/unitary/with_extras/aqua/test_model.py | 9 +++- 4 files changed, 35 insertions(+), 24 deletions(-) diff --git a/tests/unitary/with_extras/aqua/test_data/ui/container_index.json b/tests/unitary/with_extras/aqua/test_data/ui/container_index.json index e0689d17e..a3f4dbe77 100644 --- a/tests/unitary/with_extras/aqua/test_data/ui/container_index.json +++ b/tests/unitary/with_extras/aqua/test_data/ui/container_index.json @@ -130,6 +130,9 @@ "NVIDIA_GPU" ], "type": "inference", + "usages": [ + "multi_model" + ], "version": "0.4.1.3" } ] diff --git a/tests/unitary/with_extras/aqua/test_deployment_handler.py b/tests/unitary/with_extras/aqua/test_deployment_handler.py index b6c60fb49..7b11ceb3c 100644 --- a/tests/unitary/with_extras/aqua/test_deployment_handler.py +++ b/tests/unitary/with_extras/aqua/test_deployment_handler.py @@ -99,7 +99,9 @@ def test_get_multimodel_deployment_config( self.deployment_handler.request.path = "aqua/deployments/config" self.deployment_handler.get(id=["mock-model-id-one", "mock-model-id-two"]) mock_get_multimodel_deployment_config.assert_called_with( - model_ids=["mock-model-id-one", "mock-model-id-two"], primary_model_id=None + model_ids=["mock-model-id-one", "mock-model-id-two"], + primary_model_id=None, + compartment_id=TestDataset.USER_COMPARTMENT_ID, ) @patch("ads.aqua.modeldeployment.AquaDeploymentApp.get") diff --git a/tests/unitary/with_extras/aqua/test_evaluation.py b/tests/unitary/with_extras/aqua/test_evaluation.py index 1a88edc50..f26f46278 100644 --- a/tests/unitary/with_extras/aqua/test_evaluation.py +++ b/tests/unitary/with_extras/aqua/test_evaluation.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import base64 @@ -538,22 +538,20 @@ def test_create_evaluation( } @parameterized.expand( - [ - ( - {}, - "Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment. The valid model names for this Model Deployment are model_one, model_two, model_three." - ), - ( - {"model": "wrong_model_name"}, - "Provide the correct model name. The valid model names for this Model Deployment are model_one, model_two, model_three." - ) - ]) + [ + ( + {}, + "No model name was provided for evaluation. For multi-model deployment, a model must be specified in the model parameters.", + ), + ( + {"model": "wrong_model_name"}, + "Provided model name 'wrong_model_name' does not match any valid model names ['model_one', 'model_two', 'model_three'] for evaluation source ID 'ocid1.datasciencemodeldeployment.oc1..'. 
Please provide the correct model name.", + ), + ] + ) @patch("ads.aqua.evaluation.evaluation.AquaEvaluationApp.create") def test_validate_model_name( - self, - mock_model_parameters, - expected_message, - mock_model + self, mock_model_parameters, expected_message, mock_model ): curr_dir = os.path.dirname(__file__) @@ -564,7 +562,7 @@ def test_validate_model_name( eval_model_freeform_tags[Tags.AQUA_TAG] = "active" create_aqua_evaluation_details = dict( # noqa: C408 - evaluation_source_id= TestDataset.MODEL_DEPLOYMENT_ID, + evaluation_source_id=TestDataset.MODEL_DEPLOYMENT_ID, evaluation_name="test_evaluation_name", dataset_path="oci://dataset_bucket@namespace/prefix/dataset.jsonl", report_path="oci://report_bucket@namespace/prefix/", @@ -578,19 +576,20 @@ def test_validate_model_name( defined_tags=eval_model_defined_tags, ) - aqua_multi_model = os.path.join( curr_dir, "test_data/deployment/aqua_multi_model.yaml" ) - mock_model = DataScienceModel.from_yaml( - uri=aqua_multi_model - ) + mock_model = DataScienceModel.from_yaml(uri=aqua_multi_model) - mock_create_aqua_evaluation_details = MagicMock(**create_aqua_evaluation_details, spec=CreateAquaEvaluationDetails) + mock_create_aqua_evaluation_details = MagicMock( + **create_aqua_evaluation_details, spec=CreateAquaEvaluationDetails + ) try: - AquaEvaluationApp.validate_model_name(mock_model, mock_create_aqua_evaluation_details) + AquaEvaluationApp.validate_model_name( + mock_model, mock_create_aqua_evaluation_details + ) except AquaError as e: print(str(e)) self.assertEqual(str(e), expected_message) diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index fdba9455e..856936daa 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -364,10 +364,17 @@ def test_create_model(self, mock_from_id, mock_validate, mock_create): @patch.object(DataScienceModel, "add_artifact") @patch.object(DataScienceModel, "create") @patch("ads.model.datascience_model.validate") + @patch("ads.aqua.model.model.get_container_config") @patch.object(DataScienceModel, "from_id") def test_create_multimodel( - self, mock_from_id, mock_validate, mock_create, mock_add_artifact + self, + mock_from_id, + mock_get_container_config, + mock_validate, + mock_create, + mock_add_artifact, ): + mock_get_container_config.return_value = get_container_config() mock_model = MagicMock() mock_model.model_file_description = {"test_key": "test_value"} mock_model.display_name = "test_display_name" From 3bff7cede26b62a61718cf0f0d9c1e548f81c4a1 Mon Sep 17 00:00:00 2001 From: Lu Peng <118394507+lu-ohai@users.noreply.github.com> Date: Tue, 11 Mar 2025 14:01:18 -0400 Subject: [PATCH 096/124] Update test_ui.py --- tests/unitary/with_extras/aqua/test_ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unitary/with_extras/aqua/test_ui.py b/tests/unitary/with_extras/aqua/test_ui.py index 27f8f17a1..4e1620c39 100644 --- a/tests/unitary/with_extras/aqua/test_ui.py +++ b/tests/unitary/with_extras/aqua/test_ui.py @@ -595,7 +595,7 @@ def test_list_containers(self, mock_get_container_config): ], "server_port": "8080", }, - "usages": [], + "usages": ["multi_model"], }, ], "finetune": [ From d79ea3fe3afbf37798e1386e77e0f9b25b6750df Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Tue, 11 Mar 2025 15:10:51 -0700 Subject: [PATCH 097/124] Fixes the merging conflicts with the main branch --- ads/aqua/modeldeployment/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 9d2188872..8d97558f0 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -252,7 +252,7 @@ def _fetch_deployment_configs_concurrently( results = executor.map( lambda model_id: self.deployment_app.get_config( model_id, AQUA_MODEL_DEPLOYMENT_CONFIG - ), + ).config, model_ids, ) From 2e531d2dcb117d06482db30205011e71aba7ad1a Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Tue, 11 Mar 2025 16:16:20 -0700 Subject: [PATCH 098/124] Adds more pydocs and comments --- ads/aqua/app.py | 2 +- ads/aqua/common/constants.py | 3 --- ads/aqua/model/model.py | 5 +++-- 3 files changed, 4 insertions(+), 6 deletions(-) delete mode 100644 ads/aqua/common/constants.py diff --git a/ads/aqua/app.py b/ads/aqua/app.py index 3ef95c4a8..0370222f7 100644 --- a/ads/aqua/app.py +++ b/ads/aqua/app.py @@ -340,7 +340,7 @@ def get_config( config_file_path = os.path.join(config_path, config_file_name) if is_path_exists(config_file_path): try: - logger.info( + logger.debug( f"Loading config: `{config_file_name}` from `{config_path}`" ) config = load_config( diff --git a/ads/aqua/common/constants.py b/ads/aqua/common/constants.py deleted file mode 100644 index 7fe1878a1..000000000 --- a/ads/aqua/common/constants.py +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2025 Oracle and/or its affiliates. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 13794b0cb..4bce3a2fe 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -285,7 +285,7 @@ def create_multi( display_name = source_model.display_name model.model_name = model.model_name or display_name - # We cannot rely on this tag, service and cached models doesn't have it. + # TODO Uncomment the section below, if only service models should be allowed for multi-model deployment # if not source_model.freeform_tags.get(Tags.AQUA_SERVICE_MODEL_TAG, UNKNOWN): # raise AquaValueError( # f"Invalid selected model {display_name}. " @@ -400,7 +400,8 @@ def create_multi( category="Other", ) - # Combine tags + # Combine tags. The `Tags.AQUA_TAG` has been excluded, because we don't want to show + # the models created for multi-model purpose in the AQUA models list. tags = { # Tags.AQUA_TAG: "active", Tags.MULTIMODEL_TYPE_TAG: "true", From 402db5231426d96417c48c0e878927f8dd827f50 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 12 Mar 2025 16:11:37 -0400 Subject: [PATCH 099/124] Changed multi model metadata storage. --- ads/aqua/common/entities.py | 5 ++ ads/aqua/constants.py | 1 + ads/aqua/evaluation/evaluation.py | 24 ++++++- ads/aqua/model/constants.py | 1 + ads/aqua/model/model.py | 58 +++++------------ ads/aqua/modeldeployment/deployment.py | 53 ++++++--------- .../deployment/aqua_multi_model.yaml | 64 ++----------------- .../with_extras/aqua/test_deployment.py | 39 +++++++++-- .../with_extras/aqua/test_evaluation.py | 40 +++++++++++- tests/unitary/with_extras/aqua/test_model.py | 3 +- 10 files changed, 144 insertions(+), 144 deletions(-) diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index a8c38c335..bd7b2ede8 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -153,6 +153,8 @@ class AquaMultiModelRef(Serializable): Number of GPUs required for deployment. env_var : Optional[Dict[str, Any]] Optional environment variables to override during deployment. 
+ artifact_location : Optional[str] + Artifact path of model in the multimodel group. """ model_id: str = Field(..., description="The model OCID to deploy.") @@ -163,6 +165,9 @@ class AquaMultiModelRef(Serializable): env_var: Optional[dict] = Field( default_factory=dict, description="The environment variables of the model." ) + artifact_location: Optional[str] = Field( + None, description="Artifact path of model in the multimodel group." + ) class Config: extra = "ignore" diff --git a/ads/aqua/constants.py b/ads/aqua/constants.py index 0f7a501ba..9aff5749c 100644 --- a/ads/aqua/constants.py +++ b/ads/aqua/constants.py @@ -3,6 +3,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """This module defines constants used in ads.aqua module.""" +UNKNOWN = "" UNKNOWN_VALUE = "" READY_TO_IMPORT_STATUS = "TRUE" UNKNOWN_DICT = {} diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 1a415e474..96954ab0e 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -97,6 +97,7 @@ from ads.model.model_metadata import ( MetadataTaxonomyKeys, ModelCustomMetadata, + ModelCustomMetadataItem, ModelProvenanceMetadata, ModelTaxonomyMetadata, ) @@ -578,6 +579,7 @@ def validate_model_name( This function verifies that: - The model group is not empty. + - The model multi metadata is present in the DataScienceModel metadata. - The user provided a non-empty model name. - The provided model name exists in the DataScienceModel metadata. - The deployment configuration contains core metadata required for validation. @@ -636,10 +638,28 @@ def validate_model_name( f"'{create_aqua_evaluation_details.evaluation_source_id}' does not contain any information about deployed models." ) + multi_model_metadata_value = custom_metadata_list.get( + ModelCustomMetadataFields.MULTIMODEL_METADATA, + ModelCustomMetadataItem(key=ModelCustomMetadataFields.MULTIMODEL_METADATA), + ).value + + if not multi_model_metadata_value: + error_message = ( + "Recreate the model deployment and retry the evaluation. An issue occured when initalizing the model group during deployment." + f"The {ModelCustomMetadataFields.MULTIMODEL_METADATA} is missing from the metadata in evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}." + ) + logger.debug(error_message) + raise AquaRuntimeError(error_message) + + multi_model_metadata = json.loads( + evaluation_source.dsc_model.get_custom_metadata_artifact( + metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA + ).decode("utf-8") + ) + # Build the list of valid model names from custom metadata. model_names = [ - custom_metadata_list.get(f"model-name-{idx}").value - for idx in range(model_group_count) + metadata.get("model_name", UNKNOWN) for metadata in multi_model_metadata ] # Check if the provided model name is among the valid names. 
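To make the new metadata flow concrete: the model group is serialized to JSON and written as the multi_model_metadata custom-metadata artifact, and the evaluation path above parses it back to recover each entry's model_name. A small sketch of the payload side, assuming only the AquaMultiModelRef fields introduced in this patch; the OCIDs and Object Storage paths are placeholders.

import json

from ads.aqua.common.entities import AquaMultiModelRef

models = [
    AquaMultiModelRef(
        model_id="ocid1.datasciencemodel.oc1..<model_a>",  # placeholder OCID
        model_name="model_a",
        gpu_count=2,
        artifact_location="oci://bucket@namespace/prefix/model_a",  # placeholder path
    ),
    AquaMultiModelRef(
        model_id="ocid1.datasciencemodel.oc1..<model_b>",  # placeholder OCID
        model_name="model_b",
        gpu_count=4,
        artifact_location="oci://bucket@namespace/prefix/model_b",  # placeholder path
    ),
]

# Payload stored under the multi_model_metadata key.
multi_model_metadata = json.dumps([model.model_dump() for model in models])

# The evaluation validator later loads it and reads each entry's model_name.
model_names = [entry.get("model_name") for entry in json.loads(multi_model_metadata)]
print(model_names)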
diff --git a/ads/aqua/model/constants.py b/ads/aqua/model/constants.py index cff3253b5..9c5859671 100644 --- a/ads/aqua/model/constants.py +++ b/ads/aqua/model/constants.py @@ -19,6 +19,7 @@ class ModelCustomMetadataFields(ExtendedEnum): FINETUNE_CONTAINER = "finetune-container" DEPLOYMENT_CONTAINER_URI = "deployment-container-uri" MULTIMODEL_GROUP_COUNT = "model_group_count" + MULTIMODEL_METADATA = "multi_model_metadata" class ModelTask(ExtendedEnum): diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index f3162575c..880fa5f2a 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import json import os import pathlib from datetime import datetime, timedelta @@ -93,6 +94,7 @@ TENANCY_OCID, ) from ads.model import DataScienceModel +from ads.model.common.utils import MetadataArtifactPathType from ads.model.model_metadata import ( MetadataCustomCategory, ModelCustomMetadata, @@ -279,9 +281,10 @@ def create_multi( selected_models_deployment_containers = set() # Process each model - for idx, model in enumerate(models): + for model in models: source_model = DataScienceModel.from_id(model.model_id) display_name = source_model.display_name + # Update model name in user's input model model.model_name = model.model_name or display_name # TODO Uncomment the section below, if only service models should be allowed for multi-model deployment @@ -310,6 +313,9 @@ def create_multi( "Please register the model first." ) + # Update model artifact location in user's input model + model.artifact_location = model_artifact_path + artifact_list.append(model_artifact_path) # Validate deployment container consistency @@ -328,47 +334,6 @@ def create_multi( selected_models_deployment_containers.add(deployment_container) - # Add model-specific metadata - model_custom_metadata.add( - key=f"model-id-{idx}", - value=source_model.id, - description=f"ID of '{display_name}' in the multimodel group.", - category="Other", - ) - model_custom_metadata.add( - key=f"model-name-{idx}", - value=display_name, - description=f"Name of '{display_name}' in the multimodel group.", - category="Other", - ) - if model.gpu_count: - model_custom_metadata.add( - key=f"model-gpu-count-{idx}", - value=model.gpu_count, - description=f"GPU count of '{display_name}' in the multimodel group.", - category="Other", - ) - user_params = ( - " ".join( - f"{name} {value}" for name, value in model.env_var.items() - ).strip() - if model.env_var - else UNKNOWN - ) - if user_params: - model_custom_metadata.add( - key=f"model-user-params-{idx}", - value=user_params, - description=f"User params of '{display_name}' in the multimodel group.", - category="Other", - ) - model_custom_metadata.add( - key=f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{idx}", - value=model_artifact_path, - description=f"Artifact path for '{display_name}' in the multimodel group.", - category="Other", - ) - # Check if the all models in the group shares same container family if len(selected_models_deployment_containers) > 1: raise AquaValueError( @@ -426,6 +391,15 @@ def create_multi( # Finalize creation custom_model.create(model_by_reference=True) + # Create custom metadata for multi model metadata + custom_model.create_custom_metadata_artifact( + metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA, + artifact_path_or_content=json.dumps( + 
[model.model_dump() for model in models] + ), + path_type=MetadataArtifactPathType.CONTENT, + ) + logger.info( f"Aqua Model '{custom_model.id}' created with models: {', '.join(display_name_list)}." ) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index d0d4bc0b3..dc59b1854 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -63,7 +63,7 @@ ) from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader from ads.common.object_storage_details import ObjectStorageDetails -from ads.common.utils import UNKNOWN, get_log_links +from ads.common.utils import get_log_links from ads.config import ( AQUA_DEPLOYMENT_CONTAINER_CMD_VAR_METADATA_NAME, AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME, @@ -550,7 +550,7 @@ def _create_multi( container_params = container_spec.get(ContainerSpec.CLI_PARM, UNKNOWN).strip() - for idx, model in enumerate(create_deployment_details.models): + for model in create_deployment_details.models: user_params = build_params_string(model.env_var) if user_params: restricted_params = self._find_restricted_params( @@ -589,22 +589,13 @@ def _create_multi( params = f"{params} {get_combined_params(config_parameters, user_params)}".strip() break - artifact_location_key = ( - f"{ModelCustomMetadataFields.ARTIFACT_LOCATION}-{idx}" - ) - artifact_path_prefix = aqua_model.custom_metadata_list.get( - artifact_location_key - ).value.rstrip("/") + artifact_path_prefix = model.artifact_location.rstrip("/") if ObjectStorageDetails.is_oci_path(artifact_path_prefix): os_path = ObjectStorageDetails.from_path(artifact_path_prefix) artifact_path_prefix = os_path.filepath.rstrip("/") model_config.append({"params": params, "model_path": artifact_path_prefix}) - - model_name_key = f"model-name-{idx}" - model_name_list.append( - aqua_model.custom_metadata_list.get(model_name_key).value - ) + model_name_list.append(model.model_name) env_var.update({AQUA_MULTI_MODEL_CONFIG: json.dumps({"models": model_config})}) @@ -960,28 +951,24 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": ) aqua_model = DataScienceModel.from_id(aqua_model_id) custom_metadata_list = aqua_model.custom_metadata_list - model_group_count = int( - custom_metadata_list.get( - ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT - ).value + multi_model_metadata_value = custom_metadata_list.get( + ModelCustomMetadataFields.MULTIMODEL_METADATA, + ModelCustomMetadataItem( + key=ModelCustomMetadataFields.MULTIMODEL_METADATA + ), + ).value + if not multi_model_metadata_value: + raise AquaRuntimeError( + f"Invalid multi model deployment {model_deployment_id}." + f"Make sure the custom metadata {ModelCustomMetadataFields.MULTIMODEL_METADATA} is added to the aqua multi model {aqua_model.display_name}." 
+ ) + multi_model_metadata = json.loads( + aqua_model.dsc_model.get_custom_metadata_artifact( + metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA + ).decode("utf-8") ) aqua_deployment.models = [ - AquaMultiModelRef( - model_id=custom_metadata_list.get(f"model-id-{idx}").value, - model_name=custom_metadata_list.get(f"model-name-{idx}").value, - gpu_count=custom_metadata_list.get( - f"model-gpu-count-{idx}", - ModelCustomMetadataItem(key=f"model-gpu-count-{idx}"), - ).value, - env_var=get_params_dict( - custom_metadata_list.get( - f"model-user-params-{idx}", - ModelCustomMetadataItem(key=f"model-user-params-{idx}"), - ).value - or UNKNOWN - ), - ) - for idx in range(model_group_count) + AquaMultiModelRef(**metadata) for metadata in multi_model_metadata ] return AquaDeploymentDetail( diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml index 6cb0df3fc..7a498d035 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml @@ -4,66 +4,10 @@ spec: compartmentId: ocid1.compartment.oc1.. customMetadataList: data: - - category: Other - description: ID of model_one in the multimodel group. - key: model-id-0 - value: ocid1.compartment.oc1.. - - category: Other - description: Name of model_one in the multimodel group. - key: model-name-0 - value: model_one - - category: Other - description: GPU count of model_one in the multimodel group. - key: model-gpu-count-0 - value: 1 - - category: Other - description: User params of model_one in the multimodel group. - key: model-user-params-0 - value: --test_key_one test_value_one - - category: Other - description: Artifact path for model_one in the multimodel group. - key: artifact_location-0 - value: model_one_path - - category: Other - description: ID of model_two in the multimodel group. - key: model-id-1 - value: ocid1.compartment.oc1.. - - category: Other - description: Name of model_two in the multimodel group. - key: model-name-1 - value: model_two - - category: Other - description: GPU count of model_two in the multimodel group. - key: model-gpu-count-1 - value: 1 - - category: Other - description: User params of model_two in the multimodel group. - key: model-user-params-1 - value: --test_key_two test_value_two - - category: Other - description: Artifact path for model_two in the multimodel group. - key: artifact_location-1 - value: model_two_path - - category: Other - description: ID of model_three in the multimodel group. - key: model-id-2 - value: ocid1.compartment.oc1.. - - category: Other - description: Name of model_three in the multimodel group. - key: model-name-2 - value: model_three - - category: Other - description: GPU count of model_three in the multimodel group. - key: model-gpu-count-2 - value: 1 - - category: Other - description: User params of model_three in the multimodel group. - key: model-user-params-2 - value: --test_key_three test_value_three - - category: Other - description: Artifact path for model_three in the multimodel group. 
- key: artifact_location-2 - value: model_three_path + - category: null + description: null + key: multi_model_metadata + value: Uploaded - category: Other description: Inference container mapping for multi_model key: deployment-container diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 255afe744..0f60e378d 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -15,7 +15,11 @@ import pytest from parameterized import parameterized -from ads.aqua.common.entities import AquaMultiModelRef, ComputeShapeSummary, ModelConfigResult +from ads.aqua.common.entities import ( + AquaMultiModelRef, + ComputeShapeSummary, + ModelConfigResult, +) import ads.aqua.modeldeployment.deployment import ads.config from ads.aqua.common.entities import AquaMultiModelRef @@ -403,19 +407,22 @@ class TestDataset: "env_var": {}, "gpu_count": 2, "model_id": "test_model_id_1", - "model_name": None, + "model_name": "test_model_1", + "artifact_location": "test_location_1", }, { "env_var": {}, "gpu_count": 2, "model_id": "test_model_id_2", - "model_name": None, + "model_name": "test_model_2", + "artifact_location": "test_location_2", }, { "env_var": {}, "gpu_count": 2, "model_id": "test_model_id_3", - "model_name": None, + "model_name": "test_model_3", + "artifact_location": "test_location_3", }, ], "model_id": "ocid1.datasciencemodel.oc1..", @@ -880,18 +887,21 @@ class TestDataset: "gpu_count": 1, "model_id": "ocid1.compartment.oc1..", "model_name": "model_one", + "artifact_location": "artifact_location_one", }, { "env_var": {"--test_key_two": "test_value_two"}, "gpu_count": 1, "model_id": "ocid1.compartment.oc1..", "model_name": "model_two", + "artifact_location": "artifact_location_two", }, { "env_var": {"--test_key_three": "test_value_three"}, "gpu_count": 1, "model_id": "ocid1.compartment.oc1..", "model_name": "model_three", + "artifact_location": "artifact_location_three", }, ] @@ -978,10 +988,16 @@ def test_get_deployment(self, mock_get_resource_name): assert result.log.name == "log-name" assert result.log_group.name == "log-group-name" + @patch( + "ads.model.service.oci_datascience_model.OCIDataScienceModel.get_custom_metadata_artifact" + ) @patch("ads.model.DataScienceModel.from_id") @patch("ads.aqua.modeldeployment.deployment.get_resource_name") def test_get_multi_model_deployment( - self, mock_get_resource_name, mock_model_from_id + self, + mock_get_resource_name, + mock_model_from_id, + mock_get_custom_metadata_artifact, ): multi_model_deployment = copy.deepcopy( TestDataset.multi_model_deployment_object @@ -1012,6 +1028,13 @@ def test_get_multi_model_deployment( uri=aqua_multi_model ) + multi_model_deployment_model_attributes_str = json.dumps( + TestDataset.multi_model_deployment_model_attributes + ).encode("utf-8") + mock_get_custom_metadata_artifact.return_value = ( + multi_model_deployment_model_attributes_str + ) + result = self.app.get(model_deployment_id=TestDataset.MODEL_DEPLOYMENT_ID) expected_attributes = set(AquaDeploymentDetail.__annotations__.keys()) | set( @@ -1588,17 +1611,23 @@ def test_create_deployment_for_multi_model( model_info_1 = AquaMultiModelRef( model_id="test_model_id_1", + model_name="test_model_1", gpu_count=2, + artifact_location="test_location_1", ) model_info_2 = AquaMultiModelRef( model_id="test_model_id_2", + model_name="test_model_2", gpu_count=2, + artifact_location="test_location_2", ) model_info_3 = AquaMultiModelRef( 
model_id="test_model_id_3", + model_name="test_model_3", gpu_count=2, + artifact_location="test_location_3", ) result = self.app.create( diff --git a/tests/unitary/with_extras/aqua/test_evaluation.py b/tests/unitary/with_extras/aqua/test_evaluation.py index 0925dd3e5..d8826e0f4 100644 --- a/tests/unitary/with_extras/aqua/test_evaluation.py +++ b/tests/unitary/with_extras/aqua/test_evaluation.py @@ -359,6 +359,30 @@ class TestDataset: INVALID_EVAL_ID = "ocid1.datasciencemodel.oc1.phx." MODEL_DEPLOYMENT_ID = "ocid1.datasciencemodeldeployment.oc1.." + multi_model_deployment_model_attributes = [ + { + "env_var": {"--test_key_one": "test_value_one"}, + "gpu_count": 1, + "model_id": "ocid1.compartment.oc1..", + "model_name": "model_one", + "artifact_location": "artifact_location_one", + }, + { + "env_var": {"--test_key_two": "test_value_two"}, + "gpu_count": 1, + "model_id": "ocid1.compartment.oc1..", + "model_name": "model_two", + "artifact_location": "artifact_location_two", + }, + { + "env_var": {"--test_key_three": "test_value_three"}, + "gpu_count": 1, + "model_id": "ocid1.compartment.oc1..", + "model_name": "model_three", + "artifact_location": "artifact_location_three", + }, + ] + class TestAquaEvaluation(unittest.TestCase): """Contains unittests for TestAquaEvaluationApp.""" @@ -551,8 +575,15 @@ def test_create_evaluation( ] ) @patch("ads.aqua.evaluation.evaluation.AquaEvaluationApp.create") + @patch( + "ads.model.datascience_model.OCIDataScienceModel.get_custom_metadata_artifact" + ) def test_validate_model_name( - self, mock_model_parameters, expected_message, mock_model + self, + mock_model_parameters, + expected_message, + mock_get_custom_metadata_artifact, + mock_model, ): curr_dir = os.path.dirname(__file__) @@ -583,6 +614,13 @@ def test_validate_model_name( mock_model = DataScienceModel.from_yaml(uri=aqua_multi_model) + multi_model_deployment_model_attributes_str = json.dumps( + TestDataset.multi_model_deployment_model_attributes + ).encode("utf-8") + mock_get_custom_metadata_artifact.return_value = ( + multi_model_deployment_model_attributes_str + ) + mock_create_aqua_evaluation_details = MagicMock( **create_aqua_evaluation_details, spec=CreateAquaEvaluationDetails ) diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index 856936daa..4f59e3fe1 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -362,6 +362,7 @@ def test_create_model(self, mock_from_id, mock_validate, mock_create): assert model.provenance_metadata.training_id == "test_training_id" @patch.object(DataScienceModel, "add_artifact") + @patch.object(DataScienceModel, "create_custom_metadata_artifact") @patch.object(DataScienceModel, "create") @patch("ads.model.datascience_model.validate") @patch("ads.aqua.model.model.get_container_config") @@ -369,9 +370,9 @@ def test_create_model(self, mock_from_id, mock_validate, mock_create): def test_create_multimodel( self, mock_from_id, - mock_get_container_config, mock_validate, mock_create, + mock_create_custom_metadata_artifact, mock_add_artifact, ): mock_get_container_config.return_value = get_container_config() From 3dc9977f2e3209bd8527482473dc7634e75da3b3 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 12 Mar 2025 16:24:36 -0400 Subject: [PATCH 100/124] Updated pr. 
--- tests/unitary/with_extras/aqua/test_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index 4f59e3fe1..f202228fd 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -370,6 +370,7 @@ def test_create_model(self, mock_from_id, mock_validate, mock_create): def test_create_multimodel( self, mock_from_id, + mock_get_container_config, mock_validate, mock_create, mock_create_custom_metadata_artifact, From 031396730a18b6f511ba7270c363ec491ab5fe2f Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Wed, 12 Mar 2025 13:50:31 -0700 Subject: [PATCH 101/124] Fixes merging conflicts --- ads/aqua/modeldeployment/deployment.py | 1 - ads/aqua/modeldeployment/entities.py | 2 +- ads/aqua/modeldeployment/utils.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index d0d4bc0b3..feb29d0d4 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -45,7 +45,6 @@ AQUA_MULTI_MODEL_CONFIG, MODEL_BY_REFERENCE_OSS_PATH_KEY, MODEL_NAME_DELIMITER, - UNKNOWN, UNKNOWN_DICT, ) from ads.aqua.data import AquaResourceIdentifier diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index aab83b4c9..0a6065300 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -11,7 +11,7 @@ from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import Tags from ads.aqua.config.utils.serializer import Serializable -from ads.aqua.constants import UNKNOWN, UNKNOWN_DICT +from ads.aqua.constants import UNKNOWN_DICT from ads.aqua.data import AquaResourceIdentifier from ads.common.serializer import DataClassSerializable from ads.common.utils import UNKNOWN, get_console_link diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 8d97558f0..6caad1e4f 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -12,7 +12,6 @@ from ads.aqua.app import AquaApp from ads.aqua.common.entities import ComputeShapeSummary -from ads.aqua.constants import UNKNOWN from ads.aqua.modeldeployment.entities import ( AquaDeploymentConfig, ConfigurationItem, @@ -20,6 +19,7 @@ GPUShapeAllocation, ModelDeploymentConfigSummary, ) +from ads.common.utils import UNKNOWN from ads.config import AQUA_MODEL_DEPLOYMENT_CONFIG logger = logging.getLogger("ads.aqua") From b321d5c49424d90394339b2d3a2817b5df592c9c Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 12 Mar 2025 17:10:35 -0400 Subject: [PATCH 102/124] Updated pr. 
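Re-adds the `UNKNOWN` import in `deployment.py`, now sourced from `ads.common.utils`; the
duplicate definition in `ads.aqua.constants` is removed in the next patch. For context, the
sentinel is just a falsy default used when optional metadata is missing — a minimal sketch,
assuming the shared constant is the same empty string as the duplicate being retired:

    from ads.common.utils import UNKNOWN

    metadata = {"model_id": "ocid1.datasciencemodel.oc1..example"}  # illustrative only
    # Fall back to the empty-string sentinel when a field is absent.
    model_name = metadata.get("model_name", UNKNOWN)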
--- ads/aqua/modeldeployment/deployment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 2ceacb1a2..2a6b342fd 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -62,7 +62,7 @@ ) from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader from ads.common.object_storage_details import ObjectStorageDetails -from ads.common.utils import get_log_links +from ads.common.utils import UNKNOWN, get_log_links from ads.config import ( AQUA_DEPLOYMENT_CONTAINER_CMD_VAR_METADATA_NAME, AQUA_DEPLOYMENT_CONTAINER_METADATA_NAME, From f9a3064dff7bc09c795ae854e08c17d9765bdf64 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 12 Mar 2025 17:11:33 -0400 Subject: [PATCH 103/124] Updated pr. --- ads/aqua/constants.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ads/aqua/constants.py b/ads/aqua/constants.py index 9aff5749c..0f7a501ba 100644 --- a/ads/aqua/constants.py +++ b/ads/aqua/constants.py @@ -3,7 +3,6 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """This module defines constants used in ads.aqua module.""" -UNKNOWN = "" UNKNOWN_VALUE = "" READY_TO_IMPORT_STATUS = "TRUE" UNKNOWN_DICT = {} From 740bf3d924be289b792fda0c468b08384c56fb3d Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 12 Mar 2025 18:25:55 -0400 Subject: [PATCH 104/124] Updated pr. --- ads/aqua/evaluation/evaluation.py | 28 ++++++++++++++++++-------- ads/aqua/model/model.py | 8 ++++++-- ads/aqua/modeldeployment/deployment.py | 4 ++-- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 96954ab0e..9f52fbe7b 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -24,6 +24,7 @@ from ads.aqua import logger from ads.aqua.app import AquaApp from ads.aqua.common import utils +from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import ( DataScienceResource, Resource, @@ -645,21 +646,32 @@ def validate_model_name( if not multi_model_metadata_value: error_message = ( - "Recreate the model deployment and retry the evaluation. An issue occured when initalizing the model group during deployment." - f"The {ModelCustomMetadataFields.MULTIMODEL_METADATA} is missing from the metadata in evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}." + f"Required model metadata is missing for evaluation source ID: {evaluation_source.id}. " + f"A valid multi-model deployment requires {ModelCustomMetadataFields.MULTIMODEL_METADATA}. " + "Please recreate the model deployment and retry the evaluation, as an issue occurred during the initialization of the model group." ) logger.debug(error_message) raise AquaRuntimeError(error_message) - multi_model_metadata = json.loads( - evaluation_source.dsc_model.get_custom_metadata_artifact( - metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA - ).decode("utf-8") - ) + try: + multi_model_metadata = json.loads( + evaluation_source.dsc_model.get_custom_metadata_artifact( + metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA + ).decode("utf-8") + ) + except Exception as ex: + error_message = ( + f"Error fetching {ModelCustomMetadataFields.MULTIMODEL_METADATA} " + f"from custom metadata for evaluation source ID '{evaluation_source.id}'. 
" + f"Details: {ex}" + ) + logger.error(error_message) + raise AquaRuntimeError(error_message) # Build the list of valid model names from custom metadata. model_names = [ - metadata.get("model_name", UNKNOWN) for metadata in multi_model_metadata + AquaMultiModelRef(**metadata).model_name + for metadata in multi_model_metadata ] # Check if the provided model name is among the valid names. diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 880fa5f2a..6c2d5b61e 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -391,6 +391,10 @@ def create_multi( # Finalize creation custom_model.create(model_by_reference=True) + logger.info( + f"Aqua Model '{custom_model.id}' created with models: {', '.join(display_name_list)}." + ) + # Create custom metadata for multi model metadata custom_model.create_custom_metadata_artifact( metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA, @@ -400,8 +404,8 @@ def create_multi( path_type=MetadataArtifactPathType.CONTENT, ) - logger.info( - f"Aqua Model '{custom_model.id}' created with models: {', '.join(display_name_list)}." + logger.debug( + f"Multi model metadata uploaded for Aqua model: {custom_model.id}." ) # Track telemetry event diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 2a6b342fd..0d7b6d2fb 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -958,8 +958,8 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": ).value if not multi_model_metadata_value: raise AquaRuntimeError( - f"Invalid multi model deployment {model_deployment_id}." - f"Make sure the custom metadata {ModelCustomMetadataFields.MULTIMODEL_METADATA} is added to the aqua multi model {aqua_model.display_name}." + f"Invalid multi-model deployment: {model_deployment_id}. " + f"Ensure that the required custom metadata `{ModelCustomMetadataFields.MULTIMODEL_METADATA}` is added to the AQUA multi-model `{aqua_model.display_name}` ({aqua_model.id})." ) multi_model_metadata = json.loads( aqua_model.dsc_model.get_custom_metadata_artifact( From d245278d0b5ae2fedfc82443e83b17b385e5eb4b Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Thu, 13 Mar 2025 16:37:50 -0400 Subject: [PATCH 105/124] Added validation for single model. --- ads/aqua/modeldeployment/entities.py | 34 ++-- .../aqua_summary_multi_model_single.json | 145 ++++++++++++++++++ .../with_extras/aqua/test_deployment.py | 92 ++++++++++- 3 files changed, 256 insertions(+), 15 deletions(-) create mode 100644 tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model_single.json diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 0a6065300..cc33d064d 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -546,6 +546,7 @@ def validate_multimodel_deployment_feasibility( raise ConfigValidationError(error_message) sum_model_gpus = 0 + is_single_model = len(self.models) == 1 # Validate each model's GPU allocation against its deployment configuration. 
for model in self.models: @@ -556,10 +557,16 @@ def validate_multimodel_deployment_feasibility( if not aqua_deployment_config.configuration: continue - if selected_shape not in aqua_deployment_config.configuration: + allowed_shapes = ( + aqua_deployment_config.shape + if is_single_model + else list(aqua_deployment_config.configuration.keys()) + ) + + if selected_shape not in allowed_shapes: error_message = ( f"Model {model.model_id} is not compatible with the selected instance shape '{selected_shape}'. " - "Select a different instance shape." + f"Select a different instance shape from allowed shapes {allowed_shapes}." ) logger.error(error_message) raise ConfigValidationError(error_message) @@ -572,13 +579,22 @@ def validate_multimodel_deployment_feasibility( valid_gpu_configurations = [cfg.gpu_count for cfg in multi_model_configs] if model.gpu_count not in valid_gpu_configurations: - valid_gpu_str = ", ".join(map(str, valid_gpu_configurations)) - error_message = ( - f"Model {model.model_id} allocated {model.gpu_count} GPUs, but valid GPU configurations " - f"are: {valid_gpu_str}. Adjust the GPU allocation or select a larger instance shape." - ) - logger.error(error_message) - raise ConfigValidationError(error_message) + if is_single_model: + if model.gpu_count != total_available_gpus: + error_message = ( + f"Model {model.model_id} allocated {model.gpu_count} GPUs, but for single model deployment a valid GPU count " + f"would be {total_available_gpus}. Adjust the GPU allocation to {total_available_gpus}." + ) + logger.error(error_message) + raise ConfigValidationError(error_message) + else: + valid_gpu_str = ", ".join(map(str, valid_gpu_configurations)) + error_message = ( + f"Model {model.model_id} allocated {model.gpu_count} GPUs, but valid GPU configurations " + f"are: {valid_gpu_str}. Adjust the GPU allocation or select a larger instance shape." + ) + logger.error(error_message) + raise ConfigValidationError(error_message) # Check that the total GPU count for the model group does not exceed the instance capacity. 
if sum_model_gpus > total_available_gpus: diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model_single.json b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model_single.json new file mode 100644 index 000000000..729f1c961 --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model_single.json @@ -0,0 +1,145 @@ +{ + "deployment_config": { + "model_a": { + "configuration": { + "BM.GPU.A10.4": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.A100-v2.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.H100.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": {} + }, + { + "gpu_count": 2, + "parameters": {} + }, + { + "gpu_count": 4, + "parameters": {} + }, + { + "gpu_count": 8, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": {}, + "shape_info": { + "configs": [], + "type": "" + } + } + }, + "shape": [ + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ] + } + }, + "error_message": "None", + "gpu_allocation": { + "BM.GPU.A10.4": { + "models": [ + { + "gpu_count": 4, + "ocid": "model_a" + } + ], + "total_gpus_available": 4 + }, + "BM.GPU.A100-v2.8": { + "models": [ + { + "gpu_count": 8, + "ocid": "model_a" + } + ], + "total_gpus_available": 8 + }, + "BM.GPU.H100.8": { + "models": [ + { + "gpu_count": 8, + "ocid": "model_a" + } + ], + "total_gpus_available": 8 + }, + "VM.GPU.A10.2": { + "models": [ + { + "gpu_count": 2, + "ocid": "model_a" + } + ], + "total_gpus_available": 2 + } + } +} diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 0f60e378d..644c018c5 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -1855,11 +1855,9 @@ def test_validate_deployment_params_for_unverified_models( ) def validate_multimodel_deployment_feasibility_helper( - self, models, instance_shape, display_name, total_gpus + self, models, instance_shape, display_name, total_gpus, mock_path ): - config_json = os.path.join( - self.curr_dir, "test_data/deployment/aqua_summary_multi_model.json" - ) + config_json = os.path.join(self.curr_dir, mock_path) with open(config_json, "r") as _file: config = json.load(_file) @@ -1929,7 +1927,11 @@ def test_validate_multimodel_deployment_feasibility_positive( self, models, instance_shape, display_name, total_gpus ): 
self.validate_multimodel_deployment_feasibility_helper( - models, instance_shape, display_name, total_gpus + models, + instance_shape, + display_name, + total_gpus, + "test_data/deployment/aqua_summary_multi_model.json", ) @parameterized.expand( @@ -2012,9 +2014,87 @@ def test_validate_multimodel_deployment_feasibility_negative( ): with pytest.raises(ConfigValidationError): self.validate_multimodel_deployment_feasibility_helper( - models, instance_shape, display_name, total_gpus + models, + instance_shape, + display_name, + total_gpus, + "test_data/deployment/aqua_summary_multi_model.json", + ) + + @parameterized.expand( + [ + ( + [ + {"ocid": "model_a", "gpu_count": 2}, + ], + "invalid_shape", # unsupported gpu shape + "test_a", + 2, + "The model group is not compatible with the selected instance shape 'invalid_shape'. Supported shapes: ['BM.GPU.H100.8', 'BM.GPU.A100-v2.8'].", + ), + ( + [ + {"ocid": "model_a", "gpu_count": 3}, # invalid gpu count 3 + ], + "BM.GPU.H100.8", + "test_a", + 8, + "Model model_a allocated 3 GPUs, but for single model deployment a valid GPU count would be 8. Adjust the GPU allocation to 8.", + ), + ], + ) + def test_validate_multimodel_deployment_feasibility_negative_single( + self, + models, + instance_shape, + display_name, + total_gpus, + value_error, + ): + with pytest.raises(ConfigValidationError, match=value_error): + self.validate_multimodel_deployment_feasibility_helper( + models, + instance_shape, + display_name, + total_gpus, + "test_data/deployment/aqua_summary_multi_model_single.json", ) + @parameterized.expand( + [ + ( + [ + {"ocid": "model_a", "gpu_count": 8}, + ], + "BM.GPU.H100.8", + "test_a", + 8, + ), + ( + [ + {"ocid": "model_a", "gpu_count": 2}, + ], + "VM.GPU.A10.2", + "test_a", + 2, + ), + ], + ) + def test_validate_multimodel_deployment_feasibility_positive_single( + self, + models, + instance_shape, + display_name, + total_gpus, + ): + self.validate_multimodel_deployment_feasibility_helper( + models, + instance_shape, + display_name, + total_gpus, + "test_data/deployment/aqua_summary_multi_model_single.json", + ) + class TestMDInferenceResponse(unittest.TestCase): def setUp(self): From 547eeba092333858c8339839dee3a1af6768d072 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 14 Mar 2025 11:58:53 -0700 Subject: [PATCH 106/124] Enhances create evaluation with adding the container_index_uri attribute --- ads/aqua/evaluation/entities.py | 7 ++++++- ads/aqua/evaluation/evaluation.py | 9 ++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ads/aqua/evaluation/entities.py b/ads/aqua/evaluation/entities.py index bb165edd8..7c6651931 100644 --- a/ads/aqua/evaluation/entities.py +++ b/ads/aqua/evaluation/entities.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """ @@ -91,6 +91,11 @@ class CreateAquaEvaluationDetails(Serializable): force_overwrite: Optional[bool] = False freeform_tags: Optional[dict] = None defined_tags: Optional[dict] = None + container_image_uri: Optional[str] = Field( + None, + description="Image URI for evaluation container runtime. 
" + "The service managed container will be used by default.", + ) class Config: extra = "ignore" diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 9f52fbe7b..f0677e507 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -430,8 +430,11 @@ def create( JOB_INFRASTRUCTURE_TYPE_DEFAULT_NETWORKING ) - container_image = self._get_evaluation_container( - create_aqua_evaluation_details.evaluation_source_id + container_image = ( + create_aqua_evaluation_details.container_image_uri + or self._get_evaluation_container( + create_aqua_evaluation_details.evaluation_source_id + ) ) evaluation_job.with_runtime( @@ -666,7 +669,7 @@ def validate_model_name( f"Details: {ex}" ) logger.error(error_message) - raise AquaRuntimeError(error_message) + raise AquaRuntimeError(error_message) from ex # Build the list of valid model names from custom metadata. model_names = [ From 212f7993fe0fa45993aa622d2fdfe3092e1304c1 Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Tue, 18 Mar 2025 13:34:24 -0700 Subject: [PATCH 107/124] fixed docstrings to include links and kwargs for pydantic objects --- ads/aqua/evaluation/evaluation.py | 51 +++++++++++++++++++++++++- ads/aqua/finetuning/finetuning.py | 51 +++++++++++++++++++++++++- ads/aqua/model/model.py | 5 ++- ads/aqua/modeldeployment/deployment.py | 30 ++++++++++++++- 4 files changed, 132 insertions(+), 5 deletions(-) diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index f0677e507..af85221f0 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -141,13 +141,62 @@ def create( create_aqua_evaluation_details: CreateAquaEvaluationDetails = None, **kwargs, ) -> "AquaEvaluationSummary": - """Creates Aqua evaluation for resource. + """Creates Aqua evaluation for resource.\n + For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/f271ca63d12e3c256718f23a14d93da4b4fc086b/ai-quick-actions/cli-tips.md#create-model-evaluation Parameters ---------- create_aqua_evaluation_details: CreateAquaEvaluationDetails The CreateAquaEvaluationDetails data class which contains all required and optional fields to create the aqua evaluation. + kwargs: + evaluation_source_id: str + The evaluation source id. Must be either model or model deployment ocid. + evaluation_name: str + The name for evaluation. + dataset_path: str + The dataset path for the evaluation. Could be either a local path from notebook session + or an object storage path. + report_path: str + The report path for the evaluation. Must be an object storage path. + model_parameters: dict + The parameters for the evaluation. + shape_name: str + The shape name for the evaluation job infrastructure. + memory_in_gbs: float + The memory in gbs for the shape selected. + ocpus: float + The ocpu count for the shape selected. + block_storage_size: int + The storage for the evaluation job infrastructure. + compartment_id: (str, optional). Defaults to `None`. + The compartment id for the evaluation. + project_id: (str, optional). Defaults to `None`. + The project id for the evaluation. + evaluation_description: (str, optional). Defaults to `None`. + The description for evaluation + experiment_id: (str, optional). Defaults to `None`. + The evaluation model version set id. If provided, + evaluation model will be associated with it. + experiment_name: (str, optional). Defaults to `None`. + The evaluation model version set name. 
If provided, + the model version set with the same name will be used if exists, + otherwise a new model version set will be created with the name. + experiment_description: (str, optional). Defaults to `None`. + The description for the evaluation model version set. + log_group_id: (str, optional). Defaults to `None`. + The log group id for the evaluation job infrastructure. + log_id: (str, optional). Defaults to `None`. + The log id for the evaluation job infrastructure. + metrics: (list, optional). Defaults to `None`. + The metrics for the evaluation. + force_overwrite: (bool, optional). Defaults to `False`. + Whether to force overwrite the existing file in object storage. + freeform_tags: (dict, optional) + Freeform tags for the evaluation model + defined_tags: (dict, optional) + Defined tags for the evaluation model + kwargs: The kwargs for creating CreateAquaEvaluationDetails instance if no create_aqua_evaluation_details provided. diff --git a/ads/aqua/finetuning/finetuning.py b/ads/aqua/finetuning/finetuning.py index 02d73c526..37e46fec5 100644 --- a/ads/aqua/finetuning/finetuning.py +++ b/ads/aqua/finetuning/finetuning.py @@ -87,13 +87,62 @@ class AquaFineTuningApp(AquaApp): def create( self, create_fine_tuning_details: CreateFineTuningDetails = None, **kwargs ) -> "AquaFineTuningSummary": - """Creates Aqua fine tuning for model. + """Creates Aqua fine tuning for model.\n + For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/f271ca63d12e3c256718f23a14d93da4b4fc086b/ai-quick-actions/cli-tips.md#create-fine-tuned-model Parameters ---------- create_fine_tuning_details: CreateFineTuningDetails The CreateFineTuningDetails data class which contains all required and optional fields to create the aqua fine tuning. + kwargs: + ft_source_id: str The fine tuning source id. Must be model OCID. + ft_name: str + The name for fine tuning. + dataset_path: str + The dataset path for fine tuning. Could be either a local path from notebook session + or an object storage path. + report_path: str + The report path for fine tuning. Must be an object storage path. + ft_parameters: dict + The parameters for fine tuning. + shape_name: str + The shape name for fine tuning job infrastructure. + replica: int + The replica for fine tuning job runtime. + validation_set_size: float + The validation set size for fine tuning job. Must be a float in between [0,1). + ft_description: (str, optional). Defaults to `None`. + The description for fine tuning. + compartment_id: (str, optional). Defaults to `None`. + The compartment id for fine tuning. + project_id: (str, optional). Defaults to `None`. + The project id for fine tuning. + experiment_id: (str, optional). Defaults to `None`. + The fine tuning model version set id. If provided, + fine tuning model will be associated with it. + experiment_name: (str, optional). Defaults to `None`. + The fine tuning model version set name. If provided, + the fine tuning version set with the same name will be used if exists, + otherwise a new model version set will be created with the name. + experiment_description: (str, optional). Defaults to `None`. + The description for fine tuning model version set. + block_storage_size: (int, optional). Defaults to 256. + The storage for fine tuning job infrastructure. + subnet_id: (str, optional). Defaults to `None`. + The custom egress for fine tuning job. + log_group_id: (str, optional). Defaults to `None`. + The log group id for fine tuning job infrastructure. 
+ log_id: (str, optional). Defaults to `None`. + The log id for fine tuning job infrastructure. + watch_logs: (bool, optional). Defaults to `False`. + The flag to watch the job run logs when a fine-tuning job is created. + force_overwrite: (bool, optional). Defaults to `False`. + Whether to force overwrite the existing file in object storage. + freeform_tags: (dict, optional) + Freeform tags for the fine-tuning model + defined_tags: (dict, optional) + Defined tags for the fine-tuning model kwargs: The kwargs for creating CreateFineTuningDetails instance if no create_fine_tuning_details provided. diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 6c2d5b61e..4fd54ff77 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -1654,8 +1654,9 @@ def register( self, import_model_details: ImportModelDetails = None, **kwargs ) -> AquaModel: """Loads the model from object storage and registers as Model in Data Science Model catalog - The inference container and finetuning container could be of type Service Manged Container(SMC) or custom. - If it is custom, full container URI is expected. If it of type SMC, only the container family name is expected. + The inference container and finetuning container could be of type Service Managed Container(SMC) or custom. + If it is custom, full container URI is expected. If it of type SMC, only the container family name is expected.\n + For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/cli-tips.md#register-model Args: import_model_details (ImportModelDetails): Model details for importing the model. diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 0d7b6d2fb..67c1d9d7a 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -118,13 +118,41 @@ def create( **kwargs, ) -> "AquaDeployment": """ - Creates a new Aqua model deployment. + Creates a new Aqua model deployment.\n + For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/cli-tips.md#create-model-deployment Parameters ---------- create_deployment_details : CreateModelDeploymentDetails, optional An instance of CreateModelDeploymentDetails containing all required and optional fields for creating a model deployment via Aqua. + kwargs: + instance_shape (str): The instance shape used for deployment. + display_name (str): The name of the model deployment. + compartment_id (Optional[str]): The compartment OCID. + project_id (Optional[str]): The project OCID. + description (Optional[str]): The description of the deployment. + model_id (Optional[str]): The model OCID to deploy. + models (Optional[List[AquaMultiModelRef]]): List of models for multimodel deployment. + instance_count (int): Number of instances used for deployment. + log_group_id (Optional[str]): OCI logging group ID for logs. + access_log_id (Optional[str]): OCID for access logs. + predict_log_id (Optional[str]): OCID for prediction logs. + bandwidth_mbps (Optional[int]): Bandwidth limit on the load balancer in Mbps. + web_concurrency (Optional[int]): Number of worker processes/threads for handling requests. + server_port (Optional[int]): Server port for the Docker container image. + health_check_port (Optional[int]): Health check port for the Docker container image. + env_var (Optional[Dict[str, str]]): Environment variables for deployment. 
+ container_family (Optional[str]): Image family of the model deployment container runtime. + memory_in_gbs (Optional[float]): Memory (in GB) for the selected shape. + ocpus (Optional[float]): OCPU count for the selected shape. + model_file (Optional[str]): File used for model deployment. + private_endpoint_id (Optional[str]): Private endpoint ID for model deployment. + container_image_uri (Optional[str]): Image URI for model deployment container runtime. + cmd_var (Optional[List[str]]): Command variables for the container runtime. + freeform_tags (Optional[Dict]): Freeform tags for model deployment. + defined_tags (Optional[Dict]): Defined tags for model deployment. + **kwargs: Keyword arguments used to construct a CreateModelDeploymentDetails instance if one is not provided. From 427ea3888586df547eea3ca6382664e6a0bdf5d5 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Tue, 18 Mar 2025 15:15:54 -0700 Subject: [PATCH 108/124] Minor fixes to pydocs --- ads/aqua/modeldeployment/deployment.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 67c1d9d7a..51c89cbcf 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -121,11 +121,10 @@ def create( Creates a new Aqua model deployment.\n For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/cli-tips.md#create-model-deployment - Parameters - ---------- - create_deployment_details : CreateModelDeploymentDetails, optional - An instance of CreateModelDeploymentDetails containing all required and optional - fields for creating a model deployment via Aqua. + Args: + create_deployment_details : CreateModelDeploymentDetails, optional + An instance of CreateModelDeploymentDetails containing all required and optional + fields for creating a model deployment via Aqua. kwargs: instance_shape (str): The instance shape used for deployment. display_name (str): The name of the model deployment. @@ -153,10 +152,6 @@ def create( freeform_tags (Optional[Dict]): Freeform tags for model deployment. defined_tags (Optional[Dict]): Defined tags for model deployment. - **kwargs: - Keyword arguments used to construct a CreateModelDeploymentDetails instance if one - is not provided. - Returns ------- AquaDeployment From 8203b3114c5143e607ef31c3cedbe6f823dbec8c Mon Sep 17 00:00:00 2001 From: Liz Johnson Date: Wed, 19 Mar 2025 11:17:22 -0700 Subject: [PATCH 109/124] added error message for invalid shape --- ads/aqua/modeldeployment/deployment.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 51c89cbcf..4699b9435 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -172,12 +172,22 @@ def create( "Invalid parameters for creating a model deployment. Either `model_id` or `models` must be provided." ) + # Set defaults for compartment and project if not provided. 
compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID project_id = create_deployment_details.project_id or PROJECT_OCID freeform_tags = create_deployment_details.freeform_tags defined_tags = create_deployment_details.defined_tags + # validate instance shape availability in compartment + available_shapes = self.list_shapes(compartment_id=create_deployment_details.compartment_id) + + if create_deployment_details.instance_shape not in available_shapes: + raise AquaValueError( + f"Invalid Instance Shape. The selected shape '{create_deployment_details.instance_shape}' is not available in the {self.region} region. " + "Please choose another shape to deploy the model." + ) + # Get container config container_config = get_container_config() From eb9f5e18a33237895a3d1024f16c5a0c46a7bfac Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Thu, 20 Mar 2025 17:35:10 -0700 Subject: [PATCH 110/124] Fixes unit tests --- ads/aqua/modeldeployment/deployment.py | 14 ++-- .../with_extras/aqua/test_deployment.py | 75 +++++++++++++++++++ 2 files changed, 84 insertions(+), 5 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 4699b9435..5c3253678 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -172,7 +172,6 @@ def create( "Invalid parameters for creating a model deployment. Either `model_id` or `models` must be provided." ) - # Set defaults for compartment and project if not provided. compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID project_id = create_deployment_details.project_id or PROJECT_OCID @@ -180,12 +179,17 @@ def create( defined_tags = create_deployment_details.defined_tags # validate instance shape availability in compartment - available_shapes = self.list_shapes(compartment_id=create_deployment_details.compartment_id) + available_shapes = [ + shape.name.lower() + for shape in self.list_shapes( + compartment_id=create_deployment_details.compartment_id + ) + ] - if create_deployment_details.instance_shape not in available_shapes: + if create_deployment_details.instance_shape.lower() not in available_shapes: raise AquaValueError( - f"Invalid Instance Shape. The selected shape '{create_deployment_details.instance_shape}' is not available in the {self.region} region. " - "Please choose another shape to deploy the model." + f"Invalid Instance Shape. The selected shape '{create_deployment_details.instance_shape}' " + f"is not available in the {self.region} region. Please choose another shape to deploy the model." 
) # Get container config diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 644c018c5..e709717c5 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -1258,6 +1258,21 @@ def test_create_deployment_for_foundation_model( container_index_config = json.load(_file) mock_get_container_config.return_value = container_index_config + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + + self.app.list_shapes = MagicMock(return_value=shapes) + mock_get_container_image.return_value = TestDataset.DEPLOYMENT_IMAGE_NAME aqua_deployment = os.path.join( self.curr_dir, "test_data/deployment/aqua_create_deployment.yaml" @@ -1327,6 +1342,21 @@ def test_create_deployment_for_fine_tuned_model( with open(config_json, "r") as _file: config = json.load(_file) + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + + self.app.list_shapes = MagicMock(return_value=shapes) + self.app.get_deployment_config = MagicMock( return_value=AquaDeploymentConfig(**config) ) @@ -1412,6 +1442,21 @@ def test_create_deployment_for_gguf_model( container_index_config = json.load(_file) mock_get_container_config.return_value = container_index_config + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + + self.app.list_shapes = MagicMock(return_value=shapes) + mock_get_container_image.return_value = TestDataset.DEPLOYMENT_IMAGE_NAME aqua_deployment = os.path.join( self.curr_dir, "test_data/deployment/aqua_create_gguf_deployment.yaml" @@ -1493,6 +1538,21 @@ def test_create_deployment_for_tei_byoc_embedding_model( container_index_config = json.load(_file) mock_get_container_config.return_value = container_index_config + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + + self.app.list_shapes = MagicMock(return_value=shapes) + mock_get_container_image.return_value = TestDataset.DEPLOYMENT_IMAGE_NAME aqua_deployment = os.path.join( self.curr_dir, "test_data/deployment/aqua_create_embedding_deployment.yaml" @@ -1590,6 +1650,21 @@ def test_create_deployment_for_multi_model( container_index_config = json.load(_file) mock_get_container_config.return_value = container_index_config + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + + self.app.list_shapes = MagicMock(return_value=shapes) + deployment_config_json = os.path.join( self.curr_dir, "test_data/deployment/deployment_gpu_config.json" ) From 4713874643a8d0f7f9deb67999077633a3bc3cc2 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Mon, 24 Mar 2025 14:27:39 -0700 Subject: [PATCH 111/124] Extends Shape Validation for a single model deployment. 
--- ads/aqua/modeldeployment/entities.py | 61 +++++++++++-------- .../with_extras/aqua/test_deployment.py | 2 +- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index cc33d064d..a558390ee 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -17,6 +17,20 @@ from ads.common.utils import UNKNOWN, get_console_link +class ConfigValidationError(Exception): + """Exception raised for config validation.""" + + def __init__( + self, + message: str = ( + "Validation failed: The provided model group configuration is incompatible " + "with the selected instance shape. Please verify the GPU count per model and ensure " + "multi-model deployment is supported for the chosen instance shape." + ), + ): + super().__init__(message) + + class ShapeInfo(Serializable): """ Represents the configuration details for a compute instance shape. @@ -196,7 +210,7 @@ class AquaDeploymentDetail(AquaDeployment, DataClassSerializable): log: AquaResourceIdentifier = Field(default_factory=AquaResourceIdentifier) class Config: - extra = "ignore" + extra = "allow" class ShapeInfoConfig(Serializable): @@ -345,17 +359,6 @@ class Config: extra = "allow" -class ConfigValidationError(Exception): - """Exception raised for config validation.""" - - def __init__( - self, - message: str = """Validation failed: The provided model group configuration is incompatible with the selected instance shape. - Please verify the GPU count per model and ensure multi-model deployment is supported for the chosen instance shape.""", - ): - super().__init__(message) - - class ModelDeploymentConfigSummary(Serializable): """Top-level configuration model for OCI-based deployments. @@ -530,9 +533,7 @@ def validate_multimodel_deployment_feasibility( total_available_gpus: int = models_config_summary.gpu_allocation[ selected_shape ].total_gpus_available - model_deployment_config: Dict[str, Any] = ( - models_config_summary.deployment_config - ) + model_deployment_config = models_config_summary.deployment_config # Verify that every model in the group has a corresponding deployment configuration. required_model_keys = {model.model_id for model in self.models} @@ -558,7 +559,11 @@ def validate_multimodel_deployment_feasibility( continue allowed_shapes = ( - aqua_deployment_config.shape + list( + set(aqua_deployment_config.configuration.keys()).union( + set(aqua_deployment_config.shape or []) + ) + ) if is_single_model else list(aqua_deployment_config.configuration.keys()) ) @@ -579,19 +584,25 @@ def validate_multimodel_deployment_feasibility( valid_gpu_configurations = [cfg.gpu_count for cfg in multi_model_configs] if model.gpu_count not in valid_gpu_configurations: + valid_gpu_str = ", ".join(map(str, valid_gpu_configurations or [])) + if is_single_model: if model.gpu_count != total_available_gpus: error_message = ( - f"Model {model.model_id} allocated {model.gpu_count} GPUs, but for single model deployment a valid GPU count " - f"would be {total_available_gpus}. Adjust the GPU allocation to {total_available_gpus}." + f"Model {model.model_id} is configured with {model.gpu_count} GPUs, " + f"which is invalid for a single-model deployment. " + f"The allowed GPU configurations are: {valid_gpu_str}. Alternatively, " + f"the selected instance shape supports up to {total_available_gpus} GPUs. " + f"Please adjust the GPU allocation to one of these valid configurations " + f"or choose a larger instance shape." 
) logger.error(error_message) raise ConfigValidationError(error_message) else: - valid_gpu_str = ", ".join(map(str, valid_gpu_configurations)) error_message = ( - f"Model {model.model_id} allocated {model.gpu_count} GPUs, but valid GPU configurations " - f"are: {valid_gpu_str}. Adjust the GPU allocation or select a larger instance shape." + f"Model {model.model_id} is configured with {model.gpu_count} GPUs, which is invalid. " + f"Valid GPU configurations are: {valid_gpu_str}. Please adjust the GPU allocation " + f"or choose an instance shape that supports a higher GPU count." ) logger.error(error_message) raise ConfigValidationError(error_message) @@ -599,13 +610,13 @@ def validate_multimodel_deployment_feasibility( # Check that the total GPU count for the model group does not exceed the instance capacity. if sum_model_gpus > total_available_gpus: error_message = ( - f"Selected shape '{selected_shape}' has {total_available_gpus} GPUs, " - f"but the model group requires {sum_model_gpus} GPUs. " - "Adjust GPU allocations per model or choose a larger instance shape." + f"The selected instance shape `{selected_shape}` has {total_available_gpus} GPUs, " + f"but the combined GPU allocation for the model group is {sum_model_gpus} GPUs. " + "Please adjust the GPU allocations per model or select an instance shape with a higher GPU capacity." ) logger.error(error_message) raise ConfigValidationError(error_message) class Config: - extra = "ignore" + extra = "allow" protected_namespaces = () diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index e709717c5..68b06ca76 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -2126,7 +2126,7 @@ def test_validate_multimodel_deployment_feasibility_negative_single( total_gpus, value_error, ): - with pytest.raises(ConfigValidationError, match=value_error): + with pytest.raises(ConfigValidationError): self.validate_multimodel_deployment_feasibility_helper( models, instance_shape, From e3839f918fc4e9d70bf448f94ed15ffdfd222397 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Tue, 25 Mar 2025 14:17:18 -0700 Subject: [PATCH 112/124] Update deployment_handler.py --- ads/aqua/extension/deployment_handler.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index fb48fd3ea..a66f5896e 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -45,12 +45,15 @@ def get(self, id: Union[str, List[str]] = None): url_parse = urlparse(self.request.path) paths = url_parse.path.strip("/") if paths.startswith("aqua/deployments/config"): - if not id or not isinstance(id, (list, str)): + if not id or not isinstance(id, str): raise HTTPError( 400, - f"The request to {self.request.path} must include either a single model ID or a list of model IDs.", + f"Invalid request format for {self.request.path}. 
" + "Expected a single model ID or a comma-separated list of model IDs.", ) - return self.get_deployment_config(id) + return self.get_deployment_config( + model_id=id.split(",") if "," in id else id + ) elif paths.startswith("aqua/deployments/shapes"): return self.list_shapes() elif paths.startswith("aqua/deployments"): From 314d8194150e007efd1d1180638f1368aa7691d1 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Tue, 25 Mar 2025 14:28:18 -0700 Subject: [PATCH 113/124] Update deployment_handler.py --- ads/aqua/extension/deployment_handler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index a66f5896e..4c4fc2ac5 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -51,6 +51,7 @@ def get(self, id: Union[str, List[str]] = None): f"Invalid request format for {self.request.path}. " "Expected a single model ID or a comma-separated list of model IDs.", ) + id = id.replace(" ", "") return self.get_deployment_config( model_id=id.split(",") if "," in id else id ) From 5eeed918eb1c5f5fbdbe74efb3a6f559eb06425b Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Tue, 25 Mar 2025 19:03:29 -0700 Subject: [PATCH 114/124] Make the validation strict. --- ads/aqua/modeldeployment/entities.py | 53 ++++-- ads/aqua/modeldeployment/utils.py | 174 ++++++++---------- .../with_extras/aqua/test_deployment.py | 42 +++-- 3 files changed, 136 insertions(+), 133 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index a558390ee..950f70797 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -520,11 +520,15 @@ def validate_multimodel_deployment_feasibility( selected_shape = self.instance_shape + if models_config_summary.error_message: + logger.error(models_config_summary.error_message) + raise ConfigValidationError(models_config_summary.error_message) + # Verify that the selected shape is supported by the GPU allocation. if selected_shape not in models_config_summary.gpu_allocation: supported_shapes = list(models_config_summary.gpu_allocation.keys()) error_message = ( - f"The model group is not compatible with the selected instance shape '{selected_shape}'. " + f"The model group is not compatible with the selected instance shape `{selected_shape}`. " f"Supported shapes: {supported_shapes}." ) logger.error(error_message) @@ -536,12 +540,14 @@ def validate_multimodel_deployment_feasibility( model_deployment_config = models_config_summary.deployment_config # Verify that every model in the group has a corresponding deployment configuration. - required_model_keys = {model.model_id for model in self.models} - missing_model_keys = required_model_keys - set(model_deployment_config.keys()) - if missing_model_keys: + required_model_ids = {model.model_id for model in self.models} + missing_model_ids = required_model_ids - set(model_deployment_config.keys()) + if missing_model_ids: error_message = ( - f"Missing deployment configuration for models: {missing_model_keys}. " - "Ensure all selected models are properly configured." + f"Missing deployment configuration for models: {list(missing_model_ids)}. " + "Ensure all selected models are properly configured. 
If you are deploying custom " + "models that lack AQUA service configuration, refer to the deployment guidelines here: " + "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#custom_models" ) logger.error(error_message) raise ConfigValidationError(error_message) @@ -556,7 +562,15 @@ def validate_multimodel_deployment_feasibility( # Skip validation for models without deployment configuration details. if not aqua_deployment_config.configuration: - continue + error_message = ( + f"Missing deployment configuration for model `{model.model_id}`. " + "Please verify that the model is correctly configured. If you are deploying custom models without AQUA service configuration, " + "refer to the guidelines at: " + "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#custom_models" + ) + + logger.error(error_message) + raise ConfigValidationError(error_message) allowed_shapes = ( list( @@ -570,7 +584,7 @@ def validate_multimodel_deployment_feasibility( if selected_shape not in allowed_shapes: error_message = ( - f"Model {model.model_id} is not compatible with the selected instance shape '{selected_shape}'. " + f"Model `{model.model_id}` is not compatible with the selected instance shape `{selected_shape}`. " f"Select a different instance shape from allowed shapes {allowed_shapes}." ) logger.error(error_message) @@ -584,23 +598,32 @@ def validate_multimodel_deployment_feasibility( valid_gpu_configurations = [cfg.gpu_count for cfg in multi_model_configs] if model.gpu_count not in valid_gpu_configurations: - valid_gpu_str = ", ".join(map(str, valid_gpu_configurations or [])) + valid_gpu_str = valid_gpu_configurations or [] if is_single_model: + # If total GPU allocation is not supported by selected model + if selected_shape not in aqua_deployment_config.shape: + error_message = ( + f"Model `{model.model_id}` is configured with {model.gpu_count} GPUs, " + f"which is invalid for a single-model deployment. " + f"The allowed GPU configurations are: {valid_gpu_str}." + ) + logger.error(error_message) + raise ConfigValidationError(error_message) + if model.gpu_count != total_available_gpus: error_message = ( - f"Model {model.model_id} is configured with {model.gpu_count} GPUs, " + f"Model `{model.model_id}` is configured with {model.gpu_count} GPUs, " f"which is invalid for a single-model deployment. " f"The allowed GPU configurations are: {valid_gpu_str}. Alternatively, " f"the selected instance shape supports up to {total_available_gpus} GPUs. " - f"Please adjust the GPU allocation to one of these valid configurations " - f"or choose a larger instance shape." + f"Please adjust the GPU allocation to one of these valid configurations." ) logger.error(error_message) raise ConfigValidationError(error_message) else: error_message = ( - f"Model {model.model_id} is configured with {model.gpu_count} GPUs, which is invalid. " + f"Model `{model.model_id}` is configured with {model.gpu_count} GPUs, which is invalid. " f"Valid GPU configurations are: {valid_gpu_str}. Please adjust the GPU allocation " f"or choose an instance shape that supports a higher GPU count." ) @@ -610,8 +633,8 @@ def validate_multimodel_deployment_feasibility( # Check that the total GPU count for the model group does not exceed the instance capacity. 
if sum_model_gpus > total_available_gpus: error_message = ( - f"The selected instance shape `{selected_shape}` has {total_available_gpus} GPUs, " - f"but the combined GPU allocation for the model group is {sum_model_gpus} GPUs. " + f"The selected instance shape `{selected_shape}` has `{total_available_gpus}` GPUs, " + f"but the combined GPU allocation for the model group is `{sum_model_gpus}` GPUs. " "Please adjust the GPU allocations per model or select an instance shape with a higher GPU capacity." ) logger.error(error_message) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 6caad1e4f..f02df64e0 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -18,8 +18,8 @@ GPUModelAllocation, GPUShapeAllocation, ModelDeploymentConfigSummary, + MultiModelConfig, ) -from ads.common.utils import UNKNOWN from ads.config import AQUA_MODEL_DEPLOYMENT_CONFIG logger = logging.getLogger("ads.aqua") @@ -69,11 +69,6 @@ def load( A summary of the deployment configurations and GPU allocations. If GPU allocation cannot be determined, an appropriate error message is included in the summary. """ - if len(model_ids) == 1: - return self._load_model_deployment_configuration( - shapes=shapes, model_ids=model_ids - ) - return self._load_multi_model_deployment_configuration( shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id ) @@ -145,95 +140,33 @@ def _load_multi_model_deployment_configuration( if not gpu_allocation: summary.error_message = ( - "Unable to determine a valid GPU allocation for the selected models based on their current configurations. " - "Please select a different set of models." + "The selected models do not have a valid GPU allocation based on their current configurations. " + "Please select a different model group. If you are deploying custom models that lack AQUA service configuration, " + "refer to the deployment guidelines here: " + "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#custom_models" ) + logger.debug( f"GPU allocation computation failed for selected models: {model_ids}" ) - return summary - summary.gpu_allocation = gpu_allocation - return summary - - def _load_model_deployment_configuration( - self, - shapes: List[ComputeShapeSummary], - model_ids: List[str], - ) -> ModelDeploymentConfigSummary: - """ - Retrieves deployment configuration for single model and allocate all available GPU count to it. - - Parameters - ---------- - shapes : List[ComputeShapeSummary] - Model deployment available shapes. - model_ids : List[str] - A list of OCIDs for the Aqua models. - - Returns - ------- - ModelDeploymentConfigSummary - A summary of the deployment configurations and GPU allocations. If GPU allocation - cannot be determined, an appropriate error message is included in the summary. - """ - model_id = model_ids[0] - _, common_shapes, summary = self._fetch_model_shape_gpu( - shapes=shapes, model_ids=model_ids - ) - - # Find out the common shapes from deployment config and available deployment shapes - shape = [shape.upper() for shape in summary.deployment_config[model_id].shape] - if shape: - common_shapes = list(set(common_shapes).intersection(set(shape))) - - if not common_shapes: - summary.error_message = ( - "The selected model does not have any available deployment shape. " - "Please ensure that chosen model is compatible for multi-model deployment." 
- ) - logger.debug( - f"No compatible deployment shapes found for selected model: {model_id}" - ) return summary - logger.debug(f"Available Common Shapes: {common_shapes}") - - gpu_allocation = {} - for shape in common_shapes: - total_gpus_available = 0 - shape_summary = next( - ( - deployment_shape - for deployment_shape in shapes - if deployment_shape.name.upper() == shape - ), - None, - ) - if shape_summary and shape_summary.gpu_specs: - total_gpus_available = shape_summary.gpu_specs.gpu_count - - if total_gpus_available != 0: - gpu_allocation[shape] = GPUShapeAllocation( - models=[ - GPUModelAllocation( - ocid=model_id, gpu_count=total_gpus_available - ) - ], - total_gpus_available=total_gpus_available, - ) - summary.gpu_allocation = gpu_allocation return summary - def _fetch_model_shape_gpu(self, shapes: List[ComputeShapeSummary], model_ids: str): + def _fetch_model_shape_gpu( + self, shapes: List[ComputeShapeSummary], model_ids: List[str] + ): """Fetches dict of model shape and gpu, list of available shapes and builds `ModelDeploymentConfigSummary` instance.""" # Fetch deployment configurations concurrently. logger.debug(f"Loading model deployment configuration for models: {model_ids}") deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) logger.debug(f"Loaded config: {deployment_configs}") - model_shape_gpu, deployment = self._extract_model_shape_gpu(deployment_configs) + model_shape_gpu, deployment = self._extract_model_shape_gpu( + deployment_configs=deployment_configs, shapes=shapes + ) # Initialize the summary result with the deployment configurations. summary = ModelDeploymentConfigSummary(deployment_config=deployment) @@ -262,7 +195,9 @@ def _fetch_deployment_configs_concurrently( } def _extract_model_shape_gpu( - self, deployment_configs: Dict[str, AquaDeploymentConfig] + self, + deployment_configs: Dict[str, AquaDeploymentConfig], + shapes: List[ComputeShapeSummary], ): """Extracts shape and GPU count details from deployment configurations. Supported shapes for multi model deployment will be collected from `configuration` entry in deployment config. @@ -278,20 +213,57 @@ def _extract_model_shape_gpu( # Our current configuration does not support this flexibility. # For single model deployment, we use `config.shape` to find the available shapes. 
multi_deployment_shape = ( - config.shape if is_single_model else list(config.configuration.keys()) + list(set(config.configuration.keys()).union(set(config.shape or []))) + if is_single_model + else list(config.configuration.keys()) ) - if not is_single_model: - model_shape_gpu[model_id] = { - shape.upper(): [ - item.gpu_count - for item in config.configuration.get( - shape, ConfigurationItem() - ).multi_model_deployment - ] - for shape in multi_deployment_shape - } + + shape_total_gpus_available_map = { + deployment_shape.name.upper(): deployment_shape.gpu_specs.gpu_count + or None + for deployment_shape in shapes + if deployment_shape and deployment_shape.gpu_specs + } + + model_shape_gpu[model_id] = { + shape.upper(): [ + item.gpu_count + for item in config.configuration.get( + shape, + ConfigurationItem( + multi_model_deployment=( + [ + MultiModelConfig( + gpu_count=shape_total_gpus_available_map.get( + shape.upper() + ) + ) + ] + if is_single_model + else [] + ) + ), + ).multi_model_deployment + ] + for shape in multi_deployment_shape + } + + # For single-model deployments: if the shape is listed in the `shapes` section of the config, + # we include the maximum available GPU count for that shape in the allocation consideration. + if is_single_model: + for shape in model_shape_gpu[model_id]: + shape_total_gpu_count = shape_total_gpus_available_map.get( + shape.upper() + ) + if ( + shape_total_gpu_count + and shape_total_gpu_count + not in model_shape_gpu[model_id][shape] + ): + model_shape_gpu[model_id][shape].append(shape_total_gpu_count) + deployment[model_id] = { - "shape": [shape.upper() for shape in multi_deployment_shape], + "shape": [shape.upper() for shape in config.shape], "configuration": { shape.upper(): config.configuration.get(shape, ConfigurationItem()) for shape in multi_deployment_shape @@ -322,6 +294,7 @@ def _compute_gpu_allocation( primary_model_id: Optional[str], ) -> Dict[str, GPUShapeAllocation]: """Computes GPU allocation for common shapes.""" + gpu_allocation = {} for common_shape in common_shapes: @@ -337,12 +310,17 @@ def _compute_gpu_allocation( # generate a list of possible gpu count from `total_gpus_available` for custom models # without multi model deployment config + # model_gpu = { + # model: ( + # shape_gpu[common_shape] + # if shape_gpu.get(common_shape, UNKNOWN) + # else self._generate_gpu_list(total_gpus_available) + # ) + # for model, shape_gpu in model_shape_gpu.items() + # } + model_gpu = { - model: ( - shape_gpu[common_shape] - if shape_gpu.get(common_shape, UNKNOWN) - else self._generate_gpu_list(total_gpus_available) - ) + model: (shape_gpu.get(common_shape, []) or []) for model, shape_gpu in model_shape_gpu.items() } @@ -441,7 +419,7 @@ def _verify_compatibility( for combination in combinations: if ( len(combination) == len(model_gpu_dict_copy) - and sum(combination.values()) == total_gpus_available + and sum(combination.values()) <= total_gpus_available ): difference = max(combination.values()) - min(combination.values()) if difference < minimal_difference: diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 68b06ca76..a59e6c361 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -1174,26 +1174,28 @@ def test_get_multimodel_deployment_config_hybrid( ["model_a", "model_b", "model_c"] ) - assert ( - result.model_dump() - == TestDataset.aqua_deployment_multi_model_config_summary_hybrid - ) - - # all custom 
models without deployment config - # deployment shape should be collected from `list_shapes` and gpu list will be generated by ads sdk. - mock_fetch_deployment_configs_concurrently.return_value = { - "model_a": AquaDeploymentConfig(), - "model_b": AquaDeploymentConfig(), - "model_c": AquaDeploymentConfig(), - } - result = self.app.get_multimodel_deployment_config( - ["model_a", "model_b", "model_c"] - ) - - assert ( - result.model_dump() - == TestDataset.aqua_deployment_multi_model_config_summary_all_empty - ) + assert result.error_message != "" + + # assert ( + # result.model_dump() + # == TestDataset.aqua_deployment_multi_model_config_summary_hybrid + # ) + + # # all custom models without deployment config + # # deployment shape should be collected from `list_shapes` and gpu list will be generated by ads sdk. + # mock_fetch_deployment_configs_concurrently.return_value = { + # "model_a": AquaDeploymentConfig(), + # "model_b": AquaDeploymentConfig(), + # "model_c": AquaDeploymentConfig(), + # } + # result = self.app.get_multimodel_deployment_config( + # ["model_a", "model_b", "model_c"] + # ) + + # assert ( + # result.model_dump() + # == TestDataset.aqua_deployment_multi_model_config_summary_all_empty + # ) def test_verify_compatibility(self): result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( From c4248b6434664fbd99574f45c8f4ac83df62d3cd Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Tue, 25 Mar 2025 22:39:49 -0700 Subject: [PATCH 115/124] Fix unit tests --- ads/aqua/modeldeployment/utils.py | 3 +- .../deployment/aqua_summary_multi_model.json | 586 +++++++++--------- .../aqua_summary_multi_model_single.json | 2 +- .../with_extras/aqua/test_deployment.py | 8 +- 4 files changed, 309 insertions(+), 290 deletions(-) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index f02df64e0..04a854cc6 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -391,7 +391,6 @@ def _verify_compatibility( tuple: A tuple of gpu count allocation result. 
""" - model_gpu_dict_copy = copy.deepcopy(model_gpu_dict) if primary_model_id: primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id)) @@ -419,7 +418,7 @@ def _verify_compatibility( for combination in combinations: if ( len(combination) == len(model_gpu_dict_copy) - and sum(combination.values()) <= total_gpus_available + and sum(combination.values()) == total_gpus_available ): difference = max(combination.values()) - min(combination.values()) if difference < minimal_difference: diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json index 272f67a8f..ae5ed7b72 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json @@ -1,295 +1,313 @@ { - "deployment_config": { - "model_a": { - "shape": [ - "VM.GPU.A10.2", - "BM.GPU.A10.4", - "BM.GPU.A100-v2.8", - "BM.GPU.H100.8" - ], - "configuration": { - "VM.GPU.A10.2": { - "parameters": {}, - "multi_model_deployment": [ - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - } - ], - "shape_info": { - "configs": [], - "type": "" - } - }, - "BM.GPU.A10.4": { - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - }, - "multi_model_deployment": [ - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 4, - "parameters": {} - } - ], - "shape_info": { - "configs": [], - "type": "" - } - }, - "BM.GPU.A100-v2.8": { - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - }, - "multi_model_deployment": [ - { - "gpu_count": 1, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 8, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - } - ], - "shape_info": { - "configs": [], - "type": "" - } - }, - "BM.GPU.H100.8": { - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - }, - "multi_model_deployment": [ - { - "gpu_count": 1, - "parameters": {} - }, - { - "gpu_count": 2, - "parameters": {} - }, - { - "gpu_count": 4, - "parameters": {} - }, - { - "gpu_count": 8, - "parameters": {} - } - ], - "shape_info": { - "configs": [], - "type": "" - } - } + "deployment_config": { + "model_a": { + "configuration": { + "BM.GPU.A10.4": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": {} } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } }, - "model_b": { - "shape": [ - "VM.GPU.A10.2", - "BM.GPU.A10.4", - "BM.GPU.A100-v2.8", - "BM.GPU.H100.8" - ], - "configuration": { - "VM.GPU.A10.2": { - "parameters": {}, - "multi_model_deployment": [ - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - } - ], - "shape_info": { - "configs": [], - "type": "" - } - }, - "BM.GPU.A10.4": { - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - }, - "multi_model_deployment": [ - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": 
"--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 4, - "parameters": {} - } - ], - "shape_info": { - "configs": [], - "type": "" - } - }, - "BM.GPU.A100-v2.8": { - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - }, - "multi_model_deployment": [ - { - "gpu_count": 1, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 8, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - } - ], - "shape_info": { - "configs": [], - "type": "" - } - }, - "BM.GPU.H100.8": { - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - }, - "multi_model_deployment": [ - { - "gpu_count": 1, - "parameters": {} - }, - { - "gpu_count": 2, - "parameters": {} - }, - { - "gpu_count": 8, - "parameters": {} - } - ], - "shape_info": { - "configs": [], - "type": "" - } - } + "BM.GPU.A100-v2.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } }, - "model_c": { - "shape": [ - "VM.GPU.A10.2", - "BM.GPU.A10.4", - "BM.GPU.H100.8" - ], - "configuration": { - "VM.GPU.A10.2": { - "parameters": {}, - "multi_model_deployment": [ - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - } - ], - "shape_info": { - "configs": [], - "type": "" - } - }, - "BM.GPU.A10.4": { - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - }, - "multi_model_deployment": [ - { - "gpu_count": 2, - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" - } - }, - { - "gpu_count": 4, - "parameters": {} - } - ], - "shape_info": { - "configs": [], - "type": "" - } - }, - "BM.GPU.H100.8": { - "parameters": { - "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" - }, - "multi_model_deployment": [ - { - "gpu_count": 1, - "parameters": {} - }, - { - "gpu_count": 2, - "parameters": {} - }, - { - "gpu_count": 4, - "parameters": {} - }, - { - "gpu_count": 8, - "parameters": {} - } - ], - "shape_info": { - "configs": [], - "type": "" - } - } + "BM.GPU.H100.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": {} + }, + { + "gpu_count": 2, + "parameters": {} + }, + { + "gpu_count": 4, + "parameters": {} + }, + { + "gpu_count": 8, + "parameters": {} } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": {}, + "shape_info": { + "configs": [], + "type": "" + } } + }, + "shape": [ + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ] }, - "gpu_allocation": { - "BM.GPU.H100.8" : { - "models": [ - {"ocid": "model_a", "gpu_count" : 2}, - {"ocid": "model_b", "gpu_count" : 2}, - {"ocid": "model_c", "gpu_count" : 2} - ], - "total_gpus_available": 8 + 
"model_b": { + "configuration": { + "BM.GPU.A10.4": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } }, - "BM.GPU.A100-v2.8" : { - "models": [ - {"ocid": "model_a", "gpu_count" : 2}, - {"ocid": "model_b", "gpu_count" : 2}, - {"ocid": "model_c", "gpu_count" : 2} - ], - "total_gpus_available": 8 + "BM.GPU.A100-v2.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.H100.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": {} + }, + { + "gpu_count": 2, + "parameters": {} + }, + { + "gpu_count": 8, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": {}, + "shape_info": { + "configs": [], + "type": "" + } } + }, + "shape": [ + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ] }, - "error_message": "None" -} \ No newline at end of file + "model_c": { + "configuration": { + "BM.GPU.A10.4": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.H100.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": {} + }, + { + "gpu_count": 2, + "parameters": {} + }, + { + "gpu_count": 4, + "parameters": {} + }, + { + "gpu_count": 8, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": {}, + "shape_info": { + "configs": [], + "type": "" + } + } + }, + "shape": [ + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.H100.8" + ] + } + }, + "error_message": null, + "gpu_allocation": { + "BM.GPU.A100-v2.8": { + "models": [ + { + "gpu_count": 2, + "ocid": "model_a" + }, + { + "gpu_count": 2, + "ocid": "model_b" + }, + { + "gpu_count": 2, + "ocid": "model_c" + } + ], + "total_gpus_available": 8 + }, + "BM.GPU.H100.8": { + "models": [ + { + "gpu_count": 2, + "ocid": "model_a" + }, + { + "gpu_count": 2, + "ocid": "model_b" + }, + { + "gpu_count": 2, + "ocid": "model_c" + } + ], + "total_gpus_available": 8 + } + } +} diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model_single.json 
b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model_single.json index 729f1c961..491049250 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model_single.json +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model_single.json @@ -103,7 +103,7 @@ ] } }, - "error_message": "None", + "error_message": null, "gpu_allocation": { "BM.GPU.A10.4": { "models": [ diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index a59e6c361..a69ab6f00 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -1132,9 +1132,11 @@ def test_get_multimodel_deployment_config_single( } result = self.app.get_multimodel_deployment_config(["model_a"]) - assert ( - result.model_dump() - == TestDataset.aqua_deployment_multi_model_config_single_custom + assert result.error_message == ( + "The selected models do not have a valid GPU allocation based on their current configurations. " + "Please select a different model group. If you are deploying custom models that lack AQUA service configuration, " + "refer to the deployment guidelines here: " + "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#custom_models" ) @patch( From 44bb51d619135f9dc71f17c4fe0b0eec371b69cf Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Wed, 26 Mar 2025 09:59:03 -0700 Subject: [PATCH 116/124] Fixes edge case with the shapes. --- ads/aqua/modeldeployment/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 04a854cc6..daeb4fce1 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -256,7 +256,8 @@ def _extract_model_shape_gpu( shape.upper() ) if ( - shape_total_gpu_count + shape in config.shape + and shape_total_gpu_count and shape_total_gpu_count not in model_shape_gpu[model_id][shape] ): From 66fb84d00ba84928095d7a86106df1db392b431d Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 26 Mar 2025 16:12:56 -0400 Subject: [PATCH 117/124] Added while loop in _verify_compatibility --- ads/aqua/modeldeployment/utils.py | 114 ++++++++++++++++++++---------- 1 file changed, 76 insertions(+), 38 deletions(-) diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index daeb4fce1..7bdc03cb0 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -371,7 +371,11 @@ def _verify_compatibility( If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated. If provided, gpu count for each compatible shape will be prioritized for primary model. - For example, there is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below: + Example + ------- + + Case 1: + There is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below: A - BM.GPU.H100.8 - 1, 2, 4, 8 B - BM.GPU.H100.8 - 1, 2, 4, 8 @@ -380,6 +384,16 @@ def _verify_compatibility( If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2] If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count. 
+ Case 2: + There is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below: + + A - BM.GPU.H100.8 - 1 + B - BM.GPU.H100.8 - 1, 2, 4 + C - BM.GPU.H100.8 - 1, 2, 4 + + If no primary model is provided, the gpu allocation for A, B, C could be [1, 1, 2] or [1, 2, 1] + If C is the primary model, the gpu allocation is [1, 1, 2] as C always gets the maximum gpu count. + Parameters ---------- model_gpu_dict: Dict @@ -393,50 +407,74 @@ def _verify_compatibility( A tuple of gpu count allocation result. """ model_gpu_dict_copy = copy.deepcopy(model_gpu_dict) - if primary_model_id: + # minimal gpu count needed to satisfy all models + minimal_gpus_needed = len(model_gpu_dict) + if primary_model_id and minimal_gpus_needed > 1: primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id)) - for gpu_count in reversed(primary_model_gpu_list): - combinations = self.get_combinations(model_gpu_dict_copy) + primary_model_gpu_list.reverse() + combinations = self.get_combinations(model_gpu_dict_copy) + for gpu_count in primary_model_gpu_list: + current_gpus_available = total_gpus_available + while ( + current_gpus_available >= minimal_gpus_needed + or current_gpus_available == 1 + ): + for combination in combinations: + if ( + len(combination) == len(model_gpu_dict_copy) + and sum(combination.values()) + == current_gpus_available - gpu_count + ): + combination[primary_model_id] = gpu_count + return ( + True, + [ + GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) + for ocid, gpu_count in combination.items() + ], + ) + + current_gpus_available -= 2 + current_gpus_available = ( + 1 if current_gpus_available == 0 else current_gpus_available + ) + else: + combinations = self.get_combinations(model_gpu_dict_copy) + current_gpus_available = total_gpus_available + while ( + current_gpus_available >= minimal_gpus_needed + or current_gpus_available == 1 + ): + minimal_difference = float("inf") # gets the positive infinity + optimal_combination = [] for combination in combinations: if ( len(combination) == len(model_gpu_dict_copy) - and sum(combination.values()) - == total_gpus_available - gpu_count + and sum(combination.values()) == current_gpus_available ): - combination[primary_model_id] = gpu_count - return ( - True, - [ - GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) - for ocid, gpu_count in combination.items() - ], + difference = max(combination.values()) - min( + combination.values() ) + if difference < minimal_difference: + minimal_difference = difference + optimal_combination = combination + + # find the optimal combination, no need to continue + if minimal_difference == 0: + break + + if optimal_combination: + return ( + True, + [ + GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) + for ocid, gpu_count in optimal_combination.items() + ], + ) - else: - combinations = self.get_combinations(model_gpu_dict_copy) - minimal_difference = float("inf") # gets the positive infinity - optimal_combination = [] - for combination in combinations: - if ( - len(combination) == len(model_gpu_dict_copy) - and sum(combination.values()) == total_gpus_available - ): - difference = max(combination.values()) - min(combination.values()) - if difference < minimal_difference: - minimal_difference = difference - optimal_combination = combination - - # find the optimal combination, no need to continue - if minimal_difference == 0: - break - - if optimal_combination: - return ( - True, - [ - GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) - for ocid, gpu_count in 
optimal_combination.items() - ], + current_gpus_available -= 2 + current_gpus_available = ( + 1 if current_gpus_available == 0 else current_gpus_available ) return (False, []) From 8e386afac83c0f8d2830c08d93146c86c1d127c1 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Wed, 26 Mar 2025 21:55:21 -0700 Subject: [PATCH 118/124] Fixes unit tests --- ads/aqua/modeldeployment/deployment.py | 22 +++++++------ ads/aqua/modeldeployment/entities.py | 32 ++++++++++++------- .../with_extras/aqua/test_deployment.py | 2 +- .../aqua/test_deployment_handler.py | 2 +- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 5c3253678..9d9e7c09d 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -1049,20 +1049,22 @@ def get_multimodel_deployment_config( **kwargs: Dict, ) -> ModelDeploymentConfigSummary: """ - Retrieves the deployment configuration for multiple Aqua models and calculates - the GPU allocations for all compatible shapes. + Retrieves the deployment configuration for multiple models and calculates + GPU allocations across all compatible shapes. - If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated. - If provided, gpu count for each compatible shape will be prioritized for primary model. + More details: + https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#get_multimodel_deployment_config - For example, there is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below: + CLI example: + ads aqua deployment get_multimodel_deployment_config --model_ids '["ocid1.datasciencemodel.oc1.iad.OCID"]' - A - BM.GPU.H100.8 - 1, 2, 4, 8 - B - BM.GPU.H100.8 - 1, 2, 4, 8 - C - BM.GPU.H100.8 - 1, 2, 4, 8 + If a primary model ID is provided, GPU allocation will prioritize that model + when selecting compatible shapes. - If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2] - If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count. + Example: + Assume all three models: A, B, and C, support the same shape: "BM.GPU.H100.8" and each supports the following GPU counts for that shape: 1, 2, 4, 8. + If `no` primary model is specified, valid allocations could be: [2, 4, 2], [2, 2, 4], or [4, 2, 2] + If `B` is set as the primary model, the allocation will be: [2, 4, 2], where B receives the maximum available GPU count Parameters ---------- diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 950f70797..655c6ddc2 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -604,38 +604,46 @@ def validate_multimodel_deployment_feasibility( # If total GPU allocation is not supported by selected model if selected_shape not in aqua_deployment_config.shape: error_message = ( - f"Model `{model.model_id}` is configured with {model.gpu_count} GPUs, " - f"which is invalid for a single-model deployment. " - f"The allowed GPU configurations are: {valid_gpu_str}." + f"Model `{model.model_id}` is configured with {model.gpu_count} GPU(s), " + f"which is invalid. The allowed GPU configurations are: {valid_gpu_str}." 
) logger.error(error_message) raise ConfigValidationError(error_message) if model.gpu_count != total_available_gpus: error_message = ( - f"Model `{model.model_id}` is configured with {model.gpu_count} GPUs, " - f"which is invalid for a single-model deployment. " - f"The allowed GPU configurations are: {valid_gpu_str}. Alternatively, " - f"the selected instance shape supports up to {total_available_gpus} GPUs. " - f"Please adjust the GPU allocation to one of these valid configurations." + f"Model '{model.model_id}' is configured to use {model.gpu_count} GPU(s), " + f"which not fully utilize the selected instance shape with {total_available_gpus} available GPU(s). " + "Consider adjusting the GPU allocation to better utilize the available resources and maximize performance." ) logger.error(error_message) raise ConfigValidationError(error_message) + else: error_message = ( - f"Model `{model.model_id}` is configured with {model.gpu_count} GPUs, which is invalid. " + f"Model `{model.model_id}` is configured with {model.gpu_count} GPU(s), which is invalid. " f"Valid GPU configurations are: {valid_gpu_str}. Please adjust the GPU allocation " f"or choose an instance shape that supports a higher GPU count." ) logger.error(error_message) raise ConfigValidationError(error_message) + if is_single_model and model.gpu_count != total_available_gpus: + error_message = ( + f"Model '{model.model_id}' is configured to use {model.gpu_count} GPU(s), " + f"which not fully utilize the selected instance shape with {total_available_gpus} available GPU(s). " + "This configuration may lead to suboptimal performance for a single-model deployment. " + "Consider adjusting the GPU allocation to better utilize the available resources and maximize performance." + ) + logger.warning(error_message) + # raise ConfigValidationError(error_message) + # Check that the total GPU count for the model group does not exceed the instance capacity. if sum_model_gpus > total_available_gpus: error_message = ( - f"The selected instance shape `{selected_shape}` has `{total_available_gpus}` GPUs, " - f"but the combined GPU allocation for the model group is `{sum_model_gpus}` GPUs. " - "Please adjust the GPU allocations per model or select an instance shape with a higher GPU capacity." + f"The selected instance shape `{selected_shape}` provides `{total_available_gpus}` GPU(s), " + f"but the total GPU allocation required by the model group is `{sum_model_gpus}` GPU(s). " + "Please adjust the GPU allocation per model or choose an instance shape with greater GPU capacity." 
) logger.error(error_message) raise ConfigValidationError(error_message) diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index a69ab6f00..88726a5e8 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -2113,7 +2113,7 @@ def test_validate_multimodel_deployment_feasibility_negative( ), ( [ - {"ocid": "model_a", "gpu_count": 3}, # invalid gpu count 3 + {"ocid": "model_a", "gpu_count": 3}, ], "BM.GPU.H100.8", "test_a", diff --git a/tests/unitary/with_extras/aqua/test_deployment_handler.py b/tests/unitary/with_extras/aqua/test_deployment_handler.py index 7b11ceb3c..9e9be2b34 100644 --- a/tests/unitary/with_extras/aqua/test_deployment_handler.py +++ b/tests/unitary/with_extras/aqua/test_deployment_handler.py @@ -97,7 +97,7 @@ def test_get_multimodel_deployment_config( ): """Test get method to return multi model deployment config""" self.deployment_handler.request.path = "aqua/deployments/config" - self.deployment_handler.get(id=["mock-model-id-one", "mock-model-id-two"]) + self.deployment_handler.get(id="mock-model-id-one,mock-model-id-two") mock_get_multimodel_deployment_config.assert_called_with( model_ids=["mock-model-id-one", "mock-model-id-two"], primary_model_id=None, From 303a0ee5db2954e12fadabca34c4880924d0d784 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Thu, 27 Mar 2025 11:03:36 -0700 Subject: [PATCH 119/124] Covers the exceptional case of GPU allocation for the phi-4 models --- ads/aqua/modeldeployment/entities.py | 18 +++++++++--------- ads/aqua/modeldeployment/utils.py | 20 ++++++++++---------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 655c6ddc2..5899e5b2f 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -628,15 +628,15 @@ def validate_multimodel_deployment_feasibility( logger.error(error_message) raise ConfigValidationError(error_message) - if is_single_model and model.gpu_count != total_available_gpus: - error_message = ( - f"Model '{model.model_id}' is configured to use {model.gpu_count} GPU(s), " - f"which not fully utilize the selected instance shape with {total_available_gpus} available GPU(s). " - "This configuration may lead to suboptimal performance for a single-model deployment. " - "Consider adjusting the GPU allocation to better utilize the available resources and maximize performance." - ) - logger.warning(error_message) - # raise ConfigValidationError(error_message) + if sum_model_gpus < total_available_gpus: + error_message = ( + f"Selected models are configured to use {sum_model_gpus} GPU(s), " + f"which not fully utilize the selected instance shape with {total_available_gpus} available GPU(s). " + "This configuration may lead to suboptimal performance for a multi-model deployment. " + "Consider adjusting the GPU allocation to better utilize the available resources and maximize performance." + ) + logger.warning(error_message) + # raise ConfigValidationError(error_message) # Check that the total GPU count for the model group does not exceed the instance capacity. 
if sum_model_gpus > total_available_gpus: diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py index 7bdc03cb0..ac93ed23f 100644 --- a/ads/aqua/modeldeployment/utils.py +++ b/ads/aqua/modeldeployment/utils.py @@ -417,7 +417,7 @@ def _verify_compatibility( current_gpus_available = total_gpus_available while ( current_gpus_available >= minimal_gpus_needed - or current_gpus_available == 1 + # or current_gpus_available == 1 ): for combination in combinations: if ( @@ -434,16 +434,16 @@ def _verify_compatibility( ], ) - current_gpus_available -= 2 - current_gpus_available = ( - 1 if current_gpus_available == 0 else current_gpus_available - ) + current_gpus_available -= 1 + # current_gpus_available = ( + # 1 if current_gpus_available == 0 else current_gpus_available + # ) else: combinations = self.get_combinations(model_gpu_dict_copy) current_gpus_available = total_gpus_available while ( current_gpus_available >= minimal_gpus_needed - or current_gpus_available == 1 + # or current_gpus_available == 1 ): minimal_difference = float("inf") # gets the positive infinity optimal_combination = [] @@ -472,10 +472,10 @@ def _verify_compatibility( ], ) - current_gpus_available -= 2 - current_gpus_available = ( - 1 if current_gpus_available == 0 else current_gpus_available - ) + current_gpus_available -= 1 + # current_gpus_available = ( + # 1 if current_gpus_available == 0 else current_gpus_available + # ) return (False, []) From 35cc316fb46299fed8e1ccfd4a350ea6145c662a Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Thu, 27 Mar 2025 11:41:39 -0700 Subject: [PATCH 120/124] Adds BM.GPU.B4.8 shape --- ads/aqua/resources/gpu_shapes_index.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ads/aqua/resources/gpu_shapes_index.json b/ads/aqua/resources/gpu_shapes_index.json index 7f6b6b37f..c88155e45 100644 --- a/ads/aqua/resources/gpu_shapes_index.json +++ b/ads/aqua/resources/gpu_shapes_index.json @@ -10,6 +10,11 @@ "gpu_memory_in_gbs": 640, "gpu_type": "A100" }, + "BM.GPU.B4.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + "gpu_type": "A100" + }, "BM.GPU.H100.8": { "gpu_count": 8, "gpu_memory_in_gbs": 640, From 11cb745ffac89704c2ae625d8093b4de148a9a31 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Thu, 27 Mar 2025 16:09:19 -0700 Subject: [PATCH 121/124] Enhance params extracting logic --- ads/aqua/modeldeployment/deployment.py | 28 ++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 9d9e7c09d..5ea738eeb 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -305,6 +305,7 @@ def create( ) return self._create_multi( aqua_model=aqua_model, + model_config_summary=model_config_summary, create_deployment_details=create_deployment_details, container_config=container_config, ) @@ -553,6 +554,7 @@ def _create( def _create_multi( self, aqua_model: DataScienceModel, + model_config_summary: ModelDeploymentConfigSummary, create_deployment_details: CreateModelDeploymentDetails, container_config: Dict, ) -> AquaDeployment: @@ -560,6 +562,8 @@ def _create_multi( Parameters ---------- + model_config_summary : model_config_summary + Summary Model Deployment configuration for the group of models. aqua_model : DataScienceModel An instance of Aqua data science model. 
create_deployment_details : CreateModelDeploymentDetails @@ -606,14 +610,18 @@ def _create_multi( # replaces `--tensor-parallel-size` with model gpu count container_params_dict.update({"--tensor-parallel-size": model.gpu_count}) params = build_params_string(container_params_dict) - deployment_config = self.get_deployment_config(model.model_id) - multi_model_deployment = deployment_config.configuration.get( + + deployment_config = model_config_summary.deployment_config.get( + model.model_id, AquaDeploymentConfig() + ).configuration.get( create_deployment_details.instance_shape, ConfigurationItem() - ).multi_model_deployment + ) + # finds the corresponding deployment parameters based on the gpu count # and combines them with user's parameters. Existing deployment parameters # will be overriden by user's parameters. - for item in multi_model_deployment: + params_found = False + for item in deployment_config.multi_model_deployment: if ( model.gpu_count and item.gpu_count @@ -623,8 +631,20 @@ def _create_multi( get_container_params_type(container_type_key), UNKNOWN ) params = f"{params} {get_combined_params(config_parameters, user_params)}".strip() + params_found = True break + if not params_found and deployment_config.parameters: + config_parameters = deployment_config.parameters.get( + get_container_params_type(container_type_key), UNKNOWN + ) + params = f"{params} {get_combined_params(config_parameters, user_params)}".strip() + params_found = True + + # if no config parameters found, append user parameters directly. + if not params_found: + params = f"{params} {user_params}".strip() + artifact_path_prefix = model.artifact_location.rstrip("/") if ObjectStorageDetails.is_oci_path(artifact_path_prefix): os_path = ObjectStorageDetails.from_path(artifact_path_prefix) From 51760507c712dff40589eb0a4b7daba4510a8625 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 28 Mar 2025 13:18:53 -0700 Subject: [PATCH 122/124] Fixes by comments --- ads/aqua/common/utils.py | 1 + ads/aqua/modeldeployment/deployment.py | 2 +- tests/unitary/with_extras/aqua/test_deployment.py | 2 -- tests/unitary/with_extras/aqua/test_evaluation.py | 1 - 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py index 37ec91656..c36fc3cab 100644 --- a/ads/aqua/common/utils.py +++ b/ads/aqua/common/utils.py @@ -19,6 +19,7 @@ from string import Template from typing import Any, Dict, List, Optional, Union +import fsspec import oci from cachetools import TTLCache, cached from huggingface_hub.constants import HF_HUB_CACHE diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 5ea738eeb..0695c9374 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -218,7 +218,7 @@ def create( model_ids = [model.model_id for model in create_deployment_details.models] try: model_config_summary = self.get_multimodel_deployment_config( - model_ids=model_ids + model_ids=model_ids, compartment_id=compartment_id ) if not model_config_summary.gpu_allocation: raise AquaValueError(model_config_summary.error_message) diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 88726a5e8..c82081b5d 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -981,8 +981,6 @@ def test_get_deployment(self, mock_get_resource_name): AquaDeployment.__annotations__.keys() ) actual_attributes = 
result.to_dict() - # print(actual_attributes) - print(TestDataset.aqua_deployment_detail) assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" assert actual_attributes == TestDataset.aqua_deployment_detail assert result.log.name == "log-name" diff --git a/tests/unitary/with_extras/aqua/test_evaluation.py b/tests/unitary/with_extras/aqua/test_evaluation.py index d8826e0f4..b54fefd96 100644 --- a/tests/unitary/with_extras/aqua/test_evaluation.py +++ b/tests/unitary/with_extras/aqua/test_evaluation.py @@ -630,7 +630,6 @@ def test_validate_model_name( mock_model, mock_create_aqua_evaluation_details ) except AquaError as e: - print(str(e)) self.assertEqual(str(e), expected_message) def test_get_service_model_name(self): From c797d95dc07bc6d51d10c3e44cc6392695fcb482 Mon Sep 17 00:00:00 2001 From: Lu Peng <118394507+lu-ohai@users.noreply.github.com> Date: Fri, 28 Mar 2025 16:54:19 -0400 Subject: [PATCH 123/124] Update model.py to fix format --- ads/aqua/model/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index 4fd54ff77..b225b3745 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -400,7 +400,7 @@ def create_multi( metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA, artifact_path_or_content=json.dumps( [model.model_dump() for model in models] - ), + ).encode(), path_type=MetadataArtifactPathType.CONTENT, ) From a8ed99a5a238d57967233a4806ed89138d74f6b1 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Fri, 28 Mar 2025 14:48:12 -0700 Subject: [PATCH 124/124] Fixes docs link --- ads/aqua/evaluation/evaluation.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 67aedc613..877030459 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -142,7 +142,7 @@ def create( **kwargs, ) -> "AquaEvaluationSummary": """Creates Aqua evaluation for resource.\n - For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/f271ca63d12e3c256718f23a14d93da4b4fc086b/ai-quick-actions/cli-tips.md#create-model-evaluation + For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/cli-tips.md#model-evaluation Parameters ---------- @@ -504,9 +504,7 @@ def create( metrics=create_aqua_evaluation_details.metrics, inference_configuration=eval_inference_configuration or {}, ) - ).create( - **kwargs - ) ## TODO: decide what parameters will be needed + ).create(**kwargs) ## TODO: decide what parameters will be needed logger.debug( f"Successfully created evaluation job {evaluation_job.id} for {create_aqua_evaluation_details.evaluation_source_id}." )
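
Closing note on the GPU-allocation logic this series converges on: `_verify_compatibility` searches the cartesian product of each model's allowed GPU counts for a combination that fits the shape's GPU budget, preferring the most even split, or the largest feasible count for the primary model when one is given. The standalone sketch below illustrates that selection rule under two assumptions: the function and variable names are illustrative rather than the library's API, and it only checks the exact budget, whereas the patched loader also retries with smaller budgets.

import itertools
from typing import Dict, List, Optional, Tuple


def pick_gpu_allocation(
    model_gpu_options: Dict[str, List[int]],
    total_gpus_available: int,
    primary_model_id: Optional[str] = None,
) -> Optional[Dict[str, int]]:
    """Pick one GPU count per model so the counts sum to the shape's GPU budget."""
    model_ids = list(model_gpu_options)
    best: Optional[Dict[str, int]] = None
    best_score: Tuple[int, int] = (0, 0)  # (primary model's GPU count, -spread)

    for combo in itertools.product(*(model_gpu_options[m] for m in model_ids)):
        if sum(combo) != total_gpus_available:
            continue
        allocation = dict(zip(model_ids, combo))
        spread = max(combo) - min(combo)
        primary_gpus = allocation.get(primary_model_id, 0) if primary_model_id else 0
        score = (primary_gpus, -spread)  # maximize primary count, then minimize spread
        if best is None or score > best_score:
            best, best_score = allocation, score
    return best


# "Case 1" from the _verify_compatibility docstring: A, B, C each allow 1/2/4/8 GPUs on an 8-GPU shape.
options = {"A": [1, 2, 4, 8], "B": [1, 2, 4, 8], "C": [1, 2, 4, 8]}
print(pick_gpu_allocation(options, 8))                        # a spread-minimal split such as {'A': 2, 'B': 2, 'C': 4}
print(pick_gpu_allocation(options, 8, primary_model_id="B"))  # {'A': 2, 'B': 4, 'C': 2}, B gets the largest feasible count

The second call reproduces the documented behaviour that the primary model always receives the maximum GPU count that still leaves a valid combination for the remaining models.
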