oracle
diff --git a/‎README-development.md
Lines changed: 1 addition & 1 deletion b/‎README-development.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎ads/aqua/modeldeployment/deployment.py
Lines changed: 17 additions & 0 deletions b/‎ads/aqua/modeldeployment/deployment.py
Lines changed: 17 additions & 0 deletions
diff --git a/‎ads/aqua/modeldeployment/entities.py
Lines changed: 198 additions & 95 deletions b/‎ads/aqua/modeldeployment/entities.py
Lines changed: 198 additions & 95 deletions
@@ -248,7 +248,7 @@ All the unit tests can be found [here](https://github.com/oracle/accelerated-dat
 The following commands detail how the unit tests can be run.
 ```
 # Run all tests in AQUA project
-python -m pytest -q tests/unitary/with_extras/aqua/test_deployment.py
+python -m pytest -q tests/unitary/with_extras/aqua/*
 
 # Run all tests specific to a module within in AQUA project (ex. test_deployment.py, test_model.py, etc.)
 python -m pytest -q tests/unitary/with_extras/aqua/test_deployment.py
 
@@ -41,6 +41,7 @@
     AquaDeploymentConfig,
     AquaDeploymentDetail,
     ConfigurationItem,
+    ConfigValidationError,
     CreateModelDeploymentDetails,
     ModelDeploymentConfigSummary,
 )
@@ -156,6 +157,22 @@ def create(
                 defined_tags=defined_tags,
             )
         else:
+            model_ids = [model.model_id for model in create_deployment_details.models]
+
+            try:
+                model_config_summary = self.get_multimodel_deployment_config(
+                    model_ids=model_ids
+                )
+
+                if not model_config_summary.gpu_allocation:
+                    raise AquaValueError(model_config_summary.error_message)
+
+                create_deployment_details.validate_multimodel_deployment_feasibility(
+                    models_config_summary=model_config_summary
+                )
+            except ConfigValidationError as err:
+                raise AquaValueError(f"{err}") from err
+
             aqua_model = model_app.create_multi(
                 models=create_deployment_details.models,
                 compartment_id=compartment_id,
 
@@ -7,6 +7,7 @@
 from oci.data_science.models import ModelDeployment, ModelDeploymentSummary
 from pydantic import BaseModel, Field, model_validator
 
+from ads.aqua import logger
 from ads.aqua.common.entities import AquaMultiModelRef, ShapeInfo
 from ads.aqua.common.enums import Tags
 from ads.aqua.config.utils.serializer import Serializable
@@ -142,101 +143,6 @@ class Config:
         extra = "ignore"
 
 
-class CreateModelDeploymentDetails(BaseModel):
-    """Class for creating Aqua model deployments."""
-
-    instance_shape: str = Field(
-        ..., description="The instance shape used for deployment."
-    )
-    display_name: str = Field(..., description="The name of the model deployment.")
-    compartment_id: Optional[str] = Field(None, description="The compartment OCID.")
-    project_id: Optional[str] = Field(None, description="The project OCID.")
-    description: Optional[str] = Field(
-        None, description="The description of the deployment."
-    )
-    model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
-    models: Optional[List[AquaMultiModelRef]] = Field(
-        None, description="List of models for multimodel deployment."
-    )
-    instance_count: int = Field(
-        None, description="Number of instances used for deployment."
-    )
-    log_group_id: Optional[str] = Field(
-        None, description="OCI logging group ID for logs."
-    )
-    access_log_id: Optional[str] = Field(
-        None,
-        description="OCID for access logs. "
-        "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm",
-    )
-    predict_log_id: Optional[str] = Field(
-        None,
-        description="OCID for prediction logs."
-        "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm",
-    )
-    bandwidth_mbps: Optional[int] = Field(
-        None, description="Bandwidth limit on the load balancer in Mbps."
-    )
-    web_concurrency: Optional[int] = Field(
-        None, description="Number of worker processes/threads for handling requests."
-    )
-    server_port: Optional[int] = Field(
-        None, description="Server port for the Docker container image."
-    )
-    health_check_port: Optional[int] = Field(
-        None, description="Health check port for the Docker container image."
-    )
-    env_var: Optional[Dict[str, str]] = Field(
-        default_factory=dict, description="Environment variables for deployment."
-    )
-    container_family: Optional[str] = Field(
-        None, description="Image family of the model deployment container runtime."
-    )
-    memory_in_gbs: Optional[float] = Field(
-        None, description="Memory (in GB) for the selected shape."
-    )
-    ocpus: Optional[float] = Field(
-        None, description="OCPU count for the selected shape."
-    )
-    model_file: Optional[str] = Field(
-        None, description="File used for model deployment."
-    )
-    private_endpoint_id: Optional[str] = Field(
-        None, description="Private endpoint ID for model deployment."
-    )
-    container_image_uri: Optional[str] = Field(
-        None,
-        description="Image URI for model deployment container runtime "
-        "(ignored for service-managed containers). "
-        "Required parameter for BYOC based deployments if this parameter was not set during "
-        "model registration.",
-    )
-    cmd_var: Optional[List[str]] = Field(
-        None, description="Command variables for the container runtime."
-    )
-    freeform_tags: Optional[Dict] = Field(
-        None, description="Freeform tags for model deployment."
-    )
-    defined_tags: Optional[Dict] = Field(
-        None, description="Defined tags for model deployment."
-    )
-
-    @model_validator(mode="before")
-    @classmethod
-    def validate(cls, values: Any) -> Any:
-        """Ensures exactly one of `model_id` or `models` is provided."""
-        model_id = values.get("model_id")
-        models = values.get("models")
-        if bool(model_id) == bool(models):  # Both set or both unset
-            raise ValueError(
-                "Exactly one of `model_id` or `models` must be provided to create a model deployment."
-            )
-        return values
-
-    class Config:
-        extra = "ignore"
-
-
 class ShapeInfoConfig(Serializable):
     """Describes how many memory and cpu to this model for specific shape.
 
@@ -382,6 +288,17 @@ class GPUShapeAllocation(Serializable):
     class Config:
         extra = "allow"
 
+class ConfigValidationError(Exception):
+    """Exception raised when a deployment configuration fails validation.
+
+    Parameters
+    ----------
+    message : str, optional
+        Human-readable reason for the failure. Defaults to a generic message
+        about the model group being incompatible with the selected instance shape.
+    """
+
+    def __init__(
+        self,
+        # Implicit string concatenation keeps the default message on a single line;
+        # a triple-quoted literal would embed the source indentation in the message.
+        message: str = (
+            "Validation failed: The provided model group configuration is incompatible with the selected instance shape. "
+            "Please verify the GPU count per model and ensure multi-model deployment is supported for the chosen instance shape."
+        ),
+    ):
+        super().__init__(message)
 
 class ModelDeploymentConfigSummary(Serializable):
     """Top-level configuration model for OCI-based deployments.
@@ -413,3 +330,189 @@ class ModelDeploymentConfigSummary(Serializable):
 
     class Config:
         extra = "allow"
+
+
+class CreateModelDeploymentDetails(BaseModel):
+    """Request payload for creating an Aqua model deployment.
+
+    Exactly one of `model_id` (single-model deployment) or `models`
+    (multi-model deployment) must be provided; this is enforced by `validate`.
+    Fields that are not declared on this class are ignored (see `Config`).
+    """
+
+    instance_shape: str = Field(
+        ..., description="The instance shape used for deployment."
+    )
+    display_name: str = Field(..., description="The name of the model deployment.")
+    compartment_id: Optional[str] = Field(None, description="The compartment OCID.")
+    project_id: Optional[str] = Field(None, description="The project OCID.")
+    description: Optional[str] = Field(
+        None, description="The description of the deployment."
+    )
+    model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
+    models: Optional[List[AquaMultiModelRef]] = Field(
+        None, description="List of models for multimodel deployment."
+    )
+    # Declared Optional because the default is None; a plain `int` annotation
+    # would reject an explicitly passed None.
+    instance_count: Optional[int] = Field(
+        None, description="Number of instances used for deployment."
+    )
+    log_group_id: Optional[str] = Field(
+        None, description="OCI logging group ID for logs."
+    )
+    access_log_id: Optional[str] = Field(
+        None,
+        description="OCID for access logs. "
+        "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm",
+    )
+    predict_log_id: Optional[str] = Field(
+        None,
+        description="OCID for prediction logs. "
+        "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm",
+    )
+    bandwidth_mbps: Optional[int] = Field(
+        None, description="Bandwidth limit on the load balancer in Mbps."
+    )
+    web_concurrency: Optional[int] = Field(
+        None, description="Number of worker processes/threads for handling requests."
+    )
+    server_port: Optional[int] = Field(
+        None, description="Server port for the Docker container image."
+    )
+    health_check_port: Optional[int] = Field(
+        None, description="Health check port for the Docker container image."
+    )
+    env_var: Optional[Dict[str, str]] = Field(
+        default_factory=dict, description="Environment variables for deployment."
+    )
+    container_family: Optional[str] = Field(
+        None, description="Image family of the model deployment container runtime."
+    )
+    memory_in_gbs: Optional[float] = Field(
+        None, description="Memory (in GB) for the selected shape."
+    )
+    ocpus: Optional[float] = Field(
+        None, description="OCPU count for the selected shape."
+    )
+    model_file: Optional[str] = Field(
+        None, description="File used for model deployment."
+    )
+    private_endpoint_id: Optional[str] = Field(
+        None, description="Private endpoint ID for model deployment."
+    )
+    container_image_uri: Optional[str] = Field(
+        None,
+        description="Image URI for model deployment container runtime "
+        "(ignored for service-managed containers). "
+        "Required parameter for BYOC based deployments if this parameter was not set during "
+        "model registration.",
+    )
+    cmd_var: Optional[List[str]] = Field(
+        None, description="Command variables for the container runtime."
+    )
+    freeform_tags: Optional[Dict] = Field(
+        None, description="Freeform tags for model deployment."
+    )
+    defined_tags: Optional[Dict] = Field(
+        None, description="Defined tags for model deployment."
+    )
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate(cls, values: Any) -> Any:
+        """Ensure exactly one of `model_id` or `models` is provided.
+
+        Runs in "before" mode, i.e. on the input as passed to the model.
+        NOTE(review): `values.get` assumes the input is a mapping - confirm
+        that no caller constructs this model from a non-dict object.
+
+        Raises
+        ------
+        ValueError
+            When both `model_id` and `models` are set, or when neither is set.
+        """
+        model_id = values.get("model_id")
+        models = values.get("models")
+        if bool(model_id) == bool(models):  # Both set or both unset
+            raise ValueError(
+                "Exactly one of `model_id` or `models` must be provided to create a model deployment."
+            )
+        return values
+
+    def validate_multimodel_deployment_feasibility(
+        self, models_config_summary: ModelDeploymentConfigSummary
+    ) -> None:
+        """Check that the model group in `models` can be deployed on `instance_shape`.
+
+        The checks run in the order below and stop at the first failure:
+            - The model group (`models`) is not empty.
+            - The selected instance shape has an entry in the GPU allocation
+              of `models_config_summary`.
+            - Every model in the group has an entry in the deployment
+              configuration of `models_config_summary`.
+            - Every model lists the selected instance shape among its shapes.
+            - The GPU count given for each model is one of the multi-model
+              GPU counts configured for that model on the selected shape.
+            - The sum of the GPU counts of all models does not exceed the
+              number of GPUs available on the selected shape.
+
+        Parameters
+        ----------
+        models_config_summary : ModelDeploymentConfigSummary
+            Summary holding the GPU allocation per shape (`gpu_allocation`) and
+            the deployment configuration per model OCID (`deployment_config`).
+
+        Raises
+        ------
+        ConfigValidationError
+            When any of the checks listed above fails. The detailed reason is
+            logged; the exception message carries guidance for the user.
+        """
+        if not self.models:
+            logger.error(
+                "User defined model group (List[AquaMultiModelRef]) is None."
+            )
+            raise ConfigValidationError(
+                "Multi-model deployment requires at least one model, but none were provided. Please add one or more models to the model group to proceed."
+            )
+
+        selected_shape = self.instance_shape
+
+        # `or {}` keeps a missing section on the validation-error path
+        # instead of failing with a TypeError on the membership test.
+        gpu_allocation = models_config_summary.gpu_allocation or {}
+
+        if selected_shape not in gpu_allocation:
+            logger.error(
+                f"The model group is not compatible with the selected instance shape {selected_shape}"
+            )
+            raise ConfigValidationError(
+                f"The model group is not compatible with the selected instance shape '{selected_shape}'. Select a different instance shape."
+            )
+
+        total_available_gpus = gpu_allocation[selected_shape].total_gpus_available
+
+        model_deployment_config = models_config_summary.deployment_config or {}
+
+        # Explicit set arithmetic: model OCIDs requested by the user that have
+        # no entry in the deployment configuration.
+        required_model_keys = {model.model_id for model in self.models}
+        missing_model_keys = required_model_keys - set(model_deployment_config)
+
+        if missing_model_keys:
+            logger.error(
+                f"Missing the following model entry with key {missing_model_keys} in ModelDeploymentConfigSummary"
+            )
+            raise ConfigValidationError(
+                "One or more selected models are missing from the configuration, preventing validation for deployment on the given shape."
+            )
+
+        for model in self.models:
+            aqua_deployment_config = model_deployment_config[model.model_id]
+
+            if selected_shape not in aqua_deployment_config.shape:
+                logger.error(
+                    f"Model with OCID {model.model_id} in the model group is not compatible with the selected instance shape: {selected_shape}"
+                )
+                raise ConfigValidationError(
+                    "Select a different instance shape. One or more models in the group are incompatible with the selected instance shape."
+                )
+
+            # A shape without a configuration entry (or with an unset
+            # multi-model section) yields no valid GPU counts, so the check
+            # below rejects the model.
+            multi_model_configs = (
+                aqua_deployment_config.configuration.get(
+                    selected_shape, ConfigurationItem()
+                ).multi_model_deployment
+                or []
+            )
+
+            valid_gpu_configurations = [
+                gpu_shape_config.gpu_count for gpu_shape_config in multi_model_configs
+            ]
+            if model.gpu_count not in valid_gpu_configurations:
+                valid_gpu_str = ", ".join(map(str, valid_gpu_configurations))
+                logger.error(
+                    f"Model {model.model_id} allocated {model.gpu_count} GPUs by user, but its deployment configuration requires either {valid_gpu_str} GPUs."
+                )
+                raise ConfigValidationError(
+                    "Change the GPU count for one or more models in the model group. Adjust GPU allocations per model or choose a larger instance shape."
+                )
+
+        # Summed only after every per-model GPU count has been validated, so an
+        # unset GPU count is reported as a validation error above rather than
+        # failing here with a TypeError.
+        sum_model_gpus = sum(model.gpu_count for model in self.models)
+
+        if sum_model_gpus > total_available_gpus:
+            logger.error(
+                f"Selected shape {selected_shape} has {total_available_gpus} GPUs while model group has {sum_model_gpus} GPUs."
+            )
+            raise ConfigValidationError(
+                "Total requested GPU count exceeds the available GPU capacity for the selected instance shape. Adjust GPU allocations per model or choose a larger instance shape."
+            )
+
+    class Config:
+        # Unknown fields in the input are dropped instead of raising.
+        extra = "ignore"