|
7 | 7 | from oci.data_science.models import ModelDeployment, ModelDeploymentSummary
|
8 | 8 | from pydantic import BaseModel, Field, model_validator
|
9 | 9 |
|
| 10 | +from ads.aqua import logger |
10 | 11 | from ads.aqua.common.entities import AquaMultiModelRef, ShapeInfo
|
11 | 12 | from ads.aqua.common.enums import Tags
|
12 | 13 | from ads.aqua.config.utils.serializer import Serializable
|
@@ -142,101 +143,6 @@ class Config:
|
142 | 143 | extra = "ignore"
|
143 | 144 |
|
144 | 145 |
|
145 |
class CreateModelDeploymentDetails(BaseModel):
    """Request payload for creating an Aqua model deployment.

    Exactly one of ``model_id`` (single-model deployment) or ``models``
    (multi-model deployment) must be provided; the ``validate`` model
    validator enforces this before field validation runs. Fields that are
    not declared here are ignored (``Config.extra = "ignore"``).
    """

    instance_shape: str = Field(
        ..., description="The instance shape used for deployment."
    )
    display_name: str = Field(..., description="The name of the model deployment.")
    compartment_id: Optional[str] = Field(None, description="The compartment OCID.")
    project_id: Optional[str] = Field(None, description="The project OCID.")
    description: Optional[str] = Field(
        None, description="The description of the deployment."
    )
    model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
    models: Optional[List[AquaMultiModelRef]] = Field(
        None, description="List of models for multimodel deployment."
    )
    # Annotated Optional because the default is None; a bare `int` annotation
    # would reject an explicitly passed `instance_count=None`.
    instance_count: Optional[int] = Field(
        None, description="Number of instances used for deployment."
    )
    log_group_id: Optional[str] = Field(
        None, description="OCI logging group ID for logs."
    )
    access_log_id: Optional[str] = Field(
        None,
        description="OCID for access logs. "
        "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm",
    )
    predict_log_id: Optional[str] = Field(
        None,
        # Trailing space keeps the sentence and the URL from running together.
        description="OCID for prediction logs. "
        "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm",
    )
    bandwidth_mbps: Optional[int] = Field(
        None, description="Bandwidth limit on the load balancer in Mbps."
    )
    web_concurrency: Optional[int] = Field(
        None, description="Number of worker processes/threads for handling requests."
    )
    server_port: Optional[int] = Field(
        None, description="Server port for the Docker container image."
    )
    health_check_port: Optional[int] = Field(
        None, description="Health check port for the Docker container image."
    )
    env_var: Optional[Dict[str, str]] = Field(
        default_factory=dict, description="Environment variables for deployment."
    )
    container_family: Optional[str] = Field(
        None, description="Image family of the model deployment container runtime."
    )
    memory_in_gbs: Optional[float] = Field(
        None, description="Memory (in GB) for the selected shape."
    )
    ocpus: Optional[float] = Field(
        None, description="OCPU count for the selected shape."
    )
    model_file: Optional[str] = Field(
        None, description="File used for model deployment."
    )
    private_endpoint_id: Optional[str] = Field(
        None, description="Private endpoint ID for model deployment."
    )
    container_image_uri: Optional[str] = Field(
        None,
        description="Image URI for model deployment container runtime "
        "(ignored for service-managed containers). "
        "Required parameter for BYOC based deployments if this parameter was not set during "
        "model registration.",
    )
    cmd_var: Optional[List[str]] = Field(
        None, description="Command variables for the container runtime."
    )
    freeform_tags: Optional[Dict] = Field(
        None, description="Freeform tags for model deployment."
    )
    defined_tags: Optional[Dict] = Field(
        None, description="Defined tags for model deployment."
    )

    @model_validator(mode="before")
    @classmethod
    def validate(cls, values: Any) -> Any:
        """Ensure exactly one of `model_id` or `models` is provided.

        Runs before field validation, so ``values`` is the raw input. It is
        normally a dict of keyword arguments, but a "before" validator can
        receive any object, so attribute access is used as a fallback
        instead of assuming ``.get`` exists.

        Raises
        ------
        ValueError
            If both or neither of `model_id` and `models` are set.
        """
        if isinstance(values, dict):
            model_id = values.get("model_id")
            models = values.get("models")
        else:
            model_id = getattr(values, "model_id", None)
            models = getattr(values, "models", None)

        if bool(model_id) == bool(models):  # Both set or both unset
            raise ValueError(
                "Exactly one of `model_id` or `models` must be provided to create a model deployment."
            )
        return values

    class Config:
        extra = "ignore"
238 |
| - |
239 |
| - |
240 | 146 | class ShapeInfoConfig(Serializable):
|
241 | 147 | """Describes how many memory and cpu to this model for specific shape.
|
242 | 148 |
|
@@ -382,6 +288,17 @@ class GPUShapeAllocation(Serializable):
|
382 | 288 | class Config:
|
383 | 289 | extra = "allow"
|
384 | 290 |
|
class ConfigValidationError(Exception):
    """Exception raised when a deployment configuration fails validation.

    Parameters
    ----------
    message : str, optional
        Human-readable explanation of the failure. Defaults to a generic
        message about the model group being incompatible with the selected
        instance shape.
    """

    def __init__(
        self,
        # Built from adjacent literals rather than a triple-quoted string so
        # the user-facing text carries no embedded newline or source
        # indentation.
        message: str = (
            "Validation failed: The provided model group configuration is "
            "incompatible with the selected instance shape. "
            "Please verify the GPU count per model and ensure multi-model "
            "deployment is supported for the chosen instance shape."
        ),
    ):
        super().__init__(message)
385 | 302 |
|
386 | 303 | class ModelDeploymentConfigSummary(Serializable):
|
387 | 304 | """Top-level configuration model for OCI-based deployments.
|
@@ -413,3 +330,189 @@ class ModelDeploymentConfigSummary(Serializable):
|
413 | 330 |
|
414 | 331 | class Config:
|
415 | 332 | extra = "allow"
|
| 333 | + |
| 334 | + |
class CreateModelDeploymentDetails(BaseModel):
    """Request payload for creating an Aqua model deployment.

    Exactly one of ``model_id`` (single-model deployment) or ``models``
    (multi-model deployment) must be provided; the ``validate`` model
    validator enforces this before field validation runs. Fields that are
    not declared here are ignored (``Config.extra = "ignore"``).
    """

    instance_shape: str = Field(
        ..., description="The instance shape used for deployment."
    )
    display_name: str = Field(..., description="The name of the model deployment.")
    compartment_id: Optional[str] = Field(None, description="The compartment OCID.")
    project_id: Optional[str] = Field(None, description="The project OCID.")
    description: Optional[str] = Field(
        None, description="The description of the deployment."
    )
    model_id: Optional[str] = Field(None, description="The model OCID to deploy.")
    models: Optional[List[AquaMultiModelRef]] = Field(
        None, description="List of models for multimodel deployment."
    )
    # Annotated Optional because the default is None; a bare `int` annotation
    # would reject an explicitly passed `instance_count=None`.
    instance_count: Optional[int] = Field(
        None, description="Number of instances used for deployment."
    )
    log_group_id: Optional[str] = Field(
        None, description="OCI logging group ID for logs."
    )
    access_log_id: Optional[str] = Field(
        None,
        description="OCID for access logs. "
        "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm",
    )
    predict_log_id: Optional[str] = Field(
        None,
        # Trailing space keeps the sentence and the URL from running together.
        description="OCID for prediction logs. "
        "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm",
    )
    bandwidth_mbps: Optional[int] = Field(
        None, description="Bandwidth limit on the load balancer in Mbps."
    )
    web_concurrency: Optional[int] = Field(
        None, description="Number of worker processes/threads for handling requests."
    )
    server_port: Optional[int] = Field(
        None, description="Server port for the Docker container image."
    )
    health_check_port: Optional[int] = Field(
        None, description="Health check port for the Docker container image."
    )
    env_var: Optional[Dict[str, str]] = Field(
        default_factory=dict, description="Environment variables for deployment."
    )
    container_family: Optional[str] = Field(
        None, description="Image family of the model deployment container runtime."
    )
    memory_in_gbs: Optional[float] = Field(
        None, description="Memory (in GB) for the selected shape."
    )
    ocpus: Optional[float] = Field(
        None, description="OCPU count for the selected shape."
    )
    model_file: Optional[str] = Field(
        None, description="File used for model deployment."
    )
    private_endpoint_id: Optional[str] = Field(
        None, description="Private endpoint ID for model deployment."
    )
    container_image_uri: Optional[str] = Field(
        None,
        description="Image URI for model deployment container runtime "
        "(ignored for service-managed containers). "
        "Required parameter for BYOC based deployments if this parameter was not set during "
        "model registration.",
    )
    cmd_var: Optional[List[str]] = Field(
        None, description="Command variables for the container runtime."
    )
    freeform_tags: Optional[Dict] = Field(
        None, description="Freeform tags for model deployment."
    )
    defined_tags: Optional[Dict] = Field(
        None, description="Defined tags for model deployment."
    )

    @model_validator(mode="before")
    @classmethod
    def validate(cls, values: Any) -> Any:
        """Ensure exactly one of `model_id` or `models` is provided.

        Runs before field validation, so ``values`` is the raw input. It is
        normally a dict of keyword arguments, but a "before" validator can
        receive any object, so attribute access is used as a fallback
        instead of assuming ``.get`` exists.

        Raises
        ------
        ValueError
            If both or neither of `model_id` and `models` are set.
        """
        if isinstance(values, dict):
            model_id = values.get("model_id")
            models = values.get("models")
        else:
            model_id = getattr(values, "model_id", None)
            models = getattr(values, "models", None)

        if bool(model_id) == bool(models):  # Both set or both unset
            raise ValueError(
                "Exactly one of `model_id` or `models` must be provided to create a model deployment."
            )
        return values

    def validate_multimodel_deployment_feasibility(
        self, models_config_summary: ModelDeploymentConfigSummary
    ) -> None:
        """Validate that the model group can be deployed on `instance_shape`.

        Checks the user-supplied model group (``self.models``, each entry
        carrying a ``model_id`` and a ``gpu_count``) against the supplied
        configuration summary.

        Validation criteria, in the order they are checked:
        - A model group was provided.
        - The selected instance shape appears in the summary's GPU allocation.
        - Every model in the group has an entry in the summary's deployment
          configuration.
        - Every model lists the selected instance shape as supported.
        - Each model's requested GPU count is one of the multi-model
          deployment GPU counts configured for that model on the shape.
        - The total number of GPUs requested by all models does not exceed
          the GPUs available on the selected instance shape.

        Parameters
        ----------
        models_config_summary : ModelDeploymentConfigSummary
            Summary providing ``gpu_allocation`` (keyed by shape name) and
            ``deployment_config`` (keyed by model OCID) for the model group.

        Raises
        ------
        ConfigValidationError
            When no model group is provided.
            When the selected shape is not compatible with the model group
            or with one of its models.
            When a model in the group has no deployment configuration entry.
            When a model's GPU count is not a valid multi-model configuration.
            When the total requested GPUs exceed the shape's GPU capacity.
        """
        if not self.models:
            logger.error(
                "User defined model group (List[AquaMultiModelRef]) is None."
            )
            raise ConfigValidationError(
                "Multi-model deployment requires at least one model, but none were provided. Please add one or more models to the model group to proceed."
            )

        selected_shape = self.instance_shape

        if selected_shape not in models_config_summary.gpu_allocation:
            logger.error(
                "The model group is not compatible with the selected instance shape %s",
                selected_shape,
            )
            raise ConfigValidationError(
                f"The model group is not compatible with the selected instance shape '{selected_shape}'. Select a different instance shape."
            )

        total_available_gpus = models_config_summary.gpu_allocation[
            selected_shape
        ].total_gpus_available

        model_deployment_config = models_config_summary.deployment_config

        # Use a real set: subtracting a dict view from a list only works via
        # the view's reflected operator, which is easy to break by accident.
        required_model_keys = {model.model_id for model in self.models}
        missing_model_keys = required_model_keys - model_deployment_config.keys()

        if missing_model_keys:
            logger.error(
                "Missing the following model entry with key %s in ModelDeploymentConfigSummary",
                missing_model_keys,
            )
            raise ConfigValidationError(
                "One or more selected models are missing from the configuration, preventing validation for deployment on the given shape."
            )

        sum_model_gpus = 0

        for model in self.models:
            aqua_deployment_config = model_deployment_config[model.model_id]

            if selected_shape not in aqua_deployment_config.shape:
                logger.error(
                    "Model with OCID %s in the model group is not compatible with the selected instance shape: %s",
                    model.model_id,
                    selected_shape,
                )
                raise ConfigValidationError(
                    "Select a different instance shape. One or more models in the group are incompatible with the selected instance shape."
                )

            multi_model_configs = aqua_deployment_config.configuration.get(
                selected_shape, ConfigurationItem()
            ).multi_model_deployment

            # `or []` treats an unset multi-model list as "no valid GPU
            # counts", so the check below fails cleanly instead of iterating
            # over None.
            valid_gpu_configurations = [
                gpu_shape_config.gpu_count
                for gpu_shape_config in multi_model_configs or []
            ]
            if model.gpu_count not in valid_gpu_configurations:
                valid_gpu_str = ", ".join(map(str, valid_gpu_configurations))
                logger.error(
                    "Model %s allocated %s GPUs by user, but its deployment configuration requires either %s GPUs.",
                    model.model_id,
                    model.gpu_count,
                    valid_gpu_str,
                )
                raise ConfigValidationError(
                    "Change the GPU count for one or more models in the model group. Adjust GPU allocations per model or choose a larger instance shape."
                )

            # Accumulate only after the count has been validated, so a
            # missing (None) gpu_count surfaces as ConfigValidationError
            # above rather than a TypeError here.
            sum_model_gpus += model.gpu_count

        if sum_model_gpus > total_available_gpus:
            logger.error(
                "Selected shape %s has %s GPUs while model group has %s GPUs.",
                selected_shape,
                total_available_gpus,
                sum_model_gpus,
            )
            raise ConfigValidationError(
                "Total requested GPU count exceeds the available GPU capacity for the selected instance shape. Adjust GPU allocations per model or choose a larger instance shape."
            )

    class Config:
        extra = "ignore"
0 commit comments