oracle · elizjo · Jul 7, 2025 · Jul 14, 2025 · Jul 25, 2025 · Jul 25, 2025
@@ -14,6 +14,7 @@
 from ads.aqua.finetuning import AquaFineTuningApp
 from ads.aqua.model import AquaModelApp
 from ads.aqua.modeldeployment import AquaDeploymentApp
+from ads.aqua.shaperecommend.recommend import AquaRecommendApp
 from ads.aqua.verify_policies import AquaVerifyPoliciesApp
 from ads.common.utils import LOG_LEVELS
 
@@ -31,6 +32,7 @@ class AquaCommand:
     deployment = AquaDeploymentApp
     evaluation = AquaEvaluationApp
     verify_policies = AquaVerifyPoliciesApp
+    recommend = AquaRecommendApp
 
     def __init__(
         self,
@@ -96,18 +98,20 @@ def _validate_value(flag, value):
                 "If you intend to chain a function call to the result, please separate the "
                 "flag and the subsequent function call with separator `-`."
             )
-    
+
     @staticmethod
     def install():
         """Install ADS Aqua Extension from wheel file. Set enviroment variable `AQUA_EXTENSTION_PATH` to change the wheel file path.
 
-        Return 
+        Return
         ------
         int:
             Installatation status.
         """
         import subprocess
 
-        wheel_file_path = os.environ.get("AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl")
-        status =  subprocess.run(f"pip install {wheel_file_path}",shell=True)
-        return status.check_returncode
+        wheel_file_path = os.environ.get(
+            "AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl"
+        )
+        status = subprocess.run(f"pip install {wheel_file_path}", shell=True, check=False)  # shell expands the `*` glob
+        return status.returncode  # the int exit code; `check_returncode` is a method, not the status
@@ -46,6 +46,17 @@ class Config:
         arbitrary_types_allowed = True
         protected_namespaces = ()
 
+class ComputeRank(Serializable):
+    """Relative cost and performance ranking of a compute shape; both ranks are optional."""
+
+    cost: Optional[int] = Field(
+        None,
+        description="The relative rank of the cost of the shape. Range is [10 (cost-effective), 100 (most-expensive)]",
+    )
+    performance: Optional[int] = Field(
+        None,
+        description="The relative rank of the performance of the shape. Range is [10 (lower performance), 110 (highest performance)]",
+    )
 
 class GPUSpecs(Serializable):
     """
@@ -61,6 +72,12 @@ class GPUSpecs(Serializable):
     gpu_type: Optional[str] = Field(
         default=None, description="The type of GPU (e.g., 'V100, A100, H100')."
     )
+    quantization: Optional[List[str]] = Field(
+        default_factory=list, description="The quantization format supported by shape. (ex.  bitsandbytes, fp8, etc.)"
+    )
+    ranking: Optional[ComputeRank] = Field(
+        None, description="The relative rank of the cost and performance of the shape."
+    )
 
 
 class GPUShapesIndex(Serializable):
@@ -84,6 +101,10 @@ class ComputeShapeSummary(Serializable):
     including CPU, memory, and optional GPU characteristics.
     """
 
+    available: Optional[bool] = Field(
+        default=False,
+        description="True if the shape is available in the user's tenancy.",
+    )
     core_count: Optional[int] = Field(
         default=None,
         description="Total number of CPU cores available for the compute shape.",

@@ -55,6 +55,11 @@ class AquaValueError(AquaError, ValueError):
     def __init__(self, reason, status=403, service_payload=None):
         super().__init__(reason, status, service_payload)
 
+class AquaRecommendationError(AquaError):
+    """Exception raised for models incompatible with the shape recommendation tool; `status` defaults to 400."""
+
+    def __init__(self, reason, status=400, service_payload=None):
+        super().__init__(reason, status, service_payload)
 
 class AquaFileNotFoundError(AquaError, FileNotFoundError):
     """Exception raised for missing target file."""

@@ -13,6 +13,7 @@
 from ads.aqua.extension.evaluation_handler import __handlers__ as __eval_handlers__
 from ads.aqua.extension.finetune_handler import __handlers__ as __finetune_handlers__
 from ads.aqua.extension.model_handler import __handlers__ as __model_handlers__
+from ads.aqua.extension.recommend_handler import __handlers__ as __gpu_handlers__
 from ads.aqua.extension.ui_handler import __handlers__ as __ui_handlers__
 from ads.aqua.extension.ui_websocket_handler import __handlers__ as __ws_handlers__
 
@@ -24,6 +25,7 @@
     + __ui_handlers__
     + __eval_handlers__
     + __ws_handlers__
+    + __gpu_handlers__
 )
 
 

@@ -0,0 +1,47 @@
+from tornado.web import HTTPError
+
+from ads.aqua.common.decorator import handle_exceptions
+from ads.aqua.extension.base_handler import AquaAPIhandler
+from ads.aqua.extension.errors import Errors
+from ads.aqua.shaperecommend.recommend import AquaRecommendApp
+from ads.config import COMPARTMENT_OCID
+
+
+class AquaRecommendHandler(AquaAPIhandler):
+    """
+    Handler for Aqua GPU Recommendation REST APIs.
+
+    Methods
+    -------
+    post(self, *args, **kwargs)
+        Obtains the eligible compute shapes that would fit the specified model, context length, model weights, and quantization level.
+
+    Raises
+    ------
+    HTTPError: For various failure scenarios such as invalid input format, missing data, etc.
+    """
+
+    @handle_exceptions
+    def post(self, *args, **kwargs):  # noqa: ARG002
+        """
+        Obtains the eligible compute shapes that would fit the specified model, context length, model weights, and quantization level.
+
+        Returns
+        -------
+        ShapeRecommendationReport
+            Report containing shape recommendations and troubleshooting advice, if any.
+        """
+        try:
+            input_data = self.get_json_body()
+        except Exception as ex:  # any failure while reading the JSON body is reported as HTTP 400
+            raise HTTPError(400, Errors.INVALID_INPUT_DATA_FORMAT) from ex
+
+        if not input_data:  # an empty or missing body is rejected before calling the app
+            raise HTTPError(400, Errors.NO_INPUT_DATA)
+
+        self.finish(AquaRecommendApp().which_gpu(**input_data))  # body keys are passed as keyword arguments
+
+
+__handlers__ = [
+    ("recommendation/?([^/]*)", AquaRecommendHandler),
+]
@@ -1,94 +1,152 @@
 {
   "shapes": {
-    "BM.GPU.A10.4": {
-      "gpu_count": 4,
-      "gpu_memory_in_gbs": 96,
-      "gpu_type": "A10"
+    "BM.GPU.H200.8": {
+      "gpu_count": 8,
+      "gpu_memory_in_gbs": 1128,
+      "gpu_type": "H200",
+      "quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 100,
+        "performance": 110
+      }
     },
-    "BM.GPU.A100-V2.8": {
+    "BM.GPU.H100.8": {
       "gpu_count": 8,
       "gpu_memory_in_gbs": 640,
-      "gpu_type": "A100"
+      "gpu_type": "H100",
+      "quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 100,
+        "performance": 100
+      }
     },
-    "BM.GPU.B4.8": {
+    "BM.GPU.MI300X.8": {
       "gpu_count": 8,
-      "gpu_memory_in_gbs": 320,
-      "gpu_type": "A100"
+      "gpu_memory_in_gbs": 1536,
+      "gpu_type": "MI300X",
+      "quantization": ["fp8", "gguf"],
+      "ranking": {
+        "cost": 90,
+        "performance": 90
+      }
     },
-    "BM.GPU.H100.8": {
+    "BM.GPU.A100-V2.8": {
       "gpu_count": 8,
       "gpu_memory_in_gbs": 640,
-      "gpu_type": "H100"
+      "gpu_type": "A100",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 80,
+        "performance": 70
+      }
     },
-    "BM.GPU.H200.8": {
+    "BM.GPU.B4.8": {
       "gpu_count": 8,
-      "gpu_memory_in_gbs": 1128,
-      "gpu_type": "H200"
+      "gpu_memory_in_gbs": 320,
+      "gpu_type": "A100",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 70,
+        "performance": 60
+      }
     },
     "BM.GPU.L40S-NC.4": {
       "gpu_count": 4,
       "gpu_memory_in_gbs": 192,
-      "gpu_type": "L40S"
+      "gpu_type": "L40S",
+      "quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 60,
+        "performance": 80
+      }
     },
     "BM.GPU.L40S.4": {
       "gpu_count": 4,
       "gpu_memory_in_gbs": 192,
-      "gpu_type": "L40S"
-    },
-    "BM.GPU.MI300X.8": {
-      "gpu_count": 8,
-      "gpu_memory_in_gbs": 1536,
-      "gpu_type": "MI300X"
-    },
-    "BM.GPU2.2": {
-      "gpu_count": 2,
-      "gpu_memory_in_gbs": 32,
-      "gpu_type": "P100"
-    },
-    "BM.GPU3.8": {
-      "gpu_count": 8,
-      "gpu_memory_in_gbs": 128,
-      "gpu_type": "V100"
-    },
-    "BM.GPU4.8": {
-      "gpu_count": 8,
-      "gpu_memory_in_gbs": 320,
-      "gpu_type": "A100"
+      "gpu_type": "L40S",
+      "quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 60,
+        "performance": 80
+      }
     },
     "VM.GPU.A10.1": {
       "gpu_count": 1,
       "gpu_memory_in_gbs": 24,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking" : {
+        "cost": 20,
+        "performance": 30
+      }
     },
     "VM.GPU.A10.2": {
       "gpu_count": 2,
       "gpu_memory_in_gbs": 48,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking" : {
+        "cost": 40,
+        "performance": 40
+      }
     },
-    "VM.GPU.A10.4": {
+    "BM.GPU.A10.4": {
       "gpu_count": 4,
       "gpu_memory_in_gbs": 96,
-      "gpu_type": "A10"
+      "gpu_type": "A10",
+      "quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking" : {
+        "cost": 50,
+        "performance": 50
+      }
+    },
+    "BM.GPU2.2": {
+      "gpu_count": 2,
+      "gpu_memory_in_gbs": 32,
+      "gpu_type": "P100",
+      "quantization": ["fp16"],
+      "ranking": {
+        "cost": 30,
+        "performance": 20
+      }
     },
     "VM.GPU2.1": {
       "gpu_count": 1,
       "gpu_memory_in_gbs": 16,
-      "gpu_type": "P100"
+      "gpu_type": "P100",
+      "quantization": ["fp16"],
+      "ranking": {
+        "cost": 10,
+        "performance": 10
+      }
     },
     "VM.GPU3.1": {
       "gpu_count": 1,
       "gpu_memory_in_gbs": 16,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "quantization": ["gptq", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
+      "ranking": {
+        "cost": 35,
+        "performance": 10
+      }
     },
     "VM.GPU3.2": {
       "gpu_count": 2,
       "gpu_memory_in_gbs": 32,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "ranking": {
+        "cost": 45,
+        "performance": 20
+      }
     },
     "VM.GPU3.4": {
       "gpu_count": 4,
       "gpu_memory_in_gbs": 64,
-      "gpu_type": "V100"
+      "gpu_type": "V100",
+      "ranking": {
+        "cost": 55,
+        "performance": 45
+      }
     }
   }
-}
+}
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+from ads.aqua.shaperecommend.recommend import AquaRecommendApp
+
+__all__ = ["AquaRecommendApp"]