Skip to content

[WIP][AQUA] GPU Shape Recommendation #1221

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions ads/aqua/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from ads.aqua.finetuning import AquaFineTuningApp
from ads.aqua.model import AquaModelApp
from ads.aqua.modeldeployment import AquaDeploymentApp
from ads.aqua.shaperecommend.recommend import AquaRecommendApp
from ads.aqua.verify_policies import AquaVerifyPoliciesApp
from ads.common.utils import LOG_LEVELS

Expand All @@ -31,6 +32,7 @@ class AquaCommand:
deployment = AquaDeploymentApp
evaluation = AquaEvaluationApp
verify_policies = AquaVerifyPoliciesApp
recommend = AquaRecommendApp

def __init__(
self,
Expand Down Expand Up @@ -96,18 +98,20 @@ def _validate_value(flag, value):
"If you intend to chain a function call to the result, please separate the "
"flag and the subsequent function call with separator `-`."
)

@staticmethod
def install():
    """Install the ADS Aqua extension from a wheel file.

    Set the environment variable `AQUA_EXTENSTION_PATH` to change the wheel
    file path.

    Returns
    -------
    int
        Exit status of the `pip install` command.
    """
    import subprocess

    wheel_file_path = os.environ.get(
        "AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl"
    )
    # The default path contains a glob (`*.whl`), so the command goes through
    # the shell to get it expanded. `check=False`: a failing install is
    # reported through the return value rather than an exception.
    status = subprocess.run(f"pip install {wheel_file_path}", shell=True, check=False)
    # `check_returncode` is a method, not the status; return the integer code.
    return status.returncode
21 changes: 21 additions & 0 deletions ads/aqua/common/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ class Config:
arbitrary_types_allowed = True
protected_namespaces = ()

class ComputeRank(Serializable):
    """
    Represents the cost and performance ranking for a compute shape.

    Both ranks are optional and default to ``None`` when no ranking is given.
    """

    # Annotated Optional because the default is None; a plain `int` annotation
    # would reject an explicitly supplied None.
    cost: Optional[int] = Field(
        default=None,
        description="The relative rank of the cost of the shape. Range is [10 (cost-effective), 100 (most-expensive)]",
    )

    performance: Optional[int] = Field(
        default=None,
        description="The relative rank of the performance of the shape. Range is [10 (lower performance), 110 (highest performance)]",
    )

class GPUSpecs(Serializable):
"""
Expand All @@ -61,6 +72,12 @@ class GPUSpecs(Serializable):
gpu_type: Optional[str] = Field(
default=None, description="The type of GPU (e.g., 'V100, A100, H100')."
)
quantization: Optional[List[str]] = Field(
default_factory=list, description="The quantization format supported by shape. (ex. bitsandbytes, fp8, etc.)"
)
ranking: Optional[ComputeRank] = Field(
None, description="The relative rank of the cost and performance of the shape."
)


class GPUShapesIndex(Serializable):
Expand All @@ -84,6 +101,10 @@ class ComputeShapeSummary(Serializable):
including CPU, memory, and optional GPU characteristics.
"""

available: Optional[bool] = Field(
default = False,
description="True if shape is available on user tenancy, "
)
core_count: Optional[int] = Field(
default=None,
description="Total number of CPU cores available for the compute shape.",
Expand Down
5 changes: 5 additions & 0 deletions ads/aqua/common/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ class AquaValueError(AquaError, ValueError):
def __init__(self, reason, status=403, service_payload=None):
super().__init__(reason, status, service_payload)

class AquaRecommendationError(AquaError):
    """Raised when a model is incompatible with the shape recommendation tool.

    The `reason`, `status` (400 unless overridden) and `service_payload`
    arguments are forwarded unchanged to the `AquaError` initializer.
    """

    def __init__(self, reason, status=400, service_payload=None):
        super().__init__(reason, status, service_payload)

class AquaFileNotFoundError(AquaError, FileNotFoundError):
"""Exception raised for missing target file."""
Expand Down
2 changes: 2 additions & 0 deletions ads/aqua/extension/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from ads.aqua.extension.evaluation_handler import __handlers__ as __eval_handlers__
from ads.aqua.extension.finetune_handler import __handlers__ as __finetune_handlers__
from ads.aqua.extension.model_handler import __handlers__ as __model_handlers__
from ads.aqua.extension.recommend_handler import __handlers__ as __gpu_handlers__
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can name it as __shape_handler?

from ads.aqua.extension.ui_handler import __handlers__ as __ui_handlers__
from ads.aqua.extension.ui_websocket_handler import __handlers__ as __ws_handlers__

Expand All @@ -24,6 +25,7 @@
+ __ui_handlers__
+ __eval_handlers__
+ __ws_handlers__
+ __gpu_handlers__
)


Expand Down
47 changes: 47 additions & 0 deletions ads/aqua/extension/recommend_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from tornado.web import HTTPError

from ads.aqua.common.decorator import handle_exceptions
from ads.aqua.extension.base_handler import AquaAPIhandler
from ads.aqua.extension.errors import Errors
from ads.aqua.shaperecommend.recommend import AquaRecommendApp
from ads.config import COMPARTMENT_OCID


class AquaRecommendHandler(AquaAPIhandler):
    """
    Handler for Aqua GPU Recommendation REST APIs.

    Methods
    -------
    post(self, *args, **kwargs)
        Obtains the eligible compute shapes that would fit the specified model, context length, model weights, and quantization level.

    Raises
    ------
    HTTPError: For various failure scenarios such as invalid input format, missing data, etc.
    """

    @handle_exceptions
    def post(self, *args, **kwargs):  # noqa: ARG002
        """
        Obtains the eligible compute shapes that would fit the specified model, context length, model weights, and quantization level.

        The JSON request body is expanded into keyword arguments for
        `AquaRecommendApp.which_gpu`, and the result is written to the response.

        Returns
        -------
        ShapeRecommendationReport
            Report containing shape recommendations and troubleshooting advice, if any.

        Raises
        ------
        HTTPError
            400 if the body cannot be parsed, is empty, or is not a JSON object.
        """
        try:
            input_data = self.get_json_body()
        except Exception as ex:
            raise HTTPError(400, Errors.INVALID_INPUT_DATA_FORMAT) from ex

        if not input_data:
            raise HTTPError(400, Errors.NO_INPUT_DATA)

        # The payload is unpacked with ** below, which only works for a
        # mapping; reject lists, strings and numbers as a client error instead
        # of letting the unpacking fail with a TypeError.
        if not isinstance(input_data, dict):
            raise HTTPError(400, Errors.INVALID_INPUT_DATA_FORMAT)

        self.finish(AquaRecommendApp().which_gpu(**input_data))


# (URL pattern, handler class) pairs exported by this module; the pattern
# captures an optional path segment after "recommendation/".
__handlers__ = [("recommendation/?([^/]*)", AquaRecommendHandler)]
148 changes: 103 additions & 45 deletions ads/aqua/resources/gpu_shapes_index.json
Original file line number Diff line number Diff line change
@@ -1,94 +1,152 @@
{
"shapes": {
"BM.GPU.A10.4": {
"gpu_count": 4,
"gpu_memory_in_gbs": 96,
"gpu_type": "A10"
"BM.GPU.H200.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 1128,
"gpu_type": "H200",
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking": {
"cost": 100,
"performance": 110
}
},
"BM.GPU.A100-V2.8": {
"BM.GPU.H100.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 640,
"gpu_type": "A100"
"gpu_type": "H100",
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking": {
"cost": 100,
"performance": 100
}
},
"BM.GPU.B4.8": {
"BM.GPU.MI300X.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 320,
"gpu_type": "A100"
"gpu_memory_in_gbs": 1536,
"gpu_type": "MI300X",
"quantization": ["fp8", "gguf"],
"ranking": {
"cost": 90,
"performance": 90
}
},
"BM.GPU.H100.8": {
"BM.GPU.A100-V2.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 640,
"gpu_type": "H100"
"gpu_type": "A100",
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking": {
"cost": 80,
"performance": 70
}
},
"BM.GPU.H200.8": {
"BM.GPU.B4.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 1128,
"gpu_type": "H200"
"gpu_memory_in_gbs": 320,
"gpu_type": "A100",
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking": {
"cost": 70,
"performance": 60
}
},
"BM.GPU.L40S-NC.4": {
"gpu_count": 4,
"gpu_memory_in_gbs": 192,
"gpu_type": "L40S"
"gpu_type": "L40S",
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking": {
"cost": 60,
"performance": 80
}
},
"BM.GPU.L40S.4": {
"gpu_count": 4,
"gpu_memory_in_gbs": 192,
"gpu_type": "L40S"
},
"BM.GPU.MI300X.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 1536,
"gpu_type": "MI300X"
},
"BM.GPU2.2": {
"gpu_count": 2,
"gpu_memory_in_gbs": 32,
"gpu_type": "P100"
},
"BM.GPU3.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 128,
"gpu_type": "V100"
},
"BM.GPU4.8": {
"gpu_count": 8,
"gpu_memory_in_gbs": 320,
"gpu_type": "A100"
"gpu_type": "L40S",
"quantization": ["awq", "gptq", "marlin", "fp8", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking": {
"cost": 60,
"performance": 80
}
},
"VM.GPU.A10.1": {
"gpu_count": 1,
"gpu_memory_in_gbs": 24,
"gpu_type": "A10"
"gpu_type": "A10",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add FP8 for the A10 shapes as well.

"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking" : {
"cost": 20,
"performance": 30
}
},
"VM.GPU.A10.2": {
"gpu_count": 2,
"gpu_memory_in_gbs": 48,
"gpu_type": "A10"
"gpu_type": "A10",
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking" : {
"cost": 40,
"performance": 40
}
},
"VM.GPU.A10.4": {
"BM.GPU.A10.4": {
"gpu_count": 4,
"gpu_memory_in_gbs": 96,
"gpu_type": "A10"
"gpu_type": "A10",
"quantization": ["awq", "gptq", "marlin", "int8", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking" : {
"cost": 50,
"performance": 50
}
},
"BM.GPU2.2": {
"gpu_count": 2,
"gpu_memory_in_gbs": 32,
"gpu_type": "P100",
"quantization": ["fp16"],
"ranking": {
"cost": 30,
"performance": 20
}
},
"VM.GPU2.1": {
"gpu_count": 1,
"gpu_memory_in_gbs": 16,
"gpu_type": "P100"
"gpu_type": "P100",
"quantization": ["fp16"],
"ranking": {
"cost": 10,
"performance": 10
}
},
"VM.GPU3.1": {
"gpu_count": 1,
"gpu_memory_in_gbs": 16,
"gpu_type": "V100"
"gpu_type": "V100",
"quantization" : ["gptq", "bitblas", "aqlm", "bitsandbytes", "deepspeedfp", "gguf"],
"ranking" : {
"cost": 35,
"performance": 10
}
},
"VM.GPU3.2": {
"gpu_count": 2,
"gpu_memory_in_gbs": 32,
"gpu_type": "V100"
"gpu_type": "V100",
"ranking" : {
"cost": 45,
"performance": 20
}
},
"VM.GPU3.4": {
"gpu_count": 4,
"gpu_memory_in_gbs": 64,
"gpu_type": "V100"
"gpu_type": "V100",
"ranking" : {
"cost": 55,
"performance": 45
}
}
}
}
}
6 changes: 6 additions & 0 deletions ads/aqua/shaperecommend/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env python
# Copyright (c) 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
from ads.aqua.shaperecommend.recommend import AquaRecommendApp

# Public API of the package: only the recommendation app imported above is exported.
__all__ = ["AquaRecommendApp"]
Loading