ODSC-68580: Update Evaluation SDK to Support Multi-Model Deployment #1085

Merged
60 changes: 59 additions & 1 deletion ads/aqua/evaluation/evaluation.py
@@ -75,6 +75,7 @@
CreateAquaEvaluationDetails,
)
from ads.aqua.evaluation.errors import EVALUATION_JOB_EXIT_CODE_MESSAGE
from ads.aqua.model.constants import ModelCustomMetadataFields
from ads.aqua.ui import AquaContainerConfig
from ads.common.auth import default_signer
from ads.common.object_storage_details import ObjectStorageDetails
@@ -183,6 +184,26 @@ def create(
evaluation_source = ModelDeployment.from_id(
create_aqua_evaluation_details.evaluation_source_id
)
try:
if Tags.MULTIMODEL_TYPE_TAG in evaluation_source.freeform_tags:
multi_model_id = evaluation_source.freeform_tags.get(
Tags.AQUA_MODEL_ID_TAG, UNKNOWN
)

if not multi_model_id:
raise AquaRuntimeError(
f"Invalid multi model deployment {multi_model_id}."
f"Make sure the {Tags.AQUA_MODEL_ID_TAG} tag is added to the deployment."
)

aqua_model = DataScienceModel.from_id(multi_model_id)
AquaEvaluationApp.validate_name_multi_model(
aqua_model, create_aqua_evaluation_details
)

except (AquaRuntimeError, AquaValueError) as err:
Member: I think we don't need to add AquaValueError into the except section. It will work by itself.

@elizjo (Member Author), Feb 26, 2025: I see. I will remove except (AquaRuntimeError, AquaValueError) as err: and the try block. The evaluation_handler.py post method (which calls the .create() method here) has the @handle_exceptions decorator, which contains an except AquaError as error clause. I believe the decorator will catch the exception, so this code is redundant here.

raise AquaValueError(f"{err}") from err

try:
if (
evaluation_source.runtime.type
@@ -550,6 +571,43 @@ def create(
parameters=AquaEvalParams(),
)

@staticmethod
def validate_name_multi_model(
Member: Let's name it validate_model_name? Also, please add a description for this method.

Member Author: Fixed the name to validate_model_name.

evaluation_source: DataScienceModel,
create_aqua_evaluation_details: CreateAquaEvaluationDetails,
):
Member: NIT: -> None?

Member Author: Done.
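
To address the request above for a method description, a docstring along these lines could sit here, right after the signature (the wording is illustrative, not the merged text):

"""
Validates the user-provided model name for an evaluation that targets a
multi-model deployment.

Parameters
----------
evaluation_source: DataScienceModel
    The model group backing the multi-model deployment.
create_aqua_evaluation_details: CreateAquaEvaluationDetails
    The evaluation request; its model_parameters must include a "model" entry.

Raises
------
AquaValueError
    If no model name was provided or it does not match any model in the group.
"""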

user_model_parameters = create_aqua_evaluation_details.model_parameters
if "model" not in user_model_parameters:
logger.debug(
f"User did not input model name for multi model deployment evaluation with evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}"
)
raise AquaValueError(
"Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment."
Member: I think we can show the valid names in this message:

Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment. The valid model names for this Model Deployment are {valid_model_names}.

Member Author: Done.

)

custom_metadata_list = evaluation_source.custom_metadata_list
user_model_name = user_model_parameters.get("model")

model_group_count = int(
Member: Wouldn't this fail if ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT is missing from the custom metadata?

Member Author: Corrected.

custom_metadata_list.get(
ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT
).value
)
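
On the reviewer's question above: if MULTIMODEL_GROUP_COUNT is absent from the custom metadata, the chained .get(...).value / int(...) call fails with an unhelpful error. A guarded lookup along these lines would surface a clearer message; this is an illustrative sketch under that assumption, not necessarily the exact correction that was merged:

try:
    model_group_count = int(
        custom_metadata_list.get(
            ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT
        ).value
    )
except Exception as ex:
    # Covers a missing metadata entry as well as a non-numeric value.
    raise AquaRuntimeError(
        f"Missing or invalid {ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT} "
        "in the custom metadata of the multi model deployment source."
    ) from ex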

model_names = [
custom_metadata_list.get(f"model-name-{idx}").value
for idx in range(model_group_count)
]

if user_model_name not in model_names:
valid_model_names = ", ".join(map(str, model_names))
logger.debug(
f"User input for model name was {user_model_name}, expected {valid_model_names} evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}"
)
raise AquaValueError(
f"Provide the correct model name. The valid model names for this Model Deployment are {valid_model_names}."
)

def _build_evaluation_runtime(
self,
evaluation_id: str,
@@ -1392,7 +1450,7 @@ def _fetch_jobrun(
)
except Exception as e:
logger.debug(
f"Failed to retreive job run: {jobrun_id}. " f"DEBUG INFO: {str(e)}"
f"Failed to retreive job run: {jobrun_id}. DEBUG INFO: {str(e)}"
)
jobrun = None

65 changes: 64 additions & 1 deletion tests/unitary/with_extras/aqua/test_evaluation.py
@@ -34,6 +34,7 @@
AquaEvalMetrics,
AquaEvalReport,
AquaEvaluationSummary,
CreateAquaEvaluationDetails,
)
from ads.aqua.extension.base_handler import AquaAPIhandler
from ads.jobs.ads_job import DataScienceJob, DataScienceJobRun, Job
@@ -353,6 +354,7 @@ class TestDataset:
COMPARTMENT_ID = "ocid1.compartment.oc1..<UNIQUE_OCID>"
EVAL_ID = "ocid1.datasciencemodel.oc1.iad.<OCID>"
INVALID_EVAL_ID = "ocid1.datasciencemodel.oc1.phx.<OCID>"
MODEL_DEPLOYMENT_ID = "ocid1.datasciencemodeldeployment.oc1.<region>.<MD_OCID>"


class TestAquaEvaluation(unittest.TestCase):
@@ -449,7 +451,7 @@ def test_create_evaluation(
mock_from_id.return_value = foundation_model

experiment = MagicMock()
experiment.id = "test_experiment_id"
experiment.id = "ocid1.datasciencemodelversionset.oc1.iad.amaaaaaav66vvniakngdzelb5hcgjd6yvfejksu2excidvvi3s5s5whtmdea"
Member: Let's not use real IDs?

Member Author: Done.
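
A synthetic placeholder in the style already used by TestDataset would satisfy this, for example:

experiment.id = "ocid1.datasciencemodelversionset.oc1.iad.<UNIQUE_OCID>"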

mock_mvs_create.return_value = experiment

evaluation_model = MagicMock()
@@ -533,6 +535,67 @@ def test_create_evaluation(
"time_created": f"{oci_dsc_model.time_created}",
}

@parameterized.expand(
[
(
{"model": "model_one"},
None
),
(
{},
"Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment."
),
(
{"model": "wrong_model_name"},
"Provide the correct model name. The valid model names for this Model Deployment are model_one, model_two, model_three."
)
])
@patch("ads.aqua.evaluation.evaluation.AquaEvaluationApp.create")
def test_validate_multi_model_evaluation(
self,
mock_model_parameters,
expected_message,
mock_model
):
curr_dir = os.path.dirname(__file__)

eval_model_freeform_tags = {"ftag1": "fvalue1", "ftag2": "fvalue2"}
eval_model_defined_tags = {"dtag1": "dvalue1", "dtag2": "dvalue2"}

eval_model_freeform_tags[Tags.MULTIMODEL_TYPE_TAG] = "true"
eval_model_freeform_tags[Tags.AQUA_TAG] = "active"

create_aqua_evaluation_details = dict( # noqa: C408
evaluation_source_id=TestDataset.MODEL_DEPLOYMENT_ID,
evaluation_name="test_evaluation_name",
dataset_path="oci://dataset_bucket@namespace/prefix/dataset.jsonl",
report_path="oci://report_bucket@namespace/prefix/",
model_parameters=mock_model_parameters,
shape_name="VM.Standard.E3.Flex",
block_storage_size=1,
experiment_name="test_experiment_name",
memory_in_gbs=1,
ocpus=1,
freeform_tags=eval_model_freeform_tags,
defined_tags=eval_model_defined_tags,
)


aqua_multi_model = os.path.join(
curr_dir, "test_data/deployment/aqua_multi_model.yaml"
)

mock_model = DataScienceModel.from_yaml(
uri=aqua_multi_model
)

mock_create_aqua_evaluation_details = MagicMock(**create_aqua_evaluation_details, spec=CreateAquaEvaluationDetails)

try:
AquaEvaluationApp.validate_name_multi_model(mock_model, mock_create_aqua_evaluation_details)
except Exception as e:
self.assertEqual(str(e), expected_message)

def test_get_service_model_name(self):
# get service model name from fine tuned model deployment
source = ModelDeployment().with_freeform_tags(