ODSC 68580 - Update Evaluation SDK to Support Multi-Model Deployment #1085

Merged
ads/aqua/evaluation/evaluation.py (85 changes: 84 additions & 1 deletion)
@@ -31,6 +31,7 @@
Tags,
)
from ads.aqua.common.errors import (
AquaError,
AquaFileExistsError,
AquaFileNotFoundError,
AquaMissingKeyError,
@@ -75,6 +76,7 @@
CreateAquaEvaluationDetails,
)
from ads.aqua.evaluation.errors import EVALUATION_JOB_EXIT_CODE_MESSAGE
from ads.aqua.model.constants import ModelCustomMetadataFields
from ads.aqua.ui import AquaContainerConfig
from ads.common.auth import default_signer
from ads.common.object_storage_details import ObjectStorageDetails
@@ -183,6 +185,23 @@ def create(
evaluation_source = ModelDeployment.from_id(
create_aqua_evaluation_details.evaluation_source_id
)

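# For multi-model deployments, resolve the model group from the AQUA model ID tag
# and validate the requested model name before building the evaluation.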
if Tags.MULTIMODEL_TYPE_TAG in evaluation_source.freeform_tags:
multi_model_id = evaluation_source.freeform_tags.get(
Tags.AQUA_MODEL_ID_TAG, UNKNOWN
)

if not multi_model_id:
raise AquaRuntimeError(
f"Invalid multi model deployment {multi_model_id}."
f"Make sure the {Tags.AQUA_MODEL_ID_TAG} tag is added to the deployment."
)

aqua_model = DataScienceModel.from_id(multi_model_id)
AquaEvaluationApp.validate_model_name(
aqua_model, create_aqua_evaluation_details
)

try:
if (
evaluation_source.runtime.type
@@ -550,6 +569,70 @@ def create(
parameters=AquaEvalParams(),
)

@staticmethod
def validate_model_name(
evaluation_source: DataScienceModel,
create_aqua_evaluation_details: CreateAquaEvaluationDetails,
) -> None:
"""
Validates the user-provided model name when creating an Aqua evaluation.

Parameters
----------
evaluation_source: DataScienceModel
    The DataScienceModel object containing the metadata for each model
    in a single- or multi-model deployment.
create_aqua_evaluation_details: CreateAquaEvaluationDetails
    The CreateAquaEvaluationDetails data class containing all required
    and optional fields needed to create the Aqua evaluation.

Raises
------
AquaValueError:
    - When the user does not specify a model name.
    - When the supplied model name does not match any model name recorded in the DataScienceModel metadata.
AquaRuntimeError:
    - When the DataScienceModel metadata lacks the core attributes needed to validate the name.
"""
user_model_parameters = create_aqua_evaluation_details.model_parameters

custom_metadata_list = evaluation_source.custom_metadata_list
user_model_name = user_model_parameters.get("model")

model_count = custom_metadata_list.get(ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT)

if model_count and custom_metadata_list:
model_group_count = int(model_count.value)
else:
logger.debug(
f"The ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT or custom_metadata_list (ModelCustomMetadata) is missing from the metadata in evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}"
)
raise AquaRuntimeError(
    "An issue occurred while initializing the model group during deployment. "
    "Recreate the model deployment and retry the evaluation."
)

model_names = [
    custom_metadata_list.get(f"model-name-{idx}")
    for idx in range(model_group_count)
]

# Compare against the metadata values (strings), not the metadata item objects.
valid_model_name_list = [name.value for name in model_names if name is not None]
valid_model_names = ", ".join(valid_model_name_list)

if "model" not in user_model_parameters:
logger.debug(
f"User did not input model name for multi model deployment evaluation with evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}"
)
raise AquaValueError(
f"Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment. The valid model names for this Model Deployment are {valid_model_names}."
)

if user_model_name not in valid_model_name_list:
logger.debug(
f"User input for model name was {user_model_name}, expected {valid_model_names} evaluation source ID: {create_aqua_evaluation_details.evaluation_source_id}"
)
raise AquaValueError(
f"Provide the correct model name. The valid model names for this Model Deployment are {valid_model_names}."
)

def _build_evaluation_runtime(
self,
evaluation_id: str,
@@ -1392,7 +1475,7 @@ def _fetch_jobrun(
)
except Exception as e:
logger.debug(
f"Failed to retreive job run: {jobrun_id}. " f"DEBUG INFO: {str(e)}"
f"Failed to retreive job run: {jobrun_id}. DEBUG INFO: {str(e)}"
)
jobrun = None

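For orientation between the two files in this diff, the sketch below shows roughly how the new multi-model path is expected to be exercised from the SDK. It is illustrative only: the deployment OCID and model names are placeholders, the field names mirror the CreateAquaEvaluationDetails fields used in the unit test further down, and the import paths and create() call shape are assumptions to verify against the installed ADS version rather than a confirmed end-to-end call.

from ads.aqua.evaluation.entities import CreateAquaEvaluationDetails
from ads.aqua.evaluation.evaluation import AquaEvaluationApp

# Placeholder OCID of a deployment tagged as a multi-model deployment
# (Tags.MULTIMODEL_TYPE_TAG set, with Tags.AQUA_MODEL_ID_TAG pointing at the model group).
deployment_id = "ocid1.datasciencemodeldeployment.oc1.<region>.<MD_OCID>"

details = CreateAquaEvaluationDetails(
    evaluation_source_id=deployment_id,
    evaluation_name="multi_model_eval",
    dataset_path="oci://dataset_bucket@namespace/prefix/dataset.jsonl",
    report_path="oci://report_bucket@namespace/prefix/",
    # "model" selects which member of the model group to evaluate; it must match
    # one of the model-name-<idx> entries in the model group's custom metadata.
    model_parameters={"model": "model_one"},
    shape_name="VM.Standard.E3.Flex",
    block_storage_size=1,
    memory_in_gbs=1,
    ocpus=1,
    experiment_name="multi_model_eval_experiment",
)

evaluation_summary = AquaEvaluationApp().create(details)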
tests/unitary/with_extras/aqua/test_evaluation.py (62 changes: 62 additions & 0 deletions)
@@ -17,6 +17,7 @@
from ads.aqua.common import utils
from ads.aqua.common.enums import Tags
from ads.aqua.common.errors import (
AquaError,
AquaFileNotFoundError,
AquaMissingKeyError,
AquaRuntimeError,
@@ -34,8 +35,10 @@
AquaEvalMetrics,
AquaEvalReport,
AquaEvaluationSummary,
CreateAquaEvaluationDetails,
)
from ads.aqua.extension.base_handler import AquaAPIhandler
from ads.aqua.model.constants import ModelCustomMetadataFields
from ads.jobs.ads_job import DataScienceJob, DataScienceJobRun, Job
from ads.model import DataScienceModel
from ads.model.deployment.model_deployment import ModelDeployment
@@ -353,6 +356,7 @@ class TestDataset:
COMPARTMENT_ID = "ocid1.compartment.oc1..<UNIQUE_OCID>"
EVAL_ID = "ocid1.datasciencemodel.oc1.iad.<OCID>"
INVALID_EVAL_ID = "ocid1.datasciencemodel.oc1.phx.<OCID>"
MODEL_DEPLOYMENT_ID = "ocid1.datasciencemodeldeployment.oc1.<region>.<MD_OCID>"


class TestAquaEvaluation(unittest.TestCase):
@@ -533,6 +537,64 @@ def test_create_evaluation(
"time_created": f"{oci_dsc_model.time_created}",
}

@parameterized.expand(
[
(
{},
"Provide the model name. For evaluation, a single model needs to be targeted using the name in the multi model deployment. The valid model names for this Model Deployment are model_one, model_two, model_three."
),
(
{"model": "wrong_model_name"},
"Provide the correct model name. The valid model names for this Model Deployment are model_one, model_two, model_three."
)
])
@patch("ads.aqua.evaluation.evaluation.AquaEvaluationApp.create")
def test_validate_model_name(
self,
mock_model_parameters,
expected_message,
mock_model
):
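"""Validates the error raised when the model name is missing or does not match the multi-model deployment metadata."""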
curr_dir = os.path.dirname(__file__)

eval_model_freeform_tags = {"ftag1": "fvalue1", "ftag2": "fvalue2"}
eval_model_defined_tags = {"dtag1": "dvalue1", "dtag2": "dvalue2"}

eval_model_freeform_tags[Tags.MULTIMODEL_TYPE_TAG] = "true"
eval_model_freeform_tags[Tags.AQUA_TAG] = "active"

create_aqua_evaluation_details = dict( # noqa: C408
evaluation_source_id=TestDataset.MODEL_DEPLOYMENT_ID,
evaluation_name="test_evaluation_name",
dataset_path="oci://dataset_bucket@namespace/prefix/dataset.jsonl",
report_path="oci://report_bucket@namespace/prefix/",
model_parameters=mock_model_parameters,
shape_name="VM.Standard.E3.Flex",
block_storage_size=1,
experiment_name="test_experiment_name",
memory_in_gbs=1,
ocpus=1,
freeform_tags=eval_model_freeform_tags,
defined_tags=eval_model_defined_tags,
)


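# DataScienceModel fixture carrying the multi-model group custom metadata
# (the group count and the model-name-<idx> entries used by validate_model_name).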
aqua_multi_model = os.path.join(
curr_dir, "test_data/deployment/aqua_multi_model.yaml"
)

mock_model = DataScienceModel.from_yaml(
uri=aqua_multi_model
)

mock_create_aqua_evaluation_details = MagicMock(**create_aqua_evaluation_details, spec=CreateAquaEvaluationDetails)

with self.assertRaises(AquaError) as context:
    AquaEvaluationApp.validate_model_name(
        mock_model, mock_create_aqua_evaluation_details
    )
self.assertEqual(str(context.exception), expected_message)

def test_get_service_model_name(self):
# get service model name from fine tuned model deployment
source = ModelDeployment().with_freeform_tags(
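As a closing note on the metadata convention the validator relies on, here is a minimal, self-contained stand-in. It uses plain dicts instead of ModelCustomMetadata, and the group-count key string is an assumption standing in for ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT; only the model-name-<idx> naming and the membership check mirror the logic added in this PR.

# Stand-in for the multi-model custom metadata read by validate_model_name.
GROUP_COUNT_KEY = "model_group_count"  # assumed key; the real code uses ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT

custom_metadata = {
    GROUP_COUNT_KEY: "3",
    "model-name-0": "model_one",
    "model-name-1": "model_two",
    "model-name-2": "model_three",
}


def resolve_valid_model_names(metadata: dict) -> list:
    """Collect the model name recorded for each member of the model group."""
    count = int(metadata[GROUP_COUNT_KEY])
    return [
        metadata[f"model-name-{idx}"]
        for idx in range(count)
        if f"model-name-{idx}" in metadata
    ]


def is_valid_target(metadata: dict, model_name: str) -> bool:
    """Mirror the membership check performed by validate_model_name."""
    return model_name in resolve_valid_model_names(metadata)


assert is_valid_target(custom_metadata, "model_one")
assert not is_valid_target(custom_metadata, "wrong_model_name")

In the real implementation the same shape of data comes from DataScienceModel.custom_metadata_list, where each entry exposes its string payload through .value.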